From 5c4f5770d0efb96bcd745797af6d924b7589ea93 Mon Sep 17 00:00:00 2001
From: Fabian Keil
Date: Mon, 30 Mar 2015 15:24:05 +0200
Subject: [PATCH 152/325] ggatel: Optionally retry in case of failed reads and writes

... after waiting five seconds and reopening the device.

This allows ggatel to be used to work around moderate USB flakiness
which can result in device disconnects that are extremely
annoying if ZFS on geli is involved.

To prevent data loss if multiple devices disappear at the same
time and reappear with different names, the disk ident is checked
to confirm that the device is the expected one.

As a side-effect retrying will not work for file-backed memory
disks (which currently have no disk ident).

The number of retries per request is set with the new -r option
of the create and rescue actions; the default of 0 disables
retrying.

Obtained from: ElectroBSD
---
 sbin/ggate/ggatel/ggatel.c | 75 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 4 deletions(-)

diff --git a/sbin/ggate/ggatel/ggatel.c b/sbin/ggate/ggatel/ggatel.c
index 4659cacbee17..88d0bf25ae46 100644
--- a/sbin/ggate/ggatel/ggatel.c
+++ b/sbin/ggate/ggatel/ggatel.c
@@ -55,6 +55,7 @@ static const char *path = NULL;
 static int unit = G_GATE_UNIT_AUTO;
 static unsigned flags = 0;
 static int force = 0;
+static unsigned retries = 0;
 static unsigned sectorsize = 0;
 static unsigned timeout = G_GATE_TIMEOUT;
 
@@ -62,10 +63,10 @@ static void
 usage(void)
 {
 
-	fprintf(stderr, "usage: %s create [-v] [-o <ro|wo|rw>] "
+	fprintf(stderr, "usage: %s create [-v] [-o <ro|wo|rw>] [-r <retries>] "
 	    "[-s sectorsize] [-t timeout] [-u unit] <path>\n", getprogname());
-	fprintf(stderr, "       %s rescue [-v] [-o <ro|wo|rw>] <-u unit> "
-	    "<path>\n", getprogname());
+	fprintf(stderr, "       %s rescue [-v] [-o <ro|wo|rw>] [-r <retries>] "
+	    "<-u unit> <path>\n", getprogname());
 	fprintf(stderr, "       %s destroy [-f] <-u unit>\n", getprogname());
 	fprintf(stderr, "       %s list [-v] [-u unit]\n", getprogname());
 	exit(EXIT_FAILURE);
@@ -87,6 +88,7 @@ g_gatel_serve(int fd)
 {
 	struct g_gate_ctl_io ggio;
 	size_t bsize;
+	char ident[DISK_IDENT_SIZE];
 
 	if (g_gate_verbose == 0) {
 		if (daemon(0, 0) == -1) {
@@ -99,8 +101,19 @@ g_gatel_serve(int fd)
 	ggio.gctl_unit = unit;
 	bsize = sectorsize;
 	ggio.gctl_data = malloc(bsize);
+
+	/*
+	 * Remember the disk ident so that a device that is reopened for a
+	 * retry can be checked against it before the request is reissued.
+	 */
+	errno = 0;
+	if (retries && ioctl(fd, DIOCGIDENT, ident) != 0) {
+		g_gate_xlog("Failed to get disk ident for %s: %s", path,
+		    strerror(errno));
+	}
 	for (;;) {
 		int error;
+		int retries_left;
 once_again:
 		ggio.gctl_length = bsize;
 		ggio.gctl_error = 0;
@@ -132,6 +145,8 @@ g_gatel_serve(int fd)
 			    strerror(error));
 		}
 
+		retries_left = retries;
+retry_request:
 		error = 0;
 		switch (ggio.gctl_cmd) {
 		case BIO_READ:
@@ -147,6 +162,11 @@ g_gatel_serve(int fd)
 				if (pread(fd, ggio.gctl_data, ggio.gctl_length,
 				    ggio.gctl_offset) == -1) {
 					error = errno;
+					g_gate_log(LOG_ERR, "Failed to read %jd"
+					    " bytes at offset %jd from %s: %s",
+					    (intmax_t)ggio.gctl_length,
+					    (intmax_t)ggio.gctl_offset, path,
+					    strerror(error));
 				}
 			}
 			break;
@@ -155,12 +175,51 @@ g_gatel_serve(int fd)
 			if (pwrite(fd, ggio.gctl_data, ggio.gctl_length,
 			    ggio.gctl_offset) == -1) {
 				error = errno;
+				g_gate_log(LOG_ERR, "Failed to write %jd bytes"
+				    " at offset %jd to %s: %s",
+				    (intmax_t)ggio.gctl_length,
+				    (intmax_t)ggio.gctl_offset, path,
+				    strerror(error));
 			}
 			break;
 		default:
 			error = EOPNOTSUPP;
 		}
 
+		if (error && error != EOPNOTSUPP) {
+			if (retries_left > 0) {
+				char ident_new[DISK_IDENT_SIZE];
+
+				/*
+				 * Pause, reopen the device and compare its ident with
+				 * the one recorded at startup before retrying.
+				 */
+				close(fd);
+				retries_left--;
+				sleep(5);
+				fd = open(path, g_gate_openflags(flags) |
+				    O_DIRECT | O_FSYNC);
+				if (fd == -1) {
+					g_gate_xlog("Cannot open %s: %s", path,
+					    strerror(errno));
+				}
+				if (ioctl(fd, DIOCGIDENT, ident_new) != 0) {
+					g_gate_xlog("Failed to get disk "
+					    "ident for %s: %s", path,
+					    strerror(errno));
+				}
+				if (strcmp(ident, ident_new) != 0) {
+					g_gate_xlog("Disk ident for %s "
+					    "changed from %s to %s. Reuse "
+					    "could cause data loss.", path,
+					    ident, ident_new);
+				}
+				g_gate_log(LOG_ERR, "Retrying after reopening "
+				    "%s (%s). Retries left: %d", path, ident,
+				    retries_left);
+				goto retry_request;
+			}
+		}
 		ggio.gctl_error = error;
 		g_gate_ioctl(G_GATE_CMD_DONE, &ggio);
 	}
@@ -232,7 +291,7 @@ main(int argc, char *argv[])
 	for (;;) {
 		int ch;
 
-		ch = getopt(argc, argv, "fo:s:t:u:v");
+		ch = getopt(argc, argv, "fo:r:s:t:u:v");
 		if (ch == -1)
 			break;
 		switch (ch) {
@@ -255,6 +314,14 @@ main(int argc, char *argv[])
 				    "Invalid argument for '-o' option.");
 			}
 			break;
+		case 'r':
+			if (action != CREATE && action != RESCUE)
+				usage();
+			errno = 0;
+			retries = strtoul(optarg, NULL, 10);
+			if (retries == 0 && errno != 0)
+				errx(EXIT_FAILURE, "Invalid retry count.");
+			break;
 		case 's':
 			if (action != CREATE)
 				usage();
-- 
2.32.0