From 73a3742034086144c5bcb66e34ba218ff94624cb Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 30 Mar 2015 15:24:05 +0200 Subject: [PATCH 053/257] ggatel: Optionally retry in case of failed reads and writes ... after waiting five seconds and reopening the device. This allows to use ggatel as a workaround against USB flakiness which can result in device disconnects that are extremely annoying if ZFS on geli is involved. To prevent data loss if multiple device disappear at the same time and reappear with different names, the disk ident is checked to confirm that the device is the expected one. As a side-effect retrying will not work for files (which have no disk ident). Obtained from: ElectroBSD --- sbin/ggate/ggatel/ggatel.c | 63 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/sbin/ggate/ggatel/ggatel.c b/sbin/ggate/ggatel/ggatel.c index 0b89df091d2a..864f3ae40d70 100644 --- a/sbin/ggate/ggatel/ggatel.c +++ b/sbin/ggate/ggatel/ggatel.c @@ -53,6 +53,7 @@ static const char *path = NULL; static int unit = G_GATE_UNIT_AUTO; static unsigned flags = 0; static int force = 0; +static unsigned retries = 0; static unsigned sectorsize = 0; static unsigned timeout = G_GATE_TIMEOUT; @@ -60,10 +61,10 @@ static void usage(void) { - fprintf(stderr, "usage: %s create [-v] [-o ] " + fprintf(stderr, "usage: %s create [-v] [-o ] [-r ] " "[-s sectorsize] [-t timeout] [-u unit] \n", getprogname()); - fprintf(stderr, " %s rescue [-v] [-o ] <-u unit> " - "\n", getprogname()); + fprintf(stderr, " %s rescue [-v] [-o ] [-r ] " + "<-u unit> \n", getprogname()); fprintf(stderr, " %s destroy [-f] <-u unit>\n", getprogname()); fprintf(stderr, " %s list [-v] [-u unit]\n", getprogname()); exit(EXIT_FAILURE); @@ -85,6 +86,7 @@ g_gatel_serve(int fd) { struct g_gate_ctl_io ggio; size_t bsize; + char ident[DISK_IDENT_SIZE]; if (g_gate_verbose == 0) { if (daemon(0, 0) == -1) { @@ -97,8 +99,15 @@ g_gatel_serve(int fd) ggio.gctl_unit = unit; bsize = sectorsize; ggio.gctl_data = malloc(bsize); + + errno = 0; + if (retries && ioctl(fd, DIOCGIDENT, ident) != 0) { + g_gate_xlog("Failed to get disk ident for %s: %s", path, + strerror(errno)); + } for (;;) { int error; + int retries_left; once_again: ggio.gctl_length = bsize; ggio.gctl_error = 0; @@ -130,6 +139,8 @@ once_again: strerror(error)); } + retries_left = retries; +retry_request: error = 0; switch (ggio.gctl_cmd) { case BIO_READ: @@ -145,6 +156,10 @@ once_again: if (pread(fd, ggio.gctl_data, ggio.gctl_length, ggio.gctl_offset) == -1) { error = errno; + g_gate_log(LOG_ERR, "Failed to read %d" + " bytes from %s: %s", + ggio.gctl_length, path, + strerror(error)); } } break; @@ -153,12 +168,44 @@ once_again: if (pwrite(fd, ggio.gctl_data, ggio.gctl_length, ggio.gctl_offset) == -1) { error = errno; + g_gate_log(LOG_ERR, "Failed to write %d bytes" + " to %s: %s", ggio.gctl_length, path, + strerror(error)); } break; default: error = EOPNOTSUPP; } + if (error && error != EOPNOTSUPP) { + if (retries_left > 0) { + char ident_new[DISK_IDENT_SIZE]; + + close(fd); + retries_left--; + sleep(5); + fd = open(path, g_gate_openflags(flags) | + O_DIRECT | O_FSYNC); + if (fd == -1) { + err(EXIT_FAILURE, "Cannot open %s", + path); + } + if (ioctl(fd, DIOCGIDENT, ident_new) != 0) { + g_gate_xlog("Failed to get disk ", + "ident for %s: %s", path, + strerror(errno)); + } + if (strcmp(ident, ident_new) != 0) { + g_gate_xlog("Disk ident for %s " + "changed from %s to %s. Reuse " + "could cause data loss.", path, + ident, ident_new); + } + g_gate_log(LOG_ERR, "Retrying after reopening " + "%s (%s)", path, ident); + goto retry_request; + } + } ggio.gctl_error = error; g_gate_ioctl(G_GATE_CMD_DONE, &ggio); } @@ -230,7 +277,7 @@ main(int argc, char *argv[]) for (;;) { int ch; - ch = getopt(argc, argv, "fo:s:t:u:v"); + ch = getopt(argc, argv, "fo:r:s:t:u:v"); if (ch == -1) break; switch (ch) { @@ -253,6 +300,14 @@ main(int argc, char *argv[]) "Invalid argument for '-o' option."); } break; + case 'r': + if (action != CREATE && action != RESCUE) + usage(); + errno = 0; + retries = strtoul(optarg, NULL, 10); + if (retries == 0 && errno != 0) + errx(EXIT_FAILURE, "Invalid retry count."); + break; case 's': if (action != CREATE) usage(); -- 2.11.0