From d3046a4dac678e7b4fe24acd4207223f07487767 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 30 Mar 2015 15:24:05 +0200 Subject: [PATCH 1/2] ggatel: Optionally retry in case of failed reads and writes ... after waiting five seconds and reopening the device. This allows to use ggatel as a workaround against USB flakiness which can result in device disconnects that are extremely annoying if ZFS on geli is involved. To prevent data loss if multiple device disappear at the same time and reappear with different names, the disk ident is checked to confirm that the device is the expected one. As a side-effect retrying will not work for files (which have no disk ident). Obtained from: ElectroBSD --- sbin/ggate/ggatel/ggatel.c | 63 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/sbin/ggate/ggatel/ggatel.c b/sbin/ggate/ggatel/ggatel.c index abfe7c1..e9c6e88 100644 --- a/sbin/ggate/ggatel/ggatel.c +++ b/sbin/ggate/ggatel/ggatel.c @@ -53,6 +53,7 @@ static const char *path = NULL; static int unit = G_GATE_UNIT_AUTO; static unsigned flags = 0; static int force = 0; +static unsigned retries = 0; static unsigned sectorsize = 0; static unsigned timeout = G_GATE_TIMEOUT; @@ -60,10 +61,10 @@ static void usage(void) { - fprintf(stderr, "usage: %s create [-v] [-o ] " + fprintf(stderr, "usage: %s create [-v] [-o ] [-r ] " "[-s sectorsize] [-t timeout] [-u unit] \n", getprogname()); - fprintf(stderr, " %s rescue [-v] [-o ] <-u unit> " - "\n", getprogname()); + fprintf(stderr, " %s rescue [-v] [-o ] [-r ] " + "<-u unit> \n", getprogname()); fprintf(stderr, " %s destroy [-f] <-u unit>\n", getprogname()); fprintf(stderr, " %s list [-v] [-u unit]\n", getprogname()); exit(EXIT_FAILURE); @@ -85,6 +86,7 @@ g_gatel_serve(int fd) { struct g_gate_ctl_io ggio; size_t bsize; + char ident[DISK_IDENT_SIZE]; if (g_gate_verbose == 0) { if (daemon(0, 0) == -1) { @@ -97,8 +99,15 @@ g_gatel_serve(int fd) ggio.gctl_unit = unit; bsize = sectorsize; ggio.gctl_data = malloc(bsize); + + errno = 0; + if (retries && ioctl(fd, DIOCGIDENT, ident) != 0) { + g_gate_xlog("Failed to get disk ident for %s: %s", path, + strerror(errno)); + } for (;;) { int error; + int retries_left; once_again: ggio.gctl_length = bsize; ggio.gctl_error = 0; @@ -130,6 +139,8 @@ once_again: strerror(error)); } + retries_left = retries; +retry_request: error = 0; switch (ggio.gctl_cmd) { case BIO_READ: @@ -145,6 +156,10 @@ once_again: if (pread(fd, ggio.gctl_data, ggio.gctl_length, ggio.gctl_offset) == -1) { error = errno; + g_gate_log(LOG_ERR, "Failed to read %d" + " bytes from %s: %s", + ggio.gctl_length, path, + strerror(error)); } } break; @@ -153,12 +168,44 @@ once_again: if (pwrite(fd, ggio.gctl_data, ggio.gctl_length, ggio.gctl_offset) == -1) { error = errno; + g_gate_log(LOG_ERR, "Failed to write %d bytes" + " to %s: %s", ggio.gctl_length, path, + strerror(error)); } break; default: error = EOPNOTSUPP; } + if (error && error != EOPNOTSUPP) { + if (retries_left > 0) { + char ident_new[DISK_IDENT_SIZE]; + + close(fd); + retries_left--; + sleep(5); + fd = open(path, g_gate_openflags(flags) | + O_DIRECT | O_FSYNC); + if (fd == -1) { + err(EXIT_FAILURE, "Cannot open %s", + path); + } + if (ioctl(fd, DIOCGIDENT, ident_new) != 0) { + g_gate_xlog("Failed to get disk ", + "ident for %s: %s", path, + strerror(errno)); + } + if (strcmp(ident, ident_new) != 0) { + g_gate_xlog("Disk ident for %s " + "changed from %s to %s. Reuse " + "could cause data loss.", path, + ident, ident_new); + } + g_gate_log(LOG_ERR, "Retrying after reopening " + "%s (%s)", path, ident); + goto retry_request; + } + } ggio.gctl_error = error; g_gate_ioctl(G_GATE_CMD_DONE, &ggio); } @@ -229,7 +276,7 @@ main(int argc, char *argv[]) for (;;) { int ch; - ch = getopt(argc, argv, "fo:s:t:u:v"); + ch = getopt(argc, argv, "fo:r:s:t:u:v"); if (ch == -1) break; switch (ch) { @@ -252,6 +299,14 @@ main(int argc, char *argv[]) "Invalid argument for '-o' option."); } break; + case 'r': + if (action != CREATE && action != RESCUE) + usage(); + errno = 0; + retries = strtoul(optarg, NULL, 10); + if (retries == 0 && errno != 0) + errx(EXIT_FAILURE, "Invalid retry count."); + break; case 's': if (action != CREATE) usage(); -- 2.4.2 From 8165d021b8ee6c81c7740ea955d86b186129c07b Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 26 Apr 2015 17:54:14 +0200 Subject: [PATCH 2/2] ggatel.8: Document the shiny new -r option --- sbin/ggate/ggatel/ggatel.8 | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/sbin/ggate/ggatel/ggatel.8 b/sbin/ggate/ggatel/ggatel.8 index 37e11cd..86d8e56 100644 --- a/sbin/ggate/ggatel/ggatel.8 +++ b/sbin/ggate/ggatel/ggatel.8 @@ -35,6 +35,7 @@ .Cm create .Op Fl v .Op Fl o Cm ro | wo | rw +.Op Fl r Ar retries .Op Fl s Ar sectorsize .Op Fl t Ar timeout .Op Fl u Ar unit @@ -51,6 +52,7 @@ .Cm rescue .Op Fl v .Op Fl o Cm ro | wo | rw +.Op Fl r Ar retries .Fl u Ar unit .Ar path .Sh DESCRIPTION @@ -101,6 +103,19 @@ or read-write .Pq Cm rw . Default is .Cm rw . +.It Fl r Ar retries +Number of times a failed request should be retried before forwarding +the error to the kernel. +Between retries, +.Nm ggatel +waits for five seconds and reopens the device in case it temporarily +disappeared. +The reopened device is only used if the disk identification did not +change. +This option is useful when using unreliable USB devices as geli +consumer (as long as the device loss does not cause the USB +stack to deadlock). +By default failed requests are not retried. .It Fl s Ar sectorsize Sector size for .Nm ggate @@ -143,6 +158,52 @@ umount /secret gbde detach ggate5 ggatel destroy -u 5 .Ed + +Scrub a pool on an USB device that occasionally disappears: +.Bd -literal -offset indent +$ glabel list da0 +Geom name: da0 +Providers: +1. Name: label/extreme + Mediasize: 4023385600 (3.7G) + Sectorsize: 512 + Mode: r0w0e0 + secoffset: 0 + offset: 0 + seclength: 7858175 + length: 4023385600 + index: 0 +Consumers: +1. Name: da0 + Mediasize: 4023386112 (3.7G) + Sectorsize: 512 + Mode: r0w0e0 + +$ sudo ggatel create -r 2 /dev/da0 +ggate0 +$ glabel list da0 +glabel: No such geom: da0. +glabel list ggate0 +Geom name: ggate0 +Providers: +1. Name: label/extreme + Mediasize: 4023385600 (3.7G) + Sectorsize: 512 + Mode: r0w0e0 + secoffset: 0 + offset: 0 + seclength: 7858175 + length: 4023385600 + index: 0 +Consumers: +1. Name: ggate0 + Mediasize: 4023386112 (3.7G) + Sectorsize: 512 + Mode: r0w0e0 + +$ zogftw import extreme +$ sudo zpool scrub extreme +.Ed .Sh SEE ALSO .Xr geom 4 , .Xr gbde 8 , -- 2.4.2