Author: delphij
Date: Tue Dec 31 19:39:15 2013
New Revision: 260138
URL: http://svnweb.freebsd.org/changeset/base/260138

Log:
  MFV r242733:
  
  3306 zdb should be able to issue reads in parallel
  3321 'zpool reopen' command should be documented in the man page
  and help message
  
  illumos/illumos-gate@31d7e8fa33fae995f558673adb22641b5aa8b6e1
  
  FreeBSD porting notes: the kernel part of this changeset depends
  on Solaris buf(9S) interfaces and is not really applicable for
  our use.  vdev_disk.c is patched as-is to reduce divergence from
  upstream, but vdev_file.c is left intact.
  
  MFC after:    2 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/zdb/zdb.8
  head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
  head/cddl/contrib/opensolaris/cmd/zpool/zpool.8
  head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zdb/zdb.8 Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.8 Tue Dec 31 19:39:15 2013        (r260138)
@@ -14,11 +14,12 @@
 .\"
 .\" Copyright 2012, Richard Lowe.
 .\" Copyright (c) 2012, Marcelo Araujo <ara...@freebsd.org>.
+.\" Copyright (c) 2012 by Delphix. All rights reserved.
 .\" All Rights Reserved.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 10, 2012
+.Dd December 31, 2013
 .Dt ZDB 8
 .Os
 .Sh NAME
@@ -29,27 +30,35 @@
 .Op Fl CumdibcsDvhLXFPA
 .Op Fl e Op Fl p Ar path...
 .Op Fl t Ar txg
+.Op Fl U Ar cache
+.Op Fl M Ar inflight I/Os
 .Ar poolname
 .Op Ar object ...
 .Nm
 .Op Fl divPA
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar dataset
 .Op Ar object ...
 .Nm
 .Fl m Op Fl LXFPA
 .Op Fl t Ar txg
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar poolname
 .Nm
 .Fl R Op Fl A
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
+.Ar poolname
 .Ar poolname
 .Ar vdev Ns : Ns Ar offset Ns : Ns Ar size Ns Op Ns : Ns Ar flags
 .Nm
 .Fl S
 .Op Fl AP
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
+.Ar poolname
 .Ar poolname
 .Nm
 .Fl l
@@ -205,6 +214,11 @@ flag specifies the path under which devi
 .It Fl F
 Attempt to make an unreadable pool readable by trying progressively older
 transactions.
+.It Fl M Ar inflight I/Os
+Limit the number of outstanding checksum I/Os to the specified value.
+The default value is 200. This option affects the performance of the
+.Fl c
+option.
 .It Fl P
 Print numbers in an unscaled form more amenable to parsing, eg. 1000000 rather
 than 1M.
@@ -218,9 +232,7 @@ options for a means to see the available
 transaction numbers.
 .It Fl U Ar cachefile
 Use a cache file other than
-.Pa /etc/zfs/zpool.cache .
-This option is only valid with
-.Fl C
+.Pa /boot/zfs/zpool.cache .
 .It Fl v
 Enable verbosity.
 Specify multiple times for increased verbosity.

Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Tue Dec 31 19:39:15 2013        (r260138)
@@ -89,6 +89,7 @@ extern void dump_intent_log(zilog_t *);
 uint64_t *zopt_object = NULL;
 int zopt_objects = 0;
 libzfs_handle_t *g_zfs;
+uint64_t max_inflight = 200;
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -110,16 +111,17 @@ static void
 usage(void)
 {
        (void) fprintf(stderr,
-            "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]]"
-            "poolname [object...]\n"
-            "       %s [-divPA] [-e -p path...] dataset [object...]\n"
-            "       %s -m [-LXFPA] [-t txg] [-e [-p path...]]"
-            "poolname [vdev [metaslab...]]\n"
-            "       %s -R [-A] [-e [-p path...]] poolname "
-            "vdev:offset:size[:flags]\n"
-            "       %s -S [-PA] [-e [-p path...]] poolname\n"
-            "       %s -l [-uA] device\n"
-            "       %s -C [-A] [-U config]\n\n",
+           "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
+           "[-U config] [-M inflight I/Os] poolname [object...]\n"
+           "       %s [-divPA] [-e -p path...] [-U config] dataset "
+           "[object...]\n"
+           "       %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
+           "poolname [vdev [metaslab...]]\n"
+           "       %s -R [-A] [-e [-p path...]] poolname "
+           "vdev:offset:size[:flags]\n"
+           "       %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
+           "       %s -l [-uA] device\n"
+           "       %s -C [-A] [-U config]\n\n",
            cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
 
        (void) fprintf(stderr, "    Dataset name must include at least one "
@@ -164,6 +166,8 @@ usage(void)
        (void) fprintf(stderr, "        -P print numbers in parseable form\n");
        (void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
            "searching for uberblocks\n");
+       (void) fprintf(stderr, "        -M <number of inflight I/Os> -- "
+           "specify the maximum number of checksumming I/Os [default is 200]");
        (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
            "to make only that option verbose\n");
        (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -2154,6 +2158,47 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *
            bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
 }
 
+/* ARGSUSED */
+static void
+zdb_blkptr_done(zio_t *zio)
+{
+       spa_t *spa = zio->io_spa;
+       blkptr_t *bp = zio->io_bp;
+       int ioerr = zio->io_error;
+       zdb_cb_t *zcb = zio->io_private;
+       zbookmark_t *zb = &zio->io_bookmark;
+
+       zio_data_buf_free(zio->io_data, zio->io_size);
+
+       mutex_enter(&spa->spa_scrub_lock);
+       spa->spa_scrub_inflight--;
+       cv_broadcast(&spa->spa_scrub_io_cv);
+
+       if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+               char blkbuf[BP_SPRINTF_LEN];
+
+               zcb->zcb_haderrors = 1;
+               zcb->zcb_errors[ioerr]++;
+
+               if (dump_opt['b'] >= 2)
+                       sprintf_blkptr(blkbuf, bp);
+               else
+                       blkbuf[0] = '\0';
+
+               (void) printf("zdb_blkptr_cb: "
+                   "Got error %d reading "
+                   "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+                   ioerr,
+                   (u_longlong_t)zb->zb_objset,
+                   (u_longlong_t)zb->zb_object,
+                   (u_longlong_t)zb->zb_level,
+                   (u_longlong_t)zb->zb_blkid,
+                   blkbuf);
+       }
+       mutex_exit(&spa->spa_scrub_lock);
+}
+
+/* ARGSUSED */
 static int
 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
@@ -2174,38 +2219,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog
        is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
 
        if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
-               int ioerr;
                size_t size = BP_GET_PSIZE(bp);
-               void *data = malloc(size);
+               void *data = zio_data_buf_alloc(size);
                int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
 
                /* If it's an intent log block, failure is expected. */
                if (zb->zb_level == ZB_ZIL_LEVEL)
                        flags |= ZIO_FLAG_SPECULATIVE;
 
-               ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
-                   NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
-
-               free(data);
-               if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
-                       zcb->zcb_haderrors = 1;
-                       zcb->zcb_errors[ioerr]++;
+               mutex_enter(&spa->spa_scrub_lock);
+               while (spa->spa_scrub_inflight > max_inflight)
+                       cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+               spa->spa_scrub_inflight++;
+               mutex_exit(&spa->spa_scrub_lock);
 
-                       if (dump_opt['b'] >= 2)
-                               sprintf_blkptr(blkbuf, bp);
-                       else
-                               blkbuf[0] = '\0';
-
-                       (void) printf("zdb_blkptr_cb: "
-                           "Got error %d reading "
-                           "<%llu, %llu, %lld, %llx> %s -- skipping\n",
-                           ioerr,
-                           (u_longlong_t)zb->zb_objset,
-                           (u_longlong_t)zb->zb_object,
-                           (u_longlong_t)zb->zb_level,
-                           (u_longlong_t)zb->zb_blkid,
-                           blkbuf);
-               }
+               zio_nowait(zio_read(NULL, spa, bp, data, size,
+                   zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
        }
 
        zcb->zcb_readfails = 0;
@@ -2433,6 +2462,18 @@ dump_block_stats(spa_t *spa)
        zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
        zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
 
+       /*
+        * If we've traversed the data blocks then we need to wait for those
+        * I/Os to complete. We leverage "The Godfather" zio to wait on
+        * all async I/Os to complete.
+        */
+       if (dump_opt['c']) {
+               (void) zio_wait(spa->spa_async_zio_root);
+               spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+                   ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+                   ZIO_FLAG_GODFATHER);
+       }
+
        if (zcb.zcb_haderrors) {
                (void) printf("\nError counts:\n\n");
                (void) printf("\t%5s  %s\n", "errno", "count");
@@ -3202,7 +3243,7 @@ main(int argc, char **argv)
 
        dprintf_setup(&argc, argv);
 
-       while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
+       while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
                switch (c) {
                case 'b':
                case 'c':
@@ -3231,6 +3272,15 @@ main(int argc, char **argv)
                case 'v':
                        verbose++;
                        break;
+               case 'M':
+                       max_inflight = strtoull(optarg, NULL, 0);
+                       if (max_inflight == 0) {
+                               (void) fprintf(stderr, "maximum number "
+                                   "of inflight I/Os must be greater "
+                                   "than 0\n");
+                               usage();
+                       }
+                       break;
                case 'p':
                        if (searchdirs == NULL) {
                                searchdirs = umem_alloc(sizeof (char *),

Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool.8     Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool.8     Tue Dec 31 19:39:15 2013        (r260138)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 14, 2013
+.Dd December 31, 2013
 .Dt ZPOOL 8
 .Os
 .Sh NAME
@@ -141,6 +141,9 @@
 .Cm remove
 .Ar pool device ...
 .Nm
+.Cm reopen
+.Ar pool
+.Nm
 .Cm replace
 .Op Fl f
 .Ar pool device
@@ -1431,6 +1434,13 @@ command. Non-redundant and
 devices cannot be removed from a pool.
 .It Xo
 .Nm
+.Cm reopen
+.Ar pool
+.Xc
+.Pp
+Reopen all the vdevs associated with the pool.
+.It Xo
+.Nm
 .Cm replace
 .Op Fl f
 .Ar pool device

Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c        Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c        Tue Dec 31 19:39:15 2013        (r260138)
@@ -248,7 +248,7 @@ get_usage(zpool_help_t idx) {
        case HELP_REMOVE:
                return (gettext("\tremove <pool> <device> ...\n"));
        case HELP_REOPEN:
-               return (""); /* Undocumented command */
+               return (gettext("\treopen <pool>\n"));
        case HELP_SCRUB:
                return (gettext("\tscrub [-s] <pool> ...\n"));
        case HELP_STATUS:
@@ -3720,22 +3720,37 @@ zpool_do_reguid(int argc, char **argv)
  * zpool reopen <pool>
  *
  * Reopen the pool so that the kernel can update the sizes of all vdevs.
- *
- * NOTE: This command is currently undocumented.  If the command is ever
- * exposed then the appropriate usage() messages will need to be made.
  */
 int
 zpool_do_reopen(int argc, char **argv)
 {
+       int c;
        int ret = 0;
        zpool_handle_t *zhp;
        char *pool;
 
+       /* check options */
+       while ((c = getopt(argc, argv, "")) != -1) {
+               switch (c) {
+               case '?':
+                       (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+                           optopt);
+                       usage(B_FALSE);
+               }
+       }
+
        argc--;
        argv++;
 
-       if (argc != 1)
-               return (2);
+       if (argc < 1) {
+               (void) fprintf(stderr, gettext("missing pool name\n"));
+               usage(B_FALSE);
+       }
+
+       if (argc > 1) {
+               (void) fprintf(stderr, gettext("too many arguments\n"));
+               usage(B_FALSE);
+       }
 
        pool = argv[0];
        if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Tue Dec 31 19:39:15 2013        (r260138)
@@ -661,7 +661,7 @@ __dprintf(const char *file, const char *
                if (dprintf_find_string("pid"))
                        (void) printf("%d ", getpid());
                if (dprintf_find_string("tid"))
-                       (void) printf("%u ", thr_self());
+                       (void) printf("%ul ", thr_self());
 #if 0
                if (dprintf_find_string("cpu"))
                        (void) printf("%u ", getcpuid());
@@ -1125,3 +1125,50 @@ zvol_create_minors(const char *name)
        return (0);
 }
 #endif
+
+#ifdef illumos
+void
+bioinit(buf_t *bp)
+{
+       bzero(bp, sizeof (buf_t));
+}
+
+void
+biodone(buf_t *bp)
+{
+       if (bp->b_iodone != NULL) {
+               (*(bp->b_iodone))(bp);
+               return;
+       }
+       ASSERT((bp->b_flags & B_DONE) == 0);
+       bp->b_flags |= B_DONE;
+}
+
+void
+bioerror(buf_t *bp, int error)
+{
+       ASSERT(bp != NULL);
+       ASSERT(error >= 0);
+
+       if (error != 0) {
+               bp->b_flags |= B_ERROR;
+       } else {
+               bp->b_flags &= ~B_ERROR;
+       }
+       bp->b_error = error;
+}
+
+
+int
+geterror(struct buf *bp)
+{
+       int error = 0;
+
+       if (bp->b_flags & B_ERROR) {
+               error = bp->b_error;
+               if (!error)
+                       error = EIO;
+       }
+       return (error);
+}
+#endif

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Tue Dec 31 18:25:15 2013        (r260137)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Tue Dec 31 19:39:15 2013        (r260138)
@@ -778,6 +778,38 @@ extern void cyclic_remove(cyclic_id_t);
 extern int cyclic_reprogram(cyclic_id_t, hrtime_t);
 #endif /* illumos */
 
+#ifdef illumos
+/*
+ * Buf structure
+ */
+#define        B_BUSY          0x0001
+#define        B_DONE          0x0002
+#define        B_ERROR         0x0004
+#define        B_READ          0x0040  /* read when I/O occurs */
+#define        B_WRITE         0x0100  /* non-read pseudo-flag */
+
+typedef struct buf {
+       int     b_flags;
+       size_t b_bcount;
+       union {
+               caddr_t b_addr;
+       } b_un;
+
+       lldaddr_t       _b_blkno;
+#define        b_lblkno        _b_blkno._f
+       size_t  b_resid;
+       size_t  b_bufsize;
+       int     (*b_iodone)(struct buf *);
+       int     b_error;
+       void    *b_private;
+} buf_t;
+
+extern void bioinit(buf_t *);
+extern void biodone(buf_t *);
+extern void bioerror(buf_t *, int);
+extern int geterror(buf_t *);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Tue Dec 31 18:25:15 2013        (r260137)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Tue Dec 31 19:39:15 2013        (r260138)
@@ -363,6 +363,16 @@ extern void vdev_set_min_asize(vdev_t *v
 /* zdb uses this tunable, so it must be declared here to make lint happy. */
 extern int zfs_vdev_cache_size;
 
+#ifdef illumos
+/*
+ * The vdev_buf_t is used to translate between zio_t and buf_t, and back again.
+ */
+typedef struct vdev_buf {
+       buf_t   vb_buf;         /* buffer that describes the io */
+       zio_t   *vb_io;         /* pointer back to the original zio_t */
+} vdev_buf_t;
+#endif
+
 #ifdef __cplusplus
 }
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c     Tue Dec 31 18:25:15 2013        (r260137)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c     Tue Dec 31 19:39:15 2013        (r260138)
@@ -42,11 +42,6 @@
 
 extern ldi_ident_t zfs_li;
 
-typedef struct vdev_disk_buf {
-       buf_t   vdb_buf;
-       zio_t   *vdb_io;
-} vdev_disk_buf_t;
-
 static void
 vdev_disk_hold(vdev_t *vd)
 {
@@ -483,8 +478,8 @@ vdev_disk_ldi_physio(ldi_handle_t vd_lh,
 static void
 vdev_disk_io_intr(buf_t *bp)
 {
-       vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
-       zio_t *zio = vdb->vdb_io;
+       vdev_buf_t *vb = (vdev_buf_t *)bp;
+       zio_t *zio = vb->vb_io;
 
        /*
         * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
@@ -496,7 +491,7 @@ vdev_disk_io_intr(buf_t *bp)
        if (zio->io_error == 0 && bp->b_resid != 0)
                zio->io_error = SET_ERROR(EIO);
 
-       kmem_free(vdb, sizeof (vdev_disk_buf_t));
+       kmem_free(vb, sizeof (vdev_buf_t));
 
        zio_interrupt(zio);
 }
@@ -527,7 +522,7 @@ vdev_disk_io_start(zio_t *zio)
 {
        vdev_t *vd = zio->io_vd;
        vdev_disk_t *dvd = vd->vdev_tsd;
-       vdev_disk_buf_t *vdb;
+       vdev_buf_t *vb;
        struct dk_callback *dkc;
        buf_t *bp;
        int error;
@@ -591,10 +586,10 @@ vdev_disk_io_start(zio_t *zio)
                return (ZIO_PIPELINE_CONTINUE);
        }
 
-       vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
+       vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
 
-       vdb->vdb_io = zio;
-       bp = &vdb->vdb_buf;
+       vb->vb_io = zio;
+       bp = &vb->vb_buf;
 
        bioinit(bp);
        bp->b_flags = B_BUSY | B_NOCACHE |
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to