Author: mm
Date: Thu May 13 20:32:56 2010
New Revision: 208047
URL: http://svn.freebsd.org/changeset/base/208047

Log:
  Import OpenSolaris revision 7837:001de5627df3
  It includes the following changes:
  - parallel reads in traversal code (Bug ID 6333409)
  - faster traversal for zfs send (Bug ID 6418042)
  - traversal code cleanup (Bug ID 6725675)
  - fix for two scrub related bugs (Bug ID 6729696, 6730101)
  - fix assertion in dbuf_verify (Bug ID 6752226)
  - fix panic during zfs send with i/o errors (Bug ID 6577985)
  - replace P2CROSS with P2BOUNDARY (Bug ID 6725680)
  
  List of OpenSolaris Bug IDs:
  6333409, 6418042, 6757112, 6725668, 6725675, 6725680,
  6725698, 6729696, 6730101, 6752226, 6577985, 6755042
  
  Approved by:  pjd, delphij (mentor)
  Obtained from:        OpenSolaris (multiple Bug IDs)
  MFC after:    1 week

Modified:
  head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
  head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
  head/sys/cddl/boot/zfs/zfsimpl.h
  head/sys/cddl/compat/opensolaris/sys/sysmacros.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c

Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu May 13 20:31:24 2010        (r208046)
+++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu May 13 20:32:56 2010        (r208047)
@@ -50,6 +50,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/zfs_fuid.h>
+#include <sys/arc.h>
 #undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
@@ -62,8 +63,6 @@ typedef void object_viewer_t(objset_t *,
 extern void dump_intent_log(zilog_t *);
 uint64_t *zopt_object = NULL;
 int zopt_objects = 0;
-int zdb_advance = ADVANCE_PRE;
-zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
 libzfs_handle_t *g_zfs;
 boolean_t zdb_sig_user_data = B_TRUE;
 int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
@@ -88,8 +87,8 @@ static void
 usage(void)
 {
        (void) fprintf(stderr,
-           "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] "
-           "[-B os:obj:level:blkid] [-S user:cksumalg] "
+           "Usage: %s [-udibcsv] [-U cachefile_path] "
+           "[-S user:cksumalg] "
            "dataset [object...]\n"
            "       %s -C [pool]\n"
            "       %s -l dev\n"
@@ -109,13 +108,8 @@ usage(void)
            "dump blkptr signatures\n");
        (void) fprintf(stderr, "        -v verbose (applies to all others)\n");
        (void) fprintf(stderr, "        -l dump label contents\n");
-       (void) fprintf(stderr, "        -L live pool (allows some errors)\n");
-       (void) fprintf(stderr, "        -O [!]<pre|post|prune|data|holes> "
-           "visitation order\n");
        (void) fprintf(stderr, "        -U cachefile_path -- use alternate "
            "cachefile\n");
-       (void) fprintf(stderr, "        -B objset:object:level:blkid -- "
-           "simulate bad block\n");
        (void) fprintf(stderr, "        -R read and display block from a "
            "device\n");
        (void) fprintf(stderr, "        -e Pool is exported/destroyed/"
@@ -138,7 +132,7 @@ fatal(const char *fmt, ...)
        va_end(ap);
        (void) fprintf(stderr, "\n");
 
-       exit(1);
+       abort();
 }
 
 static void
@@ -571,7 +565,7 @@ dump_dnode(objset_t *os, uint64_t object
 }
 
 static uint64_t
-blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
+blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
 {
        if (level < 0)
                return (blkid);
@@ -602,115 +596,104 @@ sprintf_blkptr_compact(char *blkbuf, blk
            (u_longlong_t)bp->blk_birth);
 }
 
-/* ARGSUSED */
-static int
-zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
+static void
+print_indirect(blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp)
 {
-       zbookmark_t *zb = &bc->bc_bookmark;
-       blkptr_t *bp = &bc->bc_blkptr;
-       void *data = bc->bc_data;
-       dnode_phys_t *dnp = bc->bc_dnode;
-       char blkbuf[BP_SPRINTF_LEN + 80];
+       char blkbuf[BP_SPRINTF_LEN];
        int l;
 
-       if (bc->bc_errno) {
-               (void) sprintf(blkbuf,
-                   "Error %d reading <%llu, %llu, %lld, %llu>: ",
-                   bc->bc_errno,
-                   (u_longlong_t)zb->zb_objset,
-                   (u_longlong_t)zb->zb_object,
-                   (u_longlong_t)zb->zb_level,
-                   (u_longlong_t)zb->zb_blkid);
-               goto out;
-       }
-
-       if (zb->zb_level == -1) {
-               ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
-               ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
-       } else {
-               ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
-               ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
-       }
-
-       if (zb->zb_level > 0) {
-               uint64_t fill = 0;
-               blkptr_t *bpx, *bpend;
-
-               for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
-                   bpx < bpend; bpx++) {
-                       if (bpx->blk_birth != 0) {
-                               fill += bpx->blk_fill;
-                       } else {
-                               ASSERT(bpx->blk_fill == 0);
-                       }
-               }
-               ASSERT3U(fill, ==, bp->blk_fill);
-       }
+       ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
+       ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
 
-       if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
-               uint64_t fill = 0;
-               dnode_phys_t *dnx, *dnend;
-
-               for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
-                   dnx < dnend; dnx++) {
-                       if (dnx->dn_type != DMU_OT_NONE)
-                               fill++;
-               }
-               ASSERT3U(fill, ==, bp->blk_fill);
-       }
-
-       (void) sprintf(blkbuf, "%16llx ",
+       (void) printf("%16llx ",
            (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
 
        ASSERT(zb->zb_level >= 0);
 
        for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
                if (l == zb->zb_level) {
-                       (void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
-                           (u_longlong_t)zb->zb_level);
+                       (void) printf("L%llx", (u_longlong_t)zb->zb_level);
                } else {
-                       (void) sprintf(blkbuf + strlen(blkbuf), " ");
+                       (void) printf(" ");
                }
        }
 
-out:
-       if (bp->blk_birth == 0) {
-               (void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
-               (void) printf("%s\n", blkbuf);
-       } else {
-               sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
-                   dump_opt['d'] > 5 ? 1 : 0);
-               (void) printf("%s\n", blkbuf);
+       sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+       (void) printf("%s\n", blkbuf);
+}
+
+#define        SET_BOOKMARK(zb, objset, object, level, blkid)  \
+{                                                       \
+       (zb)->zb_objset = objset;                       \
+       (zb)->zb_object = object;                       \
+       (zb)->zb_level = level;                         \
+       (zb)->zb_blkid = blkid;                         \
+}
+
+static int
+visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
+    blkptr_t *bp, const zbookmark_t *zb)
+{
+       int err;
+
+       if (bp->blk_birth == 0)
+               return (0);
+
+       print_indirect(bp, zb, dnp);
+
+       if (BP_GET_LEVEL(bp) > 0) {
+               uint32_t flags = ARC_WAIT;
+               int i;
+               blkptr_t *cbp;
+               int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
+               arc_buf_t *buf;
+               uint64_t fill = 0;
+
+               err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
+                   ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+               if (err)
+                       return (err);
+
+               /* recursively visit blocks below this */
+               cbp = buf->b_data;
+               for (i = 0; i < epb; i++, cbp++) {
+                       zbookmark_t czb;
+
+                       SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
+                           zb->zb_level - 1,
+                           zb->zb_blkid * epb + i);
+                       err = visit_indirect(spa, dnp, cbp, &czb);
+                       if (err)
+                               break;
+                       fill += cbp->blk_fill;
+               }
+               ASSERT3U(fill, ==, bp->blk_fill);
+               (void) arc_buf_remove_ref(buf, &buf);
        }
 
-       return (bc->bc_errno ? ERESTART : 0);
+       return (err);
 }
 
 /*ARGSUSED*/
 static void
-dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
+dump_indirect(dnode_t *dn)
 {
-       traverse_handle_t *th;
-       uint64_t objset = dmu_objset_id(os);
-       int advance = zdb_advance;
+       dnode_phys_t *dnp = dn->dn_phys;
+       int j;
+       zbookmark_t czb;
 
        (void) printf("Indirect blocks:\n");
 
-       if (object == 0)
-               advance |= ADVANCE_DATA;
-
-       th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
-           ZIO_FLAG_CANFAIL);
-       th->th_noread = zdb_noread;
-
-       traverse_add_dnode(th, 0, -1ULL, objset, object);
-
-       while (traverse_more(th) == EAGAIN)
-               continue;
+       SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+           dn->dn_object, dnp->dn_nlevels - 1, 0);
+       for (j = 0; j < dnp->dn_nblkptr; j++) {
+               czb.zb_blkid = j;
+               (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+                   &dnp->dn_blkptr[j], &czb);
+       }
 
        (void) printf("\n");
-
-       traverse_fini(th);
 }
 
 /*ARGSUSED*/
@@ -1093,7 +1076,7 @@ dump_object(objset_t *os, uint64_t objec
        }
 
        if (verbosity >= 5)
-               dump_indirect(os, object, NULL, 0);
+               dump_indirect(dn);
 
        if (verbosity >= 5) {
                /*
@@ -1458,18 +1441,17 @@ typedef struct zdb_blkstats {
 #define        DMU_OT_DEFERRED DMU_OT_NONE
 #define        DMU_OT_TOTAL    DMU_OT_NUMTYPES
 
-#define        ZB_TOTAL        ZB_MAXLEVEL
+#define        ZB_TOTAL        DN_MAX_LEVELS
 
 typedef struct zdb_cb {
        zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
        uint64_t        zcb_errors[256];
-       traverse_blk_cache_t *zcb_cache;
        int             zcb_readfails;
        int             zcb_haderrors;
 } zdb_cb_t;
 
 static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
+zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
 {
        for (int i = 0; i < 4; i++) {
                int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
@@ -1485,7 +1467,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc
        if (dump_opt['S']) {
                boolean_t print_sig;
 
-               print_sig  = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
+               print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
                    BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
 
                if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
@@ -1507,56 +1489,55 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc
                }
        }
 
-       if (!dump_opt['L'])
-               VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
-                   NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+       VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
+           NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
 }
 
 static int
-zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp, void *arg)
 {
-       zbookmark_t *zb = &bc->bc_bookmark;
        zdb_cb_t *zcb = arg;
-       blkptr_t *bp = &bc->bc_blkptr;
-       dmu_object_type_t type = BP_GET_TYPE(bp);
        char blkbuf[BP_SPRINTF_LEN];
-       int error = 0;
 
-       ASSERT(!BP_IS_HOLE(bp));
+       if (bp == NULL)
+               return (0);
 
-       zdb_count_block(spa, zcb, bp, type);
+       zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
 
-       if (bc->bc_errno) {
-               if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
-                       uberblock_t ub;
-                       vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
-                       if (ub.ub_txg != 0)
-                               spa->spa_ubsync = ub;
-                       error = EAGAIN;
-               } else {
+       if (dump_opt['c'] || dump_opt['S']) {
+               int ioerr, size;
+               void *data;
+
+               size = BP_GET_LSIZE(bp);
+               data = malloc(size);
+               ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
+                   NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+                   ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
+               free(data);
+
+               /* We expect io errors on intent log */
+               if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
                        zcb->zcb_haderrors = 1;
-                       zcb->zcb_errors[bc->bc_errno]++;
-                       error = ERESTART;
-               }
+                       zcb->zcb_errors[ioerr]++;
 
-               if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
-                       sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
-               else
-                       blkbuf[0] = '\0';
+                       if (dump_opt['b'] >= 2)
+                               sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+                       else
+                               blkbuf[0] = '\0';
 
-               if (!dump_opt['S']) {
-                       (void) printf("zdb_blkptr_cb: Got error %d reading "
-                           "<%llu, %llu, %lld, %llx> %s -- %s\n",
-                           bc->bc_errno,
-                           (u_longlong_t)zb->zb_objset,
-                           (u_longlong_t)zb->zb_object,
-                           (u_longlong_t)zb->zb_level,
-                           (u_longlong_t)zb->zb_blkid,
-                           blkbuf,
-                           error == EAGAIN ? "retrying" : "skipping");
+                       if (!dump_opt['S']) {
+                               (void) printf("zdb_blkptr_cb: "
+                                   "Got error %d reading "
+                                   "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+                                   ioerr,
+                                   (u_longlong_t)zb->zb_objset,
+                                   (u_longlong_t)zb->zb_object,
+                                   (u_longlong_t)zb->zb_level,
+                                   (u_longlong_t)zb->zb_blkid,
+                                   blkbuf);
+                       }
                }
-
-               return (error);
        }
 
        zcb->zcb_readfails = 0;
@@ -1566,8 +1547,8 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, 
                (void) printf("objset %llu object %llu offset 0x%llx %s\n",
                    (u_longlong_t)zb->zb_objset,
                    (u_longlong_t)zb->zb_object,
-                   (u_longlong_t)blkid2offset(bc->bc_dnode,
-                   zb->zb_level, zb->zb_blkid), blkbuf);
+                   (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
+                   blkbuf);
        }
 
        return (0);
@@ -1576,22 +1557,12 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, 
 static int
 dump_block_stats(spa_t *spa)
 {
-       traverse_handle_t *th;
        zdb_cb_t zcb = { 0 };
-       traverse_blk_cache_t dummy_cache = { 0 };
        zdb_blkstats_t *zb, *tzb;
        uint64_t alloc, space, logalloc;
        vdev_t *rvd = spa->spa_root_vdev;
        int leaks = 0;
-       int advance = zdb_advance;
-       int c, e, flags;
-
-       zcb.zcb_cache = &dummy_cache;
-
-       if (dump_opt['c'] || dump_opt['S'])
-               advance |= ADVANCE_DATA;
-
-       advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
+       int c, e;
 
        if (!dump_opt['S']) {
                (void) printf("\nTraversing all blocks to %sverify"
@@ -1607,8 +1578,7 @@ dump_block_stats(spa_t *spa)
         * it's not part of any space map) is a double allocation,
         * reference to a freed block, or an unclaimed log block.
         */
-       if (!dump_opt['L'])
-               zdb_leak_init(spa);
+       zdb_leak_init(spa);
 
        /*
         * If there's a deferred-free bplist, process that first.
@@ -1634,22 +1604,7 @@ dump_block_stats(spa_t *spa)
                bplist_close(bpl);
        }
 
-       /*
-        * Now traverse the pool.  If we're reading all data to verify
-        * checksums, do a scrubbing read so that we validate all copies.
-        */
-       flags = ZIO_FLAG_CANFAIL;
-       if (advance & ADVANCE_DATA)
-               flags |= ZIO_FLAG_SCRUB;
-       th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
-       th->th_noread = zdb_noread;
-
-       traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
-
-       while (traverse_more(th) == EAGAIN)
-               continue;
-
-       traverse_fini(th);
+       zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
 
        if (zcb.zcb_haderrors && !dump_opt['S']) {
                (void) printf("\nError counts:\n\n");
@@ -1665,8 +1620,7 @@ dump_block_stats(spa_t *spa)
        /*
         * Report any leaked segments.
         */
-       if (!dump_opt['L'])
-               zdb_leak_fini(spa);
+       zdb_leak_fini(spa);
 
        /*
         * If we're interested in printing out the blkptr signatures,
@@ -1676,10 +1630,6 @@ dump_block_stats(spa_t *spa)
        if (dump_opt['S'])
                return (zcb.zcb_haderrors ? 3 : 0);
 
-       if (dump_opt['L'])
-               (void) printf("\n\n *** Live pool traversal; "
-                   "block counts are only approximate ***\n\n");
-
        alloc = spa_get_alloc(spa);
        space = spa_get_space(spa);
 
@@ -2285,7 +2235,6 @@ main(int argc, char **argv)
        int dump_all = 1;
        int verbose = 0;
        int error;
-       int flag, set;
        int exported = 0;
        char *vdev_dir = NULL;
 
@@ -2294,7 +2243,7 @@ main(int argc, char **argv)
 
        dprintf_setup(&argc, argv);
 
-       while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) {
+       while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
                switch (c) {
                case 'u':
                case 'd':
@@ -2308,49 +2257,6 @@ main(int argc, char **argv)
                        dump_opt[c]++;
                        dump_all = 0;
                        break;
-               case 'L':
-                       dump_opt[c]++;
-                       break;
-               case 'O':
-                       endstr = optarg;
-                       if (endstr[0] == '!') {
-                               endstr++;
-                               set = 0;
-                       } else {
-                               set = 1;
-                       }
-                       if (strcmp(endstr, "post") == 0) {
-                               flag = ADVANCE_PRE;
-                               set = !set;
-                       } else if (strcmp(endstr, "pre") == 0) {
-                               flag = ADVANCE_PRE;
-                       } else if (strcmp(endstr, "prune") == 0) {
-                               flag = ADVANCE_PRUNE;
-                       } else if (strcmp(endstr, "data") == 0) {
-                               flag = ADVANCE_DATA;
-                       } else if (strcmp(endstr, "holes") == 0) {
-                               flag = ADVANCE_HOLES;
-                       } else {
-                               usage();
-                       }
-                       if (set)
-                               zdb_advance |= flag;
-                       else
-                               zdb_advance &= ~flag;
-                       break;
-               case 'B':
-                       endstr = optarg - 1;
-                       zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
-                       zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
-                       zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
-                       zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
-                       (void) printf("simulating bad block "
-                           "<%llu, %llu, %lld, %llx>\n",
-                           (u_longlong_t)zdb_noread.zb_objset,
-                           (u_longlong_t)zdb_noread.zb_object,
-                           (u_longlong_t)zdb_noread.zb_level,
-                           (u_longlong_t)zdb_noread.zb_blkid);
-                       break;
                case 'v':
                        verbose++;
                        break;
@@ -2387,21 +2293,17 @@ main(int argc, char **argv)
                }
        }
 
-       if (vdev_dir != NULL && exported == 0)
-               (void) fatal("-p option requires use of -e\n");
+       if (vdev_dir != NULL && exported == 0) {
+               (void) fprintf(stderr, "-p option requires use of -e\n");
+               usage();
+       }
 
        kernel_init(FREAD);
        g_zfs = libzfs_init();
        ASSERT(g_zfs != NULL);
 
-       /*
-        * Disable vdev caching.  If we don't do this, live pool traversal
-        * won't make progress because it will never see disk updates.
-        */
-       zfs_vdev_cache_size = 0;
-
        for (c = 0; c < 256; c++) {
-               if (dump_all && c != 'L' && c != 'l' && c != 'R')
+               if (dump_all && c != 'l' && c != 'R')
                        dump_opt[c] = 1;
                if (dump_opt[c])
                        dump_opt[c] += verbose;

Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c     Thu May 13 20:31:24 2010        (r208046)
+++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c     Thu May 13 20:32:56 2010        (r208047)
@@ -77,7 +77,6 @@
 #include <sys/dmu.h>
 #include <sys/txg.h>
 #include <sys/zap.h>
-#include <sys/dmu_traverse.h>
 #include <sys/dmu_objset.h>
 #include <sys/poll.h>
 #include <sys/stat.h>
@@ -151,7 +150,6 @@ typedef struct ztest_args {
        hrtime_t        za_start;
        hrtime_t        za_stop;
        hrtime_t        za_kill;
-       traverse_handle_t *za_th;
        /*
         * Thread-local variables can go here to aid debugging.
         */
@@ -206,7 +204,6 @@ ztest_info_t ztest_info[] = {
        { ztest_dmu_object_alloc_free,          1,      &zopt_always    },
        { ztest_zap,                            30,     &zopt_always    },
        { ztest_zap_parallel,                   100,    &zopt_always    },
-       { ztest_traverse,                       1,      &zopt_often     },
        { ztest_dsl_prop_get_set,               1,      &zopt_sometimes },
        { ztest_dmu_objset_create_destroy,      1,      &zopt_sometimes },
        { ztest_dmu_snapshot_create_destroy,    1,      &zopt_sometimes },
@@ -1447,152 +1444,6 @@ ztest_dmu_snapshot_create_destroy(ztest_
        (void) rw_unlock(&ztest_shared->zs_name_lock);
 }
 
-#define        ZTEST_TRAVERSE_BLOCKS   1000
-
-static int
-ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
-       ztest_args_t *za = arg;
-       zbookmark_t *zb = &bc->bc_bookmark;
-       blkptr_t *bp = &bc->bc_blkptr;
-       dnode_phys_t *dnp = bc->bc_dnode;
-       traverse_handle_t *th = za->za_th;
-       uint64_t size = BP_GET_LSIZE(bp);
-
-       /*
-        * Level -1 indicates the objset_phys_t or something in its intent log.
-        */
-       if (zb->zb_level == -1) {
-               if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
-                       ASSERT3U(zb->zb_object, ==, 0);
-                       ASSERT3U(zb->zb_blkid, ==, 0);
-                       ASSERT3U(size, ==, sizeof (objset_phys_t));
-                       za->za_zil_seq = 0;
-               } else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
-                       ASSERT3U(zb->zb_object, ==, 0);
-                       ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
-                       za->za_zil_seq = zb->zb_blkid;
-               } else {
-                       ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */
-               }
-
-               return (0);
-       }
-
-       ASSERT(dnp != NULL);
-
-       if (bc->bc_errno)
-               return (ERESTART);
-
-       /*
-        * Once in a while, abort the traverse.   We only do this to odd
-        * instance numbers to ensure that even ones can run to completion.
-        */
-       if ((za->za_instance & 1) && ztest_random(10000) == 0)
-               return (EINTR);
-
-       if (bp->blk_birth == 0) {
-               ASSERT(th->th_advance & ADVANCE_HOLES);
-               return (0);
-       }
-
-       if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) &&
-           bc == &th->th_cache[ZB_DN_CACHE][0]) {
-               ASSERT(bc->bc_data == NULL);
-               return (0);
-       }
-
-       ASSERT(bc->bc_data != NULL);
-
-       /*
-        * This is an expensive question, so don't ask it too often.
-        */
-       if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) {
-               void *xbuf = umem_alloc(size, UMEM_NOFAIL);
-               if (arc_tryread(spa, bp, xbuf) == 0) {
-                       ASSERT(bcmp(bc->bc_data, xbuf, size) == 0);
-               }
-               umem_free(xbuf, size);
-       }
-
-       if (zb->zb_level > 0) {
-               ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift);
-               return (0);
-       }
-
-       ASSERT(zb->zb_level == 0);
-       ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
-
-       return (0);
-}
-
-/*
- * Verify that live pool traversal works.
- */
-void
-ztest_traverse(ztest_args_t *za)
-{
-       spa_t *spa = za->za_spa;
-       traverse_handle_t *th = za->za_th;
-       int rc, advance;
-       uint64_t cbstart, cblimit;
-
-       if (th == NULL) {
-               advance = 0;
-
-               if (ztest_random(2) == 0)
-                       advance |= ADVANCE_PRE;
-
-               if (ztest_random(2) == 0)
-                       advance |= ADVANCE_PRUNE;
-
-               if (ztest_random(2) == 0)
-                       advance |= ADVANCE_DATA;
-
-               if (ztest_random(2) == 0)
-                       advance |= ADVANCE_HOLES;
-
-               if (ztest_random(2) == 0)
-                       advance |= ADVANCE_ZIL;
-
-               th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
-                   ZIO_FLAG_CANFAIL);
-
-               traverse_add_pool(th, 0, -1ULL);
-       }
-
-       advance = th->th_advance;
-       cbstart = th->th_callbacks;
-       cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000);
-
-       while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit)
-               continue;
-
-       if (zopt_verbose >= 5)
-               (void) printf("traverse %s%s%s%s %llu blocks to "
-                   "<%llu, %llu, %lld, %llx>%s\n",
-                   (advance & ADVANCE_PRE) ? "pre" : "post",
-                   (advance & ADVANCE_PRUNE) ? "|prune" : "",
-                   (advance & ADVANCE_DATA) ? "|data" : "",
-                   (advance & ADVANCE_HOLES) ? "|holes" : "",
-                   (u_longlong_t)(th->th_callbacks - cbstart),
-                   (u_longlong_t)th->th_lastcb.zb_objset,
-                   (u_longlong_t)th->th_lastcb.zb_object,
-                   (u_longlong_t)th->th_lastcb.zb_level,
-                   (u_longlong_t)th->th_lastcb.zb_blkid,
-                   rc == 0 ? " [done]" :
-                   rc == EINTR ? " [aborted]" :
-                   rc == EAGAIN ? "" :
-                   strerror(rc));
-
-       if (rc != EAGAIN) {
-               if (rc != 0 && rc != EINTR)
-                       fatal(0, "traverse_more(%p) = %d", th, rc);
-               traverse_fini(th);
-               za->za_th = NULL;
-       }
-}
-
 /*
  * Verify dsl_dataset_promote handles EBUSY
  */
@@ -3067,12 +2918,12 @@ ztest_verify_blocks(char *pool)
        isa = strdup(isa);
        /* LINTED */
        (void) sprintf(bin,
-           "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s",
+           "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
            isalen,
            isa,
            zopt_verbose >= 3 ? "s" : "",
            zopt_verbose >= 4 ? "v" : "",
-           ztest_random(2) == 0 ? "pre" : "post", pool);
+           pool);
        free(isa);
 
        if (zopt_verbose >= 5)
@@ -3438,8 +3289,6 @@ ztest_run(char *pool)
 
        while (--t >= 0) {
                VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
-               if (za[t].za_th)
-                       traverse_fini(za[t].za_th);
                if (t < zopt_datasets) {
                        zil_close(za[t].za_zilog);
                        dmu_objset_close(za[t].za_os);

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Thu May 13 20:31:24 2010        (r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Thu May 13 20:32:56 2010        (r208047)
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident  "%Z%%M% %I%     %E% SMI"
-
 #include <assert.h>
 #include <fcntl.h>
 #include <poll.h>
@@ -842,6 +840,8 @@ kernel_init(int mode)
        VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
        VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
 
+       system_taskq_init();
+
        spa_init(mode);
 }
 

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu May 13 20:31:24 2010        (r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu May 13 20:32:56 2010        (r208047)
@@ -334,11 +334,14 @@ typedef void (task_func_t)(void *);
 #define        TQ_NOSLEEP      KM_NOSLEEP      /* cannot block for memory; may fail */
 #define        TQ_NOQUEUE      0x02    /* Do not enqueue if can't dispatch */
 
+extern taskq_t *system_taskq;
+
 extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
 extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
 extern void    taskq_destroy(taskq_t *);
 extern void    taskq_wait(taskq_t *);
 extern int     taskq_member(taskq_t *, void *);
+extern void    system_taskq_init(void);
 
 #define        XVA_MAPSIZE     3
 #define        XVA_MAGIC       0x78766174

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c   Thu May 13 20:31:24 2010        (r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c   Thu May 13 20:32:56 2010        (r208047)
@@ -19,15 +19,14 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident  "%Z%%M% %I%     %E% SMI"
-
 #include <sys/zfs_context.h>
 
 int taskq_now;
+taskq_t *system_taskq;
 
 typedef struct task {
        struct task     *task_next;
@@ -253,3 +252,10 @@ taskq_member(taskq_t *tq, void *t)
 
        return (0);
 }
+
+void
+system_taskq_init(void)
+{
+       system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
+           TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
+}

Modified: head/sys/cddl/boot/zfs/zfsimpl.h
==============================================================================
--- head/sys/cddl/boot/zfs/zfsimpl.h    Thu May 13 20:31:24 2010        (r208046)
+++ head/sys/cddl/boot/zfs/zfsimpl.h    Thu May 13 20:32:56 2010        (r208047)
@@ -66,7 +66,7 @@
 #define        P2ROUNDUP(x, align)             (-(-(x) & -(align)))
 #define        P2END(x, align)                 (-(~(x) & -(align)))
 #define        P2PHASEUP(x, align, phase)      ((phase) - (((phase) - (x)) & -(align)))
-#define        P2CROSS(x, y, align)            (((x) ^ (y)) > (align) - 1)
+#define        P2BOUNDARY(off, len, align)     (((off) ^ ((off) + (len) - 1)) > (align) - 1)
 
 /*
  * General-purpose 32-bit and 64-bit bitfield encodings.

Modified: head/sys/cddl/compat/opensolaris/sys/sysmacros.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/sysmacros.h    Thu May 13 20:31:24 2010        (r208046)
+++ head/sys/cddl/compat/opensolaris/sys/sysmacros.h    Thu May 13 20:32:56 2010        (r208047)
@@ -43,6 +43,10 @@ extern "C" {
 #define        ABS(a)  ((a) < 0 ? -(a) : (a))
 #endif
 
+#ifndef        SIGNOF
+#define        SIGNOF(a)       ((a) < 0 ? -1 : (a) > 0)
+#endif
+
 /*
  * Macro for checking power of 2 address alignment.
  */
@@ -63,7 +67,7 @@ extern "C" {
 #define        P2ROUNDUP(x, align)             (-(-(x) & -(align)))
 #define        P2END(x, align)                 (-(~(x) & -(align)))
 #define        P2PHASEUP(x, align, phase)      ((phase) - (((phase) - (x)) & -(align)))
-#define        P2CROSS(x, y, align)            (((x) ^ (y)) > (align) - 1)
+#define        P2BOUNDARY(off, len, align)     (((off) ^ ((off) + (len) - 1)) > (align) - 1)
 /*
  * Determine whether two numbers have the same high-order bit.
  */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Thu May 13 20:31:24 2010        (r208046)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Thu May 13 20:32:56 2010        (r208047)
@@ -308,20 +308,18 @@ dbuf_verify(dmu_buf_impl_t *db)
                ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
        }
 
-       if (db->db_level == 0) {
-               /* we can be momentarily larger in dnode_set_blksz() */
-               if (db->db_blkid != DB_BONUS_BLKID && dn) {
-                       ASSERT3U(db->db.db_size, >=, dn->dn_datablksz);
-               }
-               if (db->db.db_object == DMU_META_DNODE_OBJECT) {
-                       dbuf_dirty_record_t *dr = db->db_data_pending;
-                       /*
-                        * it should only be modified in syncing
-                        * context, so make sure we only have
-                        * one copy of the data.
-                        */
-                       ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
-               }
+       /*
+        * We can't assert that db_size matches dn_datablksz because it
+        * can be momentarily different when another thread is doing
+        * dnode_set_blksz().
+        */
+       if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) {
+               dbuf_dirty_record_t *dr = db->db_data_pending;
+               /*
+                * It should only be modified in syncing context, so
+                * make sure we only have one copy of the data.
+                */
+               ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
        }
 
        /* verify db->db_blkptr */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Thu May 13 20:31:24 2010        (r208046)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Thu May 13 20:32:56 2010        (r208047)
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident  "%Z%%M% %I%     %E% SMI"
-
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
@@ -172,66 +170,59 @@ dump_dnode(struct backuparg *ba, uint64_
        (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
 
 static int
-backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp, void *arg)
 {
        struct backuparg *ba = arg;
-       uint64_t object = bc->bc_bookmark.zb_object;
-       int level = bc->bc_bookmark.zb_level;
-       uint64_t blkid = bc->bc_bookmark.zb_blkid;
-       blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL;
        dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
-       void *data = bc->bc_data;
        int err = 0;
 
        if (issig(JUSTLOOKING) && issig(FORREAL))
                return (EINTR);
 
-       ASSERT(data || bp == NULL);
-
-       if (bp == NULL && object == 0) {
-               uint64_t span = BP_SPAN(bc->bc_dnode, level);
-               uint64_t dnobj = (blkid * span) >> DNODE_SHIFT;
+       if (bp == NULL && zb->zb_object == 0) {
+               uint64_t span = BP_SPAN(dnp, zb->zb_level);
+               uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
                err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
        } else if (bp == NULL) {
-               uint64_t span = BP_SPAN(bc->bc_dnode, level);
-               err = dump_free(ba, object, blkid * span, span);
-       } else if (data && level == 0 && type == DMU_OT_DNODE) {
-               dnode_phys_t *blk = data;
+               uint64_t span = BP_SPAN(dnp, zb->zb_level);
+               err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
+       } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
+               return (0);
+       } else if (type == DMU_OT_DNODE) {
+               dnode_phys_t *blk;
                int i;
                int blksz = BP_GET_LSIZE(bp);
+               uint32_t aflags = ARC_WAIT;
+               arc_buf_t *abuf;
 
+               if (arc_read_nolock(NULL, spa, bp,
+                   arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
+                   ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
+                       return (EIO);
+
+               blk = abuf->b_data;
                for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
-                       uint64_t dnobj =
-                           (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
+                       uint64_t dnobj = (zb->zb_blkid <<
+                           (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
                        err = dump_dnode(ba, dnobj, blk+i);
                        if (err)
                                break;
                }
-       } else if (level == 0 &&
-           type != DMU_OT_DNODE && type != DMU_OT_OBJSET) {
+               (void) arc_buf_remove_ref(abuf, &abuf);
+       } else { /* it's a level-0 block of a regular object */
+               uint32_t aflags = ARC_WAIT;
+               arc_buf_t *abuf;
                int blksz = BP_GET_LSIZE(bp);
-               if (data == NULL) {
-                       uint32_t aflags = ARC_WAIT;
-                       arc_buf_t *abuf;
-                       zbookmark_t zb;
-
-                       zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object;
-                       zb.zb_object = object;
-                       zb.zb_level = level;
-                       zb.zb_blkid = blkid;
-                       (void) arc_read_nolock(NULL, spa, bp,
-                           arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
-                           ZIO_FLAG_MUSTSUCCEED, &aflags, &zb);
-
-                       if (abuf) {
-                               err = dump_data(ba, type, object, blkid * blksz,
-                                   blksz, abuf->b_data);
-                               (void) arc_buf_remove_ref(abuf, &abuf);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to