Author: avg
Date: Wed May 24 22:21:24 2017
New Revision: 318828
URL: https://svnweb.freebsd.org/changeset/base/318828

Log:
  MFV r316917: 7968 multi-threaded spa_sync()
  
  illumos/illumos-gate@94c2d0eb22e9624151ee84a7edbf7178e1bf4087
  
  https://github.com/illumos/illumos-gate/commit/94c2d0eb22e9624151ee84a7edbf7178e1bf4087
  
  https://www.illumos.org/issues/7968
    spa_sync() iterates over all the dirty dnodes and processes each of them by
    calling dnode_sync(). If there are many dirty dnodes (e.g. because we
    created or removed a lot of files), the single thread of spa_sync() calling
    dnode_sync() can become a bottleneck. Additionally, if many dnodes are
    dirtied concurrently in open context (e.g. due to concurrent file
    creation), the os_lock will experience lock contention via
    dnode_setdirty().
    The solution is to track dirty dnodes on a multilist_t, and for spa_sync()
    to use separate threads to process each of the sublists in the multilist.
    On the concurrent file creation microbenchmark, the performance improvement
    from dnode_setdirty() is up to 7%. Additionally, the wall clock time spent
    in spa_sync() is reduced to 15%-40% of the single-threaded case. In terms
    of cost/reward, once the other bottlenecks are addressed, fixing this bug
    will provide a medium-large performance gain and require a medium amount of
    effort to implement.
  
  Reviewed by: Pavel Zakharov <pavel.zakha...@delphix.com>
  Reviewed by: Brad Lewis <brad.le...@delphix.com>
  Reviewed by: Saso Kiselkov <saso.kisel...@nexenta.com>
  Reviewed by: Brian Behlendorf <behlendo...@llnl.gov>
  Approved by: Dan McDonald <dan...@omniti.com>
  Author: Matthew Ahrens <mahr...@delphix.com>
  MFC after:    3 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c   Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c   Wed May 24 22:21:24 2017        (r318828)
@@ -473,7 +473,7 @@ typedef struct arc_state {
        /*
         * list of evictable buffers
         */
-       multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+       multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
        /*
         * total amount of evictable data in this state
         */
@@ -2359,7 +2359,7 @@ add_reference(arc_buf_hdr_t *hdr, void *
            (state != arc_anon)) {
                /* We don't use the L2-only state list. */
                if (state != arc_l2c_only) {
-                       multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
+                       multilist_remove(state->arcs_list[arc_buf_type(hdr)],
                            hdr);
                        arc_evictable_space_decrement(hdr, state);
                }
@@ -2389,7 +2389,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmu
         */
        if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
            (state != arc_anon)) {
-               multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
+               multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
                ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
                arc_evictable_space_increment(hdr, state);
        }
@@ -2442,7 +2442,7 @@ arc_change_state(arc_state_t *new_state,
        if (refcnt == 0) {
                if (old_state != arc_anon && old_state != arc_l2c_only) {
                        ASSERT(HDR_HAS_L1HDR(hdr));
-                       multilist_remove(&old_state->arcs_list[buftype], hdr);
+                       multilist_remove(old_state->arcs_list[buftype], hdr);
 
                        if (GHOST_STATE(old_state)) {
                                ASSERT0(bufcnt);
@@ -2460,7 +2460,7 @@ arc_change_state(arc_state_t *new_state,
                         * beforehand.
                         */
                        ASSERT(HDR_HAS_L1HDR(hdr));
-                       multilist_insert(&new_state->arcs_list[buftype], hdr);
+                       multilist_insert(new_state->arcs_list[buftype], hdr);
 
                        if (GHOST_STATE(new_state)) {
                                ASSERT0(bufcnt);
@@ -2586,8 +2586,8 @@ arc_change_state(arc_state_t *new_state,
         * L2 headers should never be on the L2 state list since they don't
         * have L1 headers allocated.
         */
-       ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
-           multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
+       ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+           multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
 }
 
 void
@@ -3671,7 +3671,7 @@ arc_evict_state(arc_state_t *state, uint
     arc_buf_contents_t type)
 {
        uint64_t total_evicted = 0;
-       multilist_t *ml = &state->arcs_list[type];
+       multilist_t *ml = state->arcs_list[type];
        int num_sublists;
        arc_buf_hdr_t **markers;
 
@@ -3875,8 +3875,8 @@ arc_adjust_meta(void)
 static arc_buf_contents_t
 arc_adjust_type(arc_state_t *state)
 {
-       multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
-       multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
+       multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
+       multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
        int data_idx = multilist_get_random_index(data_ml);
        int meta_idx = multilist_get_random_index(meta_ml);
        multilist_sublist_t *data_mls;
@@ -6209,44 +6209,44 @@ arc_state_init(void)
        arc_mfu_ghost = &ARC_mfu_ghost;
        arc_l2c_only = &ARC_l2c_only;
 
-       multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mru->arcs_list[ARC_BUFC_METADATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mru->arcs_list[ARC_BUFC_DATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mfu->arcs_list[ARC_BUFC_METADATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mfu->arcs_list[ARC_BUFC_DATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t),
+       arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t),
+       arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
-       multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t),
+       arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
+           multilist_create(sizeof (arc_buf_hdr_t),
            offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
            arc_state_multilist_index_func);
 
@@ -6294,14 +6294,14 @@ arc_state_fini(void)
        refcount_destroy(&arc_mfu_ghost->arcs_size);
        refcount_destroy(&arc_l2c_only->arcs_size);
 
-       multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
 }
 
 uint64_t
@@ -7098,16 +7098,16 @@ l2arc_sublist_lock(int list_num)
 
        switch (list_num) {
        case 0:
-               ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
+               ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
                break;
        case 1:
-               ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
+               ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
                break;
        case 2:
-               ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
+               ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
                break;
        case 3:
-               ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
+               ml = arc_mru->arcs_list[ARC_BUFC_DATA];
                break;
        }
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Wed May 24 22:21:24 2017        (r318828)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -80,7 +80,7 @@ static boolean_t dbuf_evict_thread_exit;
  * Dbufs that are aged out of the cache will be immediately destroyed and
  * become eligible for arc eviction.
  */
-static multilist_t dbuf_cache;
+static multilist_t *dbuf_cache;
 static refcount_t dbuf_cache_size;
 uint64_t dbuf_cache_max_bytes = 100 * 1024 * 1024;
 
@@ -454,8 +454,8 @@ dbuf_cache_above_lowater(void)
 static void
 dbuf_evict_one(void)
 {
-       int idx = multilist_get_random_index(&dbuf_cache);
-       multilist_sublist_t *mls = multilist_sublist_lock(&dbuf_cache, idx);
+       int idx = multilist_get_random_index(dbuf_cache);
+       multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
 
        ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
@@ -621,7 +621,7 @@ retry:
         */
        dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
 
-       multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t),
+       dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
            offsetof(dmu_buf_impl_t, db_cache_link),
            dbuf_cache_multilist_index_func);
        refcount_create(&dbuf_cache_size);
@@ -659,7 +659,7 @@ dbuf_fini(void)
        cv_destroy(&dbuf_evict_cv);
 
        refcount_destroy(&dbuf_cache_size);
-       multilist_destroy(&dbuf_cache);
+       multilist_destroy(dbuf_cache);
 }
 
 /*
@@ -2029,7 +2029,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
        dbuf_clear_data(db);
 
        if (multilist_link_active(&db->db_cache_link)) {
-               multilist_remove(&dbuf_cache, db);
+               multilist_remove(dbuf_cache, db);
                (void) refcount_remove_many(&dbuf_cache_size,
                    db->db.db_size, db);
        }
@@ -2577,7 +2577,7 @@ top:
 
        if (multilist_link_active(&db->db_cache_link)) {
                ASSERT(refcount_is_zero(&db->db_holds));
-               multilist_remove(&dbuf_cache, db);
+               multilist_remove(dbuf_cache, db);
                (void) refcount_remove_many(&dbuf_cache_size,
                    db->db.db_size, db);
        }
@@ -2796,7 +2796,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
                            db->db_pending_evict) {
                                dbuf_destroy(db);
                        } else if (!multilist_link_active(&db->db_cache_link)) {
-                               multilist_insert(&dbuf_cache, db);
+                               multilist_insert(dbuf_cache, db);
                                (void) refcount_add_many(&dbuf_cache_size,
                                    db->db.db_size, db);
                                mutex_exit(&db->db_mtx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c    Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c    Wed May 24 22:21:24 2017        (r318828)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -303,6 +303,42 @@ dmu_objset_byteswap(void *buf, size_t si
        }
 }
 
+/*
+ * The hash is a CRC-based hash of the objset_t pointer and the object number.
+ */
+static uint64_t
+dnode_hash(const objset_t *os, uint64_t obj)
+{
+       uintptr_t osv = (uintptr_t)os;
+       uint64_t crc = -1ULL;
+
+       ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+       /*
+        * The low 6 bits of the pointer don't have much entropy, because
+        * the objset_t is larger than 2^6 bytes long.
+        */
+       crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
+       crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
+       crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
+       crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
+
+       crc ^= (osv>>14) ^ (obj>>24);
+
+       return (crc);
+}
+
+unsigned int
+dnode_multilist_index_func(multilist_t *ml, void *obj)
+{
+       dnode_t *dn = obj;
+       return (dnode_hash(dn->dn_objset, dn->dn_object) %
+           multilist_get_num_sublists(ml));
+}
+
+/*
+ * Instantiates the objset_t in-memory structure corresponding to the
+ * objset_phys_t that's pointed to by the specified blkptr_t.
+ */
 int
 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
     objset_t **osp)
@@ -454,10 +490,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
        os->os_zil = zil_alloc(os, &os->os_zil_header);
 
        for (i = 0; i < TXG_SIZE; i++) {
-               list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
-                   offsetof(dnode_t, dn_dirty_link[i]));
-               list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
-                   offsetof(dnode_t, dn_dirty_link[i]));
+               os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
+                   offsetof(dnode_t, dn_dirty_link[i]),
+                   dnode_multilist_index_func);
        }
        list_create(&os->os_dnodes, sizeof (dnode_t),
            offsetof(dnode_t, dn_link));
@@ -465,6 +500,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
            offsetof(dmu_buf_impl_t, db_link));
 
        mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 
@@ -748,8 +784,12 @@ dmu_objset_evict_done(objset_t *os)
        rw_exit(&os_lock);
 
        mutex_destroy(&os->os_lock);
+       mutex_destroy(&os->os_userused_lock);
        mutex_destroy(&os->os_obj_lock);
        mutex_destroy(&os->os_user_ptr_lock);
+       for (int i = 0; i < TXG_SIZE; i++) {
+               multilist_destroy(os->os_dirty_dnodes[i]);
+       }
        spa_evicting_os_deregister(os->os_spa, os);
        kmem_free(os, sizeof (objset_t));
 }
@@ -1027,11 +1067,11 @@ dmu_objset_snapshot_one(const char *fsna
 }
 
 static void
-dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
+dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
 {
        dnode_t *dn;
 
-       while (dn = list_head(list)) {
+       while ((dn = multilist_sublist_head(list)) != NULL) {
                ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
                ASSERT(dn->dn_dbuf->db_data_pending);
                /*
@@ -1042,11 +1082,12 @@ dmu_objset_sync_dnodes(list_t *list, lis
                ASSERT(dn->dn_zio);
 
                ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
-               list_remove(list, dn);
+               multilist_sublist_remove(list, dn);
 
-               if (newlist) {
+               multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
+               if (newlist != NULL) {
                        (void) dnode_add_ref(dn, newlist);
-                       list_insert_tail(newlist, dn);
+                       multilist_insert(newlist, dn);
                }
 
                dnode_sync(dn, tx);
@@ -1101,6 +1142,29 @@ dmu_objset_write_done(zio_t *zio, arc_bu
        kmem_free(bp, sizeof (*bp));
 }
 
+typedef struct sync_dnodes_arg {
+       multilist_t *sda_list;
+       int sda_sublist_idx;
+       multilist_t *sda_newlist;
+       dmu_tx_t *sda_tx;
+} sync_dnodes_arg_t;
+
+static void
+sync_dnodes_task(void *arg)
+{
+       sync_dnodes_arg_t *sda = arg;
+
+       multilist_sublist_t *ms =
+           multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
+
+       dmu_objset_sync_dnodes(ms, sda->sda_tx);
+
+       multilist_sublist_unlock(ms);
+
+       kmem_free(sda, sizeof (*sda));
+}
+
+
 /* called from dsl */
 void
 dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
@@ -1110,7 +1174,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio
        zio_prop_t zp;
        zio_t *zio;
        list_t *list;
-       list_t *newlist = NULL;
        dbuf_dirty_record_t *dr;
        blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
        *blkptr_copy = *os->os_rootbp;
@@ -1164,20 +1227,36 @@ dmu_objset_sync(objset_t *os, zio_t *pio
        txgoff = tx->tx_txg & TXG_MASK;
 
        if (dmu_objset_userused_enabled(os)) {
-               newlist = &os->os_synced_dnodes;
                /*
                 * We must create the list here because it uses the
-                * dn_dirty_link[] of this txg.
+                * dn_dirty_link[] of this txg.  But it may already
+                * exist because we call dsl_dataset_sync() twice per txg.
                 */
-               list_create(newlist, sizeof (dnode_t),
-                   offsetof(dnode_t, dn_dirty_link[txgoff]));
+               if (os->os_synced_dnodes == NULL) {
+                       os->os_synced_dnodes =
+                           multilist_create(sizeof (dnode_t),
+                           offsetof(dnode_t, dn_dirty_link[txgoff]),
+                           dnode_multilist_index_func);
+               } else {
+                       ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
+                           offsetof(dnode_t, dn_dirty_link[txgoff]));
+               }
        }
 
-       dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
-       dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
+       for (int i = 0;
+           i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) {
+               sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
+               sda->sda_list = os->os_dirty_dnodes[txgoff];
+               sda->sda_sublist_idx = i;
+               sda->sda_tx = tx;
+               (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+                   sync_dnodes_task, sda, 0);
+               /* callback frees sda */
+       }
+       taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
 
        list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
-       while (dr = list_head(list)) {
+       while ((dr = list_head(list)) != NULL) {
                ASSERT0(dr->dr_dbuf->db_level);
                list_remove(list, dr);
                if (dr->dr_zio)
@@ -1201,8 +1280,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio
 boolean_t
 dmu_objset_is_dirty(objset_t *os, uint64_t txg)
 {
-       return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
-           !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
+       return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
 }
 
 static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
@@ -1256,8 +1334,15 @@ do_userquota_cacheflush(objset_t *os, us
        cookie = NULL;
        while ((uqn = avl_destroy_nodes(&cache->uqc_user_deltas,
            &cookie)) != NULL) {
+               /*
+                * os_userused_lock protects against concurrent calls to
+                * zap_increment_int().  It's needed because zap_increment_int()
+                * is not thread-safe (i.e. not atomic).
+                */
+               mutex_enter(&os->os_userused_lock);
                VERIFY0(zap_increment_int(os, DMU_USERUSED_OBJECT,
                    uqn->uqn_id, uqn->uqn_delta, tx));
+               mutex_exit(&os->os_userused_lock);
                kmem_free(uqn, sizeof (*uqn));
        }
        avl_destroy(&cache->uqc_user_deltas);
@@ -1265,8 +1350,10 @@ do_userquota_cacheflush(objset_t *os, us
        cookie = NULL;
        while ((uqn = avl_destroy_nodes(&cache->uqc_group_deltas,
            &cookie)) != NULL) {
+               mutex_enter(&os->os_userused_lock);
                VERIFY0(zap_increment_int(os, DMU_GROUPUSED_OBJECT,
                    uqn->uqn_id, uqn->uqn_delta, tx));
+               mutex_exit(&os->os_userused_lock);
                kmem_free(uqn, sizeof (*uqn));
        }
        avl_destroy(&cache->uqc_group_deltas);
@@ -1301,37 +1388,38 @@ do_userquota_update(userquota_cache_t *c
        }
 }
 
-void
-dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+typedef struct userquota_updates_arg {
+       objset_t *uua_os;
+       int uua_sublist_idx;
+       dmu_tx_t *uua_tx;
+} userquota_updates_arg_t;
+
+static void
+userquota_updates_task(void *arg)
 {
+       userquota_updates_arg_t *uua = arg;
+       objset_t *os = uua->uua_os;
+       dmu_tx_t *tx = uua->uua_tx;
        dnode_t *dn;
-       list_t *list = &os->os_synced_dnodes;
        userquota_cache_t cache = { 0 };
 
-       ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+       multilist_sublist_t *list =
+           multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
 
+       ASSERT(multilist_sublist_head(list) == NULL ||
+           dmu_objset_userused_enabled(os));
        avl_create(&cache.uqc_user_deltas, userquota_compare,
            sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
        avl_create(&cache.uqc_group_deltas, userquota_compare,
            sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
 
-       while (dn = list_head(list)) {
+       while ((dn = multilist_sublist_head(list)) != NULL) {
                int flags;
                ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
                ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
                    dn->dn_phys->dn_flags &
                    DNODE_FLAG_USERUSED_ACCOUNTED);
 
-               /* Allocate the user/groupused objects if necessary. */
-               if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
-                       VERIFY0(zap_create_claim(os,
-                           DMU_USERUSED_OBJECT,
-                           DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-                       VERIFY0(zap_create_claim(os,
-                           DMU_GROUPUSED_OBJECT,
-                           DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-               }
-
                flags = dn->dn_id_flags;
                ASSERT(flags);
                if (flags & DN_ID_OLD_EXIST)  {
@@ -1361,10 +1449,42 @@ dmu_objset_do_userquota_updates(objset_t
                dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
                mutex_exit(&dn->dn_mtx);
 
-               list_remove(list, dn);
-               dnode_rele(dn, list);
+               multilist_sublist_remove(list, dn);
+               dnode_rele(dn, os->os_synced_dnodes);
        }
        do_userquota_cacheflush(os, &cache, tx);
+       multilist_sublist_unlock(list);
+       kmem_free(uua, sizeof (*uua));
+}
+
+void
+dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+{
+       if (!dmu_objset_userused_enabled(os))
+               return;
+
+       /* Allocate the user/groupused objects if necessary. */
+       if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
+               VERIFY0(zap_create_claim(os,
+                   DMU_USERUSED_OBJECT,
+                   DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+               VERIFY0(zap_create_claim(os,
+                   DMU_GROUPUSED_OBJECT,
+                   DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+       }
+
+       for (int i = 0;
+           i < multilist_get_num_sublists(os->os_synced_dnodes); i++) {
+               userquota_updates_arg_t *uua =
+                   kmem_alloc(sizeof (*uua), KM_SLEEP);
+               uua->uua_os = os;
+               uua->uua_sublist_idx = i;
+               uua->uua_tx = tx;
+               /* note: caller does taskq_wait() */
+               (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+                   userquota_updates_task, uua, 0);
+               /* callback frees uua */
+       }
 }
 
 /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
@@ -1287,13 +1287,14 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
         */
        dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
 
-       mutex_enter(&os->os_lock);
+       multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK];
+       multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
 
        /*
         * If we are already marked dirty, we're done.
         */
        if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
-               mutex_exit(&os->os_lock);
+               multilist_sublist_unlock(mls);
                return;
        }
 
@@ -1307,13 +1308,9 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
        dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
            dn->dn_object, txg);
 
-       if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) {
-               list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn);
-       } else {
-               list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn);
-       }
+       multilist_sublist_insert_head(mls, dn);
 
-       mutex_exit(&os->os_lock);
+       multilist_sublist_unlock(mls);
 
        /*
         * The dnode maintains a hold on its containing dbuf as
@@ -1334,13 +1331,6 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
 void
 dnode_free(dnode_t *dn, dmu_tx_t *tx)
 {
-       int txgoff = tx->tx_txg & TXG_MASK;
-
-       dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg);
-
-       /* we should be the only holder... hopefully */
-       /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */
-
        mutex_enter(&dn->dn_mtx);
        if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) {
                mutex_exit(&dn->dn_mtx);
@@ -1349,19 +1339,7 @@ dnode_free(dnode_t *dn, dmu_tx_t *tx)
        dn->dn_free_txg = tx->tx_txg;
        mutex_exit(&dn->dn_mtx);
 
-       /*
-        * If the dnode is already dirty, it needs to be moved from
-        * the dirty list to the free list.
-        */
-       mutex_enter(&dn->dn_objset->os_lock);
-       if (list_link_active(&dn->dn_dirty_link[txgoff])) {
-               list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn);
-               list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn);
-               mutex_exit(&dn->dn_objset->os_lock);
-       } else {
-               mutex_exit(&dn->dn_objset->os_lock);
-               dnode_setdirty(dn, tx);
-       }
+       dnode_setdirty(dn, tx);
 }
 
 /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c    Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c    Wed May 24 22:21:24 2017        (r318828)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c   Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c   Wed May 24 22:21:24 2017        (r318828)
@@ -1740,6 +1740,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds,
        bplist_iterate(&ds->ds_pending_deadlist,
            deadlist_enqueue_cb, &ds->ds_deadlist, tx);
 
+       if (os->os_synced_dnodes != NULL) {
+               multilist_destroy(os->os_synced_dnodes);
+               os->os_synced_dnodes = NULL;
+       }
+
        ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 
        dmu_buf_rele(ds->ds_dbuf, ds);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c      Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c      Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -132,6 +132,10 @@ int zfs_delay_min_dirty_percent = 60;
  */
 uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
 
+/*
+ * This determines the number of threads used by the dp_sync_taskq.
+ */
+int zfs_sync_taskq_batch_pct = 75;
 
 #if defined(__FreeBSD__) && defined(_KERNEL)
 
@@ -267,6 +271,10 @@ dsl_pool_open_impl(spa_t *spa, uint64_t 
        txg_list_create(&dp->dp_sync_tasks,
            offsetof(dsl_sync_task_t, dst_node));
 
+       dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
+           zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
+           TASKQ_THREADS_CPU_PCT);
+
        mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
 
@@ -417,6 +425,8 @@ dsl_pool_close(dsl_pool_t *dp)
        txg_list_destroy(&dp->dp_sync_tasks);
        txg_list_destroy(&dp->dp_dirty_dirs);
 
+       taskq_destroy(dp->dp_sync_taskq);
+
        /*
         * We can't set retry to TRUE since we're explicitly specifying
         * a spa to flush. This is good enough; any missed buffers for
@@ -605,12 +615,15 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t
 
        /*
         * After the data blocks have been written (ensured by the zio_wait()
-        * above), update the user/group space accounting.
+        * above), update the user/group space accounting.  This happens
+        * in tasks dispatched to dp_sync_taskq, so wait for them before
+        * continuing.
         */
        for (ds = list_head(&synced_datasets); ds != NULL;
            ds = list_next(&synced_datasets, ds)) {
                dmu_objset_do_userquota_updates(ds->ds_objset, tx);
        }
+       taskq_wait(dp->dp_sync_taskq);
 
        /*
         * Sync the datasets again to push out the changes due to
@@ -657,8 +670,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t
                dp->dp_mos_uncompressed_delta = 0;
        }
 
-       if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
-           list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
+       if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) {
                dsl_pool_sync_mos(dp, tx);
        }
 
@@ -716,7 +728,8 @@ int
 dsl_pool_sync_context(dsl_pool_t *dp)
 {
        return (curthread == dp->dp_tx.tx_sync_thread ||
-           spa_is_initializing(dp->dp_spa));
+           spa_is_initializing(dp->dp_spa) ||
+           taskq_member(dp->dp_sync_taskq, curthread));
 }
 
 uint64_t

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c     Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c     Wed May 24 22:21:24 2017        (r318828)
@@ -65,16 +65,16 @@ multilist_d2l(multilist_t *ml, void *obj
  *     requirement, but a general rule of thumb in order to garner the
  *     best multi-threaded performance out of the data structure.
  */
-static void
-multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
+static multilist_t *
+multilist_create_impl(size_t size, size_t offset,
     unsigned int num, multilist_sublist_index_func_t *index_func)
 {
-       ASSERT3P(ml, !=, NULL);
        ASSERT3U(size, >, 0);
        ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
        ASSERT3U(num, >, 0);
        ASSERT3P(index_func, !=, NULL);
 
+       multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
        ml->ml_offset = offset;
        ml->ml_num_sublists = num;
        ml->ml_index_func = index_func;
@@ -89,15 +89,16 @@ multilist_create_impl(multilist_t *ml, s
                mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
                list_create(&mls->mls_list, size, offset);
        }
+       return (ml);
 }
 
 /*
- * Initialize a new sublist, using the default number of sublists
+ * Allocate a new multilist, using the default number of sublists
  * (the number of CPUs, or at least 4, or the tunable
  * zfs_multilist_num_sublists).
  */
-void
-multilist_create(multilist_t *ml, size_t size, size_t offset,
+multilist_t *
+multilist_create(size_t size, size_t offset,
     multilist_sublist_index_func_t *index_func)
 {
        int num_sublists;
@@ -108,7 +109,7 @@ multilist_create(multilist_t *ml, size_t
                num_sublists = MAX(max_ncpus, 4);
        }
 
-       multilist_create_impl(ml, size, offset, num_sublists, index_func);
+       return (multilist_create_impl(size, offset, num_sublists, index_func));
 }
 
 /*
@@ -134,6 +135,7 @@ multilist_destroy(multilist_t *ml)
 
        ml->ml_num_sublists = 0;
        ml->ml_offset = 0;
+       kmem_free(ml, sizeof (multilist_t));
 }
 
 /*
@@ -285,6 +287,13 @@ multilist_sublist_lock(multilist_t *ml, 
        return (mls);
 }
 
+/* Lock and return the sublist that would be used to store the specified obj */
+multilist_sublist_t *
+multilist_sublist_lock_obj(multilist_t *ml, void *obj)
+{
+       return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
+}
+
 void
 multilist_sublist_unlock(multilist_sublist_t *mls)
 {

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c      Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c      Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2013 Martin Matuska <m...@freebsd.org>. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -740,7 +740,7 @@ spa_add(const char *name, nvlist_t *conf
                spa_active_count++;
        }
 
-       avl_create(&spa->spa_alloc_tree, zio_timestamp_compare,
+       avl_create(&spa->spa_alloc_tree, zio_bookmark_compare,
            sizeof (zio_t), offsetof(zio_t, io_alloc_node));
 
        /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h        Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h        Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -110,7 +110,7 @@ struct objset {
        /* no lock needed: */
        struct dmu_tx *os_synctx; /* XXX sketchy */
        zil_header_t os_zil_header;
-       list_t os_synced_dnodes;
+       multilist_t *os_synced_dnodes;
        uint64_t os_flags;
        uint64_t os_freed_dnodes;
        boolean_t os_rescan_dnodes;
@@ -121,11 +121,13 @@ struct objset {
 
        /* Protected by os_lock */
        kmutex_t os_lock;
-       list_t os_dirty_dnodes[TXG_SIZE];
-       list_t os_free_dnodes[TXG_SIZE];
+       multilist_t *os_dirty_dnodes[TXG_SIZE];
        list_t os_dnodes;
        list_t os_downgraded_dbufs;
 
+       /* Protects changes to DMU_{USER,GROUP}USED_OBJECT */
+       kmutex_t os_userused_lock;
+
        /* stuff we store for the user */
        kmutex_t os_user_ptr_lock;
        void *os_user_ptr;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h     Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h     Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
@@ -35,6 +35,7 @@
 #include <sys/refcount.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/zrlock.h>
+#include <sys/multilist.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -203,7 +204,7 @@ struct dnode {
        uint32_t dn_dbufs_count;        /* count of dn_dbufs */
 
        /* protected by os_lock: */
-       list_node_t dn_dirty_link[TXG_SIZE];    /* next on dataset's dirty */
+       multilist_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
 
        /* protected by dn_mtx: */
        kmutex_t dn_mtx;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h  Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h  Wed May 24 22:21:24 2017        (r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -121,6 +121,7 @@ typedef struct dsl_pool {
        txg_list_t dp_dirty_zilogs;
        txg_list_t dp_dirty_dirs;
        txg_list_t dp_sync_tasks;
+       taskq_t *dp_sync_taskq;
 
        /*
         * Protects administrative changes (properties, namespace)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h Wed May 24 22:21:24 2017        (r318828)
@@ -73,8 +73,7 @@ struct multilist {
 };
 
 void multilist_destroy(multilist_t *);
-void multilist_create(multilist_t *, size_t, size_t,
-    multilist_sublist_index_func_t *);
+multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
 
 void multilist_insert(multilist_t *, void *);
 void multilist_remove(multilist_t *, void *);
@@ -84,6 +83,7 @@ unsigned int multilist_get_num_sublists(
 unsigned int multilist_get_random_index(multilist_t *);
 
 multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
+multilist_sublist_t *multilist_sublist_lock_obj(multilist_t *, void *);
 void multilist_sublist_unlock(multilist_sublist_t *);
 
 void multilist_sublist_insert_head(multilist_sublist_t *, void *);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h       Wed May 24 22:15:16 2017        (r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h       Wed May 24 22:21:24 2017        (r318828)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright 2016 Toomas Soome <tso...@me.com>
  */
@@ -485,7 +485,7 @@ struct zio {
        list_node_t     io_trim_link;
 };
 
-extern int zio_timestamp_compare(const void *, const void *);
+extern int zio_bookmark_compare(const void *, const void *);
 
 extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
     zio_done_func_t *done, void *priv, enum zio_flag flags);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c   Wed May 24 22:15:16 2017        (r318827)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to