Author: mav
Date: Sat Oct  3 07:22:24 2015
New Revision: 288541
URL: https://svnweb.freebsd.org/changeset/base/288541

Log:
  MFC r286545:
  5630 stale bonus buffer in recycled dnode_t leads to data corruption
  
  Reviewed by: Matthew Ahrens <mahr...@delphix.com>
  Reviewed by: George Wilson <geo...@delphix.com>
  Reviewed by: Will Andrews <w...@freebsd.org>
  Approved by: Robert Mustacchi <r...@joyent.com>
  Author: Justin T. Gibbs <just...@spectralogic.com>

Modified:
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c     Sat Oct 
 3 07:22:07 2015        (r288540)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c     Sat Oct 
 3 07:22:24 2015        (r288541)
@@ -2128,21 +2128,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
 
        if (holds == 0) {
                if (db->db_blkid == DMU_BONUS_BLKID) {
-                       mutex_exit(&db->db_mtx);
+                       dnode_t *dn;
 
                        /*
-                        * If the dnode moves here, we cannot cross this barrier
-                        * until the move completes.
+                        * If the dnode moves here, we cannot cross this
+                        * barrier until the move completes.
                         */
                        DB_DNODE_ENTER(db);
-                       atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count);
+
+                       dn = DB_DNODE(db);
+                       atomic_dec_32(&dn->dn_dbufs_count);
+
+                       /*
+                        * Decrementing the dbuf count means that the bonus
+                        * buffer's dnode hold is no longer discounted in
+                        * dnode_move(). The dnode cannot move until after
+                        * the dnode_rele_and_unlock() below.
+                        */
                        DB_DNODE_EXIT(db);
+
                        /*
-                        * The bonus buffer's dnode hold is no longer discounted
-                        * in dnode_move(). The dnode cannot move until after
-                        * the dnode_rele().
+                        * Do not reference db after its lock is dropped.
+                        * Another thread may evict it.
                         */
-                       dnode_rele(DB_DNODE(db), db);
+                       mutex_exit(&db->db_mtx);
+
+                       /*
+                        * If the dnode has been freed, evict the bonus
+                        * buffer immediately.  The data in the bonus
+                        * buffer is no longer relevant and this prevents
+                        * a stale bonus buffer from being associated
+                        * with this dnode_t should the dnode_t be reused
+                        * prior to being destroyed.
+                        */
+                       mutex_enter(&dn->dn_mtx);
+                       if (dn->dn_type == DMU_OT_NONE ||
+                           dn->dn_free_txg != 0) {
+                               /*
+                                * Drop dn_mtx.  It is a leaf lock and
+                                * cannot be held when dnode_evict_bonus()
+                                * acquires other locks in order to
+                                * perform the eviction.
+                                *
+                                * Freed dnodes cannot be reused until the
+                                * last hold is released.  Since this bonus
+                                * buffer has a hold, the dnode will remain
+                                * in the free state, even without dn_mtx
+                                * held, until the dnode_rele_and_unlock()
+                                * below.
+                                */
+                               mutex_exit(&dn->dn_mtx);
+                               dnode_evict_bonus(dn);
+                               mutex_enter(&dn->dn_mtx);
+                       }
+                       dnode_rele_and_unlock(dn, db);
                } else if (db->db_buf == NULL) {
                        /*
                         * This is a special case: we never associated this

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c    Sat Oct 
 3 07:22:07 2015        (r288540)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c    Sat Oct 
 3 07:22:24 2015        (r288541)
@@ -1205,12 +1205,18 @@ dnode_add_ref(dnode_t *dn, void *tag)
 void
 dnode_rele(dnode_t *dn, void *tag)
 {
+       mutex_enter(&dn->dn_mtx);
+       dnode_rele_and_unlock(dn, tag);
+}
+
+void
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
+{
        uint64_t refs;
        /* Get while the hold prevents the dnode from moving. */
        dmu_buf_impl_t *db = dn->dn_dbuf;
        dnode_handle_t *dnh = dn->dn_handle;
 
-       mutex_enter(&dn->dn_mtx);
        refs = refcount_remove(&dn->dn_holds, tag);
        mutex_exit(&dn->dn_mtx);
 

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c       
Sat Oct  3 07:22:07 2015        (r288540)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c       
Sat Oct  3 07:22:24 2015        (r288541)
@@ -441,6 +441,12 @@ dnode_evict_dbufs(dnode_t *dn)
                ASSERT(pass < 100); /* sanity check */
        } while (progress);
 
+       dnode_evict_bonus(dn);
+}
+
+void
+dnode_evict_bonus(dnode_t *dn)
+{
        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
        if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
                mutex_enter(&dn->dn_bonus->db_mtx);

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h        
Sat Oct  3 07:22:07 2015        (r288540)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h        
Sat Oct  3 07:22:24 2015        (r288541)
@@ -279,6 +279,7 @@ int dnode_hold_impl(struct objset *dd, u
     void *ref, dnode_t **dnp);
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
+void dnode_rele_and_unlock(dnode_t *dn, void *tag);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
@@ -300,6 +301,7 @@ void dnode_fini(void);
 int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
     int minlvl, uint64_t blkfill, uint64_t txg);
 void dnode_evict_dbufs(dnode_t *dn);
+void dnode_evict_bonus(dnode_t *dn);
 
 #ifdef ZFS_DEBUG
 
_______________________________________________
svn-src-stable-10@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "svn-src-stable-10-unsubscr...@freebsd.org"

Reply via email to