Author: delphij
Date: Tue Jul 30 20:37:32 2013
New Revision: 253816
URL: http://svnweb.freebsd.org/changeset/base/253816

Log:
  MFV r253780:
  
  To quote Illumos #3875:
  
  The problem here is that if we ever end up in the error
  path, we drop the locks protecting access to the zfsvfs_t
  prior to forcibly unmounting the filesystem. Because z_os
  is NULL, any thread that had already picked up the zfsvfs_t
  and was sitting in ZFS_ENTER() when we dropped our locks
  in zfs_resume_fs() will now acquire the lock, attempt to
  use z_os, and panic.
  
  Illumos ZFS issues:
    3875 panic in zfs_root() after failed rollback
  
  MFC after:    2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c    Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c    Tue Jul 30 20:37:32 2013        (r253816)
@@ -517,6 +517,38 @@ dmu_objset_rele(objset_t *os, void *tag)
        dsl_pool_rele(dp, tag);
 }
 
+/*
+ * When we are called, os MUST refer to an objset associated with a dataset
+ * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
+ * == tag.  We will then release and reacquire ownership of the dataset while
+ * holding the pool config_rwlock so that no intervening namespace or ownership
+ * changes can occur.
+ *
+ * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
+ * release the hold on its dataset and acquire a new one on the dataset of the
+ * same name so that it can be partially torn down and reconstructed.
+ */
+void
+dmu_objset_refresh_ownership(objset_t *os, void *tag)
+{
+       dsl_pool_t *dp;
+       dsl_dataset_t *ds, *newds;
+       char name[MAXNAMELEN];
+
+       ds = os->os_dsl_dataset;
+       VERIFY3P(ds, !=, NULL);
+       VERIFY3P(ds->ds_owner, ==, tag);
+       VERIFY(dsl_dataset_long_held(ds));
+
+       dsl_dataset_name(ds, name);
+       dp = dmu_objset_pool(os);
+       dsl_pool_config_enter(dp, FTAG);
+       dmu_objset_disown(os, tag);
+       VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
+       VERIFY3P(newds, ==, os->os_dsl_dataset);
+       dsl_pool_config_exit(dp, FTAG);
+}
+
 void
 dmu_objset_disown(objset_t *os, void *tag)
 {

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Tue Jul 30 20:37:32 2013        (r253816)
@@ -1602,7 +1602,7 @@ dmu_recv_end_check(void *arg, dmu_tx_t *
                if (error != 0)
                        return (error);
                error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
-                   origin_head, drc->drc_force);
+                   origin_head, drc->drc_force, drc->drc_owner, tx);
                if (error != 0) {
                        dsl_dataset_rele(origin_head, FTAG);
                        return (error);
@@ -1654,6 +1654,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *t
 
                dsl_dataset_rele(origin_head, FTAG);
                dsl_destroy_head_sync_impl(drc->drc_ds, tx);
+
+               if (drc->drc_owner != NULL)
+                       VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
        } else {
                dsl_dataset_t *ds = drc->drc_ds;
 
@@ -1752,8 +1755,10 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
 }
 
 int
-dmu_recv_end(dmu_recv_cookie_t *drc)
+dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
 {
+       drc->drc_owner = owner;
+
        if (drc->drc_newfs)
                return (dmu_recv_new_end(drc));
        else

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c   Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c   Tue Jul 30 20:37:32 2013        (r253816)
@@ -1755,16 +1755,52 @@ dsl_dataset_rename_snapshot(const char *
            dsl_dataset_rename_snapshot_sync, &ddrsa, 1));
 }
 
+/*
+ * If we're doing an ownership handoff, we need to make sure that there is
+ * only one long hold on the dataset.  We're not allowed to change anything here
+ * so we don't permanently release the long hold or regular hold here.  We want
+ * to do this only when syncing to avoid the dataset unexpectedly going away
+ * when we release the long hold.
+ */
+static int
+dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
+{
+       boolean_t held;
+
+       if (!dmu_tx_is_syncing(tx))
+               return (0);
+
+       if (owner != NULL) {
+               VERIFY3P(ds->ds_owner, ==, owner);
+               dsl_dataset_long_rele(ds, owner);
+       }
+
+       held = dsl_dataset_long_held(ds);
+
+       if (owner != NULL)
+               dsl_dataset_long_hold(ds, owner);
+
+       if (held)
+               return (SET_ERROR(EBUSY));
+
+       return (0);
+}
+
+typedef struct dsl_dataset_rollback_arg {
+       const char *ddra_fsname;
+       void *ddra_owner;
+} dsl_dataset_rollback_arg_t;
+
 static int
 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
 {
-       const char *fsname = arg;
+       dsl_dataset_rollback_arg_t *ddra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;
        int64_t unused_refres_delta;
        int error;
 
-       error = dsl_dataset_hold(dp, fsname, FTAG, &ds);
+       error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
        if (error != 0)
                return (error);
 
@@ -1780,9 +1816,10 @@ dsl_dataset_rollback_check(void *arg, dm
                return (SET_ERROR(EINVAL));
        }
 
-       if (dsl_dataset_long_held(ds)) {
+       error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
+       if (error != 0) {
                dsl_dataset_rele(ds, FTAG);
-               return (SET_ERROR(EBUSY));
+               return (error);
        }
 
        /*
@@ -1819,12 +1856,12 @@ dsl_dataset_rollback_check(void *arg, dm
 static void
 dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
 {
-       const char *fsname = arg;
+       dsl_dataset_rollback_arg_t *ddra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds, *clone;
        uint64_t cloneobj;
 
-       VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds));
+       VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));
 
        cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
            ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
@@ -1840,11 +1877,26 @@ dsl_dataset_rollback_sync(void *arg, dmu
        dsl_dataset_rele(ds, FTAG);
 }
 
+/*
+ * If owner != NULL:
+ *
+ * - The existing dataset MUST be owned by the specified owner at entry
+ * - Upon return, dataset will still be held by the same owner, whether we
+ *   succeed or not.
+ *
+ * This mode is required any time the existing filesystem is mounted.  See
+ * notes above zfs_suspend_fs() for further details.
+ */
 int
-dsl_dataset_rollback(const char *fsname)
+dsl_dataset_rollback(const char *fsname, void *owner)
 {
+       dsl_dataset_rollback_arg_t ddra;
+
+       ddra.ddra_fsname = fsname;
+       ddra.ddra_owner = owner;
+
        return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
-           dsl_dataset_rollback_sync, (void *)fsname, 1));
+           dsl_dataset_rollback_sync, (void *)&ddra, 1));
 }
 
 struct promotenode {
@@ -2362,7 +2414,7 @@ dsl_dataset_promote(const char *name, ch
 
 int
 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
-    dsl_dataset_t *origin_head, boolean_t force)
+    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
 {
        int64_t unused_refres_delta;
 
@@ -2391,7 +2443,7 @@ dsl_dataset_clone_swap_check_impl(dsl_da
                return (SET_ERROR(ETXTBSY));
 
        /* origin_head should have no long holds (e.g. is not mounted) */
-       if (dsl_dataset_long_held(origin_head))
+       if (dsl_dataset_handoff_check(origin_head, owner, tx))
                return (SET_ERROR(EBUSY));
 
        /* check amount of any unconsumed refreservation */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h        Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h        Tue Jul 30 20:37:32 2013        (r253816)
@@ -136,6 +136,7 @@ struct objset {
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
 int dmu_objset_own(const char *name, dmu_objset_type_t type,
     boolean_t readonly, void *tag, objset_t **osp);
+void dmu_objset_refresh_ownership(objset_t *os, void *tag);
 void dmu_objset_rele(objset_t *os, void *tag);
 void dmu_objset_disown(objset_t *os, void *tag);
 int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h  Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h  Tue Jul 30 20:37:32 2013        (r253816)
@@ -62,6 +62,7 @@ typedef struct dmu_recv_cookie {
        struct avl_tree *drc_guid_to_ds_map;
        zio_cksum_t drc_cksum;
        uint64_t drc_newsnapobj;
+       void *drc_owner;
 } dmu_recv_cookie_t;
 
 int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
@@ -72,6 +73,6 @@ int dmu_recv_stream(dmu_recv_cookie_t *d
 int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
 #endif
     int cleanup_fd, uint64_t *action_handlep);
-int dmu_recv_end(dmu_recv_cookie_t *drc);
+int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
 
 #endif /* _DMU_SEND_H */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h       Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h       Tue Jul 30 20:37:32 2013        (r253816)
@@ -247,7 +247,7 @@ void dsl_dataset_long_rele(dsl_dataset_t
 boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
 
 int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
-    dsl_dataset_t *origin_head, boolean_t force);
+    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx);
 void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
     dsl_dataset_t *origin_head, dmu_tx_t *tx);
 int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
@@ -264,7 +264,7 @@ int dsl_dataset_snap_lookup(dsl_dataset_
 int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
 void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
     zprop_source_t source, uint64_t value, dmu_tx_t *tx);
-int dsl_dataset_rollback(const char *fsname);
+int dsl_dataset_rollback(const char *fsname, void *owner);
 
 #ifdef ZFS_DEBUG
 #define        dprintf_ds(ds, fmt, ...) do { \

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c     Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c     Tue Jul 30 20:37:32 2013        (r253816)
@@ -3566,13 +3566,13 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
                if (error == 0) {
                        int resume_err;
 
-                       error = dsl_dataset_rollback(zc->zc_name);
+                       error = dsl_dataset_rollback(zc->zc_name, zfsvfs);
                        resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
                        error = error ? error : resume_err;
                }
                VFS_RELE(zfsvfs->z_vfs);
        } else {
-               error = dsl_dataset_rollback(zc->zc_name);
+               error = dsl_dataset_rollback(zc->zc_name, NULL);
        }
        return (error);
 }
@@ -4101,13 +4101,13 @@ zfs_ioc_recv(zfs_cmd_t *zc)
                         * If the suspend fails, then the recv_end will
                         * likely also fail, and clean up after itself.
                         */
-                       end_err = dmu_recv_end(&drc);
+                       end_err = dmu_recv_end(&drc, zfsvfs);
                        if (error == 0)
                                error = zfs_resume_fs(zfsvfs, tofs);
                        error = error ? error : end_err;
                        VFS_RELE(zfsvfs->z_vfs);
                } else {
-                       error = dmu_recv_end(&drc);
+                       error = dmu_recv_end(&drc, NULL);
                }
        }
 
@@ -4598,8 +4598,11 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
                         * objset_phys_t).  Suspend/resume the fs will do that.
                         */
                        error = zfs_suspend_fs(zfsvfs);
-                       if (error == 0)
+                       if (error == 0) {
+                               dmu_objset_refresh_ownership(zfsvfs->z_os,
+                                   zfsvfs);
                                error = zfs_resume_fs(zfsvfs, zc->zc_name);
+                       }
                }
                if (error == 0)
                        error = dmu_objset_userspace_upgrade(zfsvfs->z_os);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c    Tue Jul 30 19:24:05 2013        (r253815)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c    Tue Jul 30 20:37:32 2013        (r253816)
@@ -2222,7 +2222,9 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int
  * Block out VOPs and close zfsvfs_t::z_os
  *
  * Note, if successful, then we return with the 'z_teardown_lock' and
- * 'z_teardown_inactive_lock' write held.
+ * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
+ * dataset and objset intact so that they can be atomically handed off during
+ * a subsequent rollback or recv operation and the resume thereafter.
  */
 int
 zfs_suspend_fs(zfsvfs_t *zfsvfs)
@@ -2231,71 +2233,76 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs)
 
        if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
                return (error);
-       dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 
        return (0);
 }
 
 /*
- * Reopen zfsvfs_t::z_os and release VOPs.
+ * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
+ * is an invariant across any of the operations that can be performed while the
+ * filesystem was suspended.  Whether it succeeded or failed, the preconditions
+ * are the same: the relevant objset and associated dataset are owned by
+ * zfsvfs, held, and long held on entry.
  */
 int
 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 {
        int err;
+       znode_t *zp;
+       uint64_t sa_obj = 0;
 
        ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
        ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
 
-       err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
-           &zfsvfs->z_os);
-       if (err) {
-               zfsvfs->z_os = NULL;
-       } else {
-               znode_t *zp;
-               uint64_t sa_obj = 0;
+       /*
+        * We already own this, so just hold and rele it to update the
+        * objset_t, as the one we had before may have been evicted.
+        */
+       VERIFY0(dmu_objset_hold(osname, zfsvfs, &zfsvfs->z_os));
+       VERIFY3P(zfsvfs->z_os->os_dsl_dataset->ds_owner, ==, zfsvfs);
+       VERIFY(dsl_dataset_long_held(zfsvfs->z_os->os_dsl_dataset));
+       dmu_objset_rele(zfsvfs->z_os, zfsvfs);
 
-               /*
-                * Make sure version hasn't changed
-                */
+       /*
+        * Make sure version hasn't changed
+        */
 
-               err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
-                   &zfsvfs->z_version);
+       err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
+           &zfsvfs->z_version);
 
-               if (err)
-                       goto bail;
+       if (err)
+               goto bail;
 
-               err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
-                   ZFS_SA_ATTRS, 8, 1, &sa_obj);
+       err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+           ZFS_SA_ATTRS, 8, 1, &sa_obj);
 
-               if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
-                       goto bail;
+       if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
+               goto bail;
 
-               if ((err = sa_setup(zfsvfs->z_os, sa_obj,
-                   zfs_attr_table,  ZPL_END, &zfsvfs->z_attr_table)) != 0)
-                       goto bail;
+       if ((err = sa_setup(zfsvfs->z_os, sa_obj,
+           zfs_attr_table,  ZPL_END, &zfsvfs->z_attr_table)) != 0)
+               goto bail;
 
-               if (zfsvfs->z_version >= ZPL_VERSION_SA)
-                       sa_register_update_callback(zfsvfs->z_os,
-                           zfs_sa_upgrade);
+       if (zfsvfs->z_version >= ZPL_VERSION_SA)
+               sa_register_update_callback(zfsvfs->z_os,
+                   zfs_sa_upgrade);
 
-               VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
+       VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
 
-               zfs_set_fuid_feature(zfsvfs);
+       zfs_set_fuid_feature(zfsvfs);
 
-               /*
-                * Attempt to re-establish all the active znodes with
-                * their dbufs.  If a zfs_rezget() fails, then we'll let
-                * any potential callers discover that via ZFS_ENTER_VERIFY_VP
-                * when they try to use their znode.
-                */
-               mutex_enter(&zfsvfs->z_znodes_lock);
-               for (zp = list_head(&zfsvfs->z_all_znodes); zp;
-                   zp = list_next(&zfsvfs->z_all_znodes, zp)) {
-                       (void) zfs_rezget(zp);
-               }
-               mutex_exit(&zfsvfs->z_znodes_lock);
+       /*
+        * Attempt to re-establish all the active znodes with
+        * their dbufs.  If a zfs_rezget() fails, then we'll let
+        * any potential callers discover that via ZFS_ENTER_VERIFY_VP
+        * when they try to use their znode.
+        */
+       mutex_enter(&zfsvfs->z_znodes_lock);
+       for (zp = list_head(&zfsvfs->z_all_znodes); zp;
+           zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+               (void) zfs_rezget(zp);
        }
+       mutex_exit(&zfsvfs->z_znodes_lock);
 
 bail:
        /* release the VOPs */
@@ -2304,8 +2311,8 @@ bail:
 
        if (err) {
                /*
-                * Since we couldn't reopen zfsvfs::z_os, or
-                * setup the sa framework force unmount this file system.
+                * Since we couldn't setup the sa framework, try to force
+                * unmount this file system.
                 */
                if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
                        (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to