On Sun, 7 May 2006, Konstantin Belousov wrote:

KB> > I'm running RELENG_6 with hand-merged sys/ufs/ffs changes from 1 to 7 May.

KB> It would be great to show the patchset. Also note that relevant changes are
KB> not limited to ufs/ffs. For instance, rev. 1.671 of kern/vfs_subr.c is
KB> also important, as well as rev. 1.82 of ufs/ufs/ufs_quota.c, that
KB> deals explicitely with combination of snapshots and quotas.

Actually I did (see attachment) ;-)

Sincerely,
D.Marck                                     [DM5020, MCK-RIPE, DM3-RIPN]
------------------------------------------------------------------------
*** Dmitry Morozovsky --- D.Marck --- Wild Woozle --- [EMAIL PROTECTED] ***
------------------------------------------------------------------------
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.635.2.16.2.1
diff -u -r1.635.2.16.2.1 vfs_subr.c
--- kern/vfs_subr.c     4 May 2006 07:42:10 -0000       1.635.2.16.2.1
+++ kern/vfs_subr.c     7 May 2006 16:27:52 -0000
@@ -2839,7 +2839,6 @@
        struct vnode *vp, *mvp;
        struct vm_object *obj;
 
-       (void) vn_start_write(NULL, &mp, V_WAIT);
        MNT_ILOCK(mp);
        MNT_VNODE_FOREACH(vp, mp, mvp) {
                VI_LOCK(vp);
@@ -2870,7 +2869,6 @@
                        VI_UNLOCK(vp);
        }
        MNT_IUNLOCK(mp);
-       vn_finished_write(mp);
 }
 
 /*
Index: ufs/ffs/ffs_rawread.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_rawread.c,v
retrieving revision 1.25.2.2
diff -u -r1.25.2.2 ffs_rawread.c
--- ufs/ffs/ffs_rawread.c       9 Mar 2006 00:18:45 -0000       1.25.2.2
+++ ufs/ffs/ffs_rawread.c       7 May 2006 16:27:56 -0000
@@ -129,8 +129,16 @@
                        upgraded = 0;
                        
                
-               /* Attempt to msync mmap() regions to clean dirty mmap */ 
                VI_LOCK(vp);
+               /* Check if vnode was reclaimed while unlocked. */
+               if ((vp->v_iflag & VI_DOOMED) != 0) {
+                       VI_UNLOCK(vp);
+                       if (upgraded != 0)
+                               VOP_LOCK(vp, LK_DOWNGRADE, td);
+                       vn_finished_write(mp);
+                       return (EIO);
+               }
+               /* Attempt to msync mmap() regions to clean dirty mmap */ 
                if ((vp->v_iflag & VI_OBJDIRTY) != 0) {
                        VI_UNLOCK(vp);
                        if (vp->v_object != NULL) {
@@ -150,6 +158,7 @@
                        VI_UNLOCK(vp);
                        if (upgraded != 0)
                                VOP_LOCK(vp, LK_DOWNGRADE, td);
+                       vn_finished_write(mp);
                        return (error);
                }
                /* Flush dirty buffers */
@@ -159,6 +168,7 @@
                        if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) {
                                if (upgraded != 0)
                                        VOP_LOCK(vp, LK_DOWNGRADE, td);
+                               vn_finished_write(mp);
                                return (error);
                        }
                        VI_LOCK(vp);
Index: ufs/ffs/ffs_snapshot.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_snapshot.c,v
retrieving revision 1.103.2.5
diff -u -r1.103.2.5 ffs_snapshot.c
--- ufs/ffs/ffs_snapshot.c      22 Mar 2006 17:42:31 -0000      1.103.2.5
+++ ufs/ffs/ffs_snapshot.c      7 May 2006 16:27:56 -0000
@@ -36,6 +36,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_snapshot.c,v 1.103.2.5 2006/03/22 
17:42:31 tegge Exp $");
 
+#include "opt_quota.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
@@ -104,6 +106,7 @@
     struct fs *, ufs_lbn_t, int);
 static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
 static void process_deferred_inactive(struct mount *);
+static void try_free_snapdata(struct vnode *devvp, struct thread *td);
 
 /*
  * To ensure the consistency of snapshots across crashes, we must
@@ -306,6 +309,18 @@
                if (error)
                        goto out;
        }
+#ifdef QUOTA
+       /*
+        * Turn off disk quotas for snapshot file.
+        */
+       (void) chkdq(ip, -DIP(ip, i_blocks), KERNCRED, FORCE);
+       for (i = 0; i < MAXQUOTAS; i++) {
+               if (ip->i_dquot[i] != NODQUOT) {
+                       dqrele(vp, ip->i_dquot[i]);
+                       ip->i_dquot[i] = NODQUOT;
+               }
+       }
+#endif
        /*
         * Change inode to snapshot type file.
         */
@@ -348,6 +363,11 @@
                vn_start_write(NULL, &wrtmp, V_WAIT);
        }
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+       if (ip->i_effnlink == 0) {
+               error = ENOENT;         /* Snapshot file unlinked */
+               sn = NULL;
+               goto out1;
+       }
        if (collectsnapstats)
                nanotime(&starttime);
        /*
@@ -551,7 +571,6 @@
        }
        lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY,
            VI_MTX(vp), td);
-       transferlockers(&vp->v_lock, vp->v_vnlock);
        lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
        /*
         * If this is the first snapshot on this filesystem, then we need
@@ -707,12 +726,17 @@
         * the inode for this snapshot then a deadlock can occur. Drop
         * the snapshot lock until the buffer has been written.
         */
+       VREF(vp);       /* Protect against ffs_snapgone() */
        VOP_UNLOCK(vp, 0, td);
        (void) bread(ip->i_devvp,
                     fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
                     (int) fs->fs_bsize, NOCRED, &nbp);
        brelse(nbp);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+       if (ip->i_effnlink == 0)
+               error = ENOENT;         /* Snapshot file unlinked */
+       else
+               vrele(vp);              /* Drop extra reference */
 done:
        FREE(copy_fs->fs_csp, M_UFSMNT);
        bawrite(sbp);
@@ -1469,18 +1493,16 @@
 {
        struct inode *ip;
        struct vnode *devvp;
-       struct lock *lkp;
        struct buf *ibp;
        struct fs *fs;
        struct thread *td = curthread;
-       ufs2_daddr_t numblks, blkno, dblk, *snapblklist;
+       ufs2_daddr_t numblks, blkno, dblk;
        int error, loc, last;
        struct snapdata *sn;
 
        ip = VTOI(vp);
        fs = ip->i_fs;
        devvp = ip->i_devvp;
-       sn = devvp->v_rdev->si_snapdata;
        /*
         * If active, delete from incore list (this snapshot may
         * already have been in the process of being deleted, so
@@ -1488,29 +1510,23 @@
         *
         * Clear copy-on-write flag if last snapshot.
         */
+       VI_LOCK(devvp);
        if (ip->i_nextsnap.tqe_prev != 0) {
-               lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, td);
-               VI_LOCK(devvp);
+               sn = devvp->v_rdev->si_snapdata;
                TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap);
                ip->i_nextsnap.tqe_prev = 0;
-               lkp = vp->v_vnlock;
+               VI_UNLOCK(devvp);
+               lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, td);
+               VI_LOCK(vp);
+               KASSERT(vp->v_vnlock == &sn->sn_lock,
+                       ("ffs_snapremove: lost lock mutation")); 
                vp->v_vnlock = &vp->v_lock;
-               lockmgr(lkp, LK_RELEASE, NULL, td);
-               if (TAILQ_FIRST(&sn->sn_head) != 0) {
-                       VI_UNLOCK(devvp);
-               } else {
-                       snapblklist = sn->sn_blklist;
-                       sn->sn_blklist = 0;
-                       sn->sn_listsize = 0;
-                       devvp->v_rdev->si_snapdata = NULL;
-                       devvp->v_vflag &= ~VV_COPYONWRITE;
-                       lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td);
-                       lockmgr(lkp, LK_RELEASE, NULL, td);
-                       lockdestroy(lkp);
-                       free(sn, M_UFSMNT);
-                       FREE(snapblklist, M_UFSMNT);
-               }
-       }
+               VI_UNLOCK(vp);
+               VI_LOCK(devvp);
+               lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
+               try_free_snapdata(devvp, td);
+       } else
+               VI_UNLOCK(devvp);
        /*
         * Clear all BLK_NOCOPY fields. Pass any block claims to other
         * snapshots that want them (see ffs_snapblkfree below).
@@ -1575,6 +1591,13 @@
        ip->i_flags &= ~SF_SNAPSHOT;
        DIP_SET(ip, i_flags, ip->i_flags);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
+#ifdef QUOTA
+       /*
+        * Reenable disk quotas for ex-snapshot file.
+        */
+       if (!getinoquota(ip))
+               (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE);
+#endif
 }
 
 /*
@@ -1866,7 +1889,6 @@
                }
                lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY,
                    VI_MTX(vp), td);
-               transferlockers(&vp->v_lock, vp->v_vnlock);
                lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
                /*
                 * Link it onto the active snapshot list.
@@ -1939,30 +1961,35 @@
        struct snapdata *sn;
        struct inode *xp;
        struct vnode *vp;
+       struct thread *td = curthread;
 
-       sn = devvp->v_rdev->si_snapdata;
        VI_LOCK(devvp);
-       while ((xp = TAILQ_FIRST(&sn->sn_head)) != 0) {
+       sn = devvp->v_rdev->si_snapdata;
+       while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) {
                vp = ITOV(xp);
-               vp->v_vnlock = &vp->v_lock;
                TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap);
                xp->i_nextsnap.tqe_prev = 0;
-               if (xp->i_effnlink > 0) {
-                       VI_UNLOCK(devvp);
+               lockmgr(&sn->sn_lock, 
+                       LK_INTERLOCK | LK_EXCLUSIVE,
+                       VI_MTX(devvp),
+                       td);
+               VI_LOCK(vp);
+               lockmgr(&vp->v_lock,
+                       LK_INTERLOCK | LK_EXCLUSIVE,
+                       VI_MTX(vp), td);
+               VI_LOCK(vp);
+               KASSERT(vp->v_vnlock == &sn->sn_lock,
+               ("ffs_snapshot_unmount: lost lock mutation")); 
+               vp->v_vnlock = &vp->v_lock;
+               VI_UNLOCK(vp);
+               lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
+               lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
+               if (xp->i_effnlink > 0)
                        vrele(vp);
-                       VI_LOCK(devvp);
-               }
-       }
-       devvp->v_rdev->si_snapdata = NULL;
-       devvp->v_vflag &= ~VV_COPYONWRITE;
-       VI_UNLOCK(devvp);
-       if (sn->sn_blklist != NULL) {
-               FREE(sn->sn_blklist, M_UFSMNT);
-               sn->sn_blklist = NULL;
-               sn->sn_listsize = 0;
+               VI_LOCK(devvp);
+               sn = devvp->v_rdev->si_snapdata;
        }
-       lockdestroy(&sn->sn_lock);
-       free(sn, M_UFSMNT);
+       try_free_snapdata(devvp, td);
        ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
 }
 
@@ -1984,6 +2011,7 @@
        ufs2_daddr_t lbn, blkno, *snapblklist;
        int lower, upper, mid, indiroff, error = 0;
        int launched_async_io, prev_norunningbuf;
+       long saved_runningbufspace;
 
        if ((VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0)
                return (0);             /* Update on a snapshot file */
@@ -2026,7 +2054,9 @@
         * for a long time waiting on snaplk, back it out of
         * runningbufspace, possibly waking other threads waiting for space.
         */
-       runningbufwakeup(bp);
+       saved_runningbufspace = bp->b_runningbufspace;
+       if (saved_runningbufspace != 0)
+               runningbufwakeup(bp);
        /*
         * Not in the precomputed list, so check the snapshots.
         */
@@ -2038,9 +2068,11 @@
                if (sn == NULL ||
                    TAILQ_FIRST(&sn->sn_head) == NULL) {
                        VI_UNLOCK(devvp);
-                       if (bp->b_runningbufspace)
+                       if (saved_runningbufspace != 0) {
+                               bp->b_runningbufspace = saved_runningbufspace;
                                atomic_add_int(&runningbufspace,
                                               bp->b_runningbufspace);
+                       }
                        return (0);             /* Snapshot gone */
                }
        }
@@ -2161,8 +2193,10 @@
        /*
         * I/O on bp will now be started, so count it in runningbufspace.
         */
-       if (bp->b_runningbufspace)
+       if (saved_runningbufspace != 0) {
+               bp->b_runningbufspace = saved_runningbufspace;
                atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+       }
        return (error);
 }
 
@@ -2184,13 +2218,10 @@
        bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
        bip->bio_data = bp->b_data;
        bip->bio_length = bp->b_bcount;
+       bip->bio_done = NULL;
 
        g_io_request(bip, ip->i_devvp->v_bufobj.bo_private);
-
-       do 
-               msleep(bip, NULL, PRIBIO, "snaprdb", hz/10);
-       while (!(bip->bio_flags & BIO_DONE));
-       bp->b_error = bip->bio_error;
+       bp->b_error = biowait(bip, "snaprdb");
        g_destroy_bio(bip);
        return (bp->b_error);
 }
@@ -2259,3 +2290,32 @@
        MNT_IUNLOCK(mp);
        vn_finished_secondary_write(mp);
 }
+
+/* Try to free snapdata associated with devvp */
+static void
+try_free_snapdata(struct vnode *devvp,
+                 struct thread *td)
+{
+       struct snapdata *sn;
+       ufs2_daddr_t *snapblklist;
+
+       sn = devvp->v_rdev->si_snapdata;
+
+       if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL ||
+           (devvp->v_vflag & VV_COPYONWRITE) == 0) {
+               VI_UNLOCK(devvp);
+               return;
+       }
+
+       devvp->v_rdev->si_snapdata = NULL;
+       devvp->v_vflag &= ~VV_COPYONWRITE;
+       snapblklist = sn->sn_blklist;
+       sn->sn_blklist = NULL;
+       sn->sn_listsize = 0;
+       lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td);
+       lockmgr(&sn->sn_lock, LK_RELEASE, NULL, td);
+       lockdestroy(&sn->sn_lock);
+       free(sn, M_UFSMNT);
+       if (snapblklist != NULL)
+               FREE(snapblklist, M_UFSMNT);
+}
Index: ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.290.2.9
diff -u -r1.290.2.9 ffs_vfsops.c
--- ufs/ffs/ffs_vfsops.c        22 Mar 2006 17:54:50 -0000      1.290.2.9
+++ ufs/ffs/ffs_vfsops.c        7 May 2006 16:27:56 -0000
@@ -1728,6 +1728,7 @@
                if ((vp->v_vflag & VV_COPYONWRITE) &&
                    vp->v_rdev->si_snapdata != NULL) {
                        if ((bp->b_flags & B_CLUSTER) != 0) {
+                               runningbufwakeup(bp);
                                TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
                                              b_cluster.cluster_entry) {
                                        error = ffs_copyonwrite(vp, tbp);
@@ -1739,6 +1740,9 @@
                                                return;
                                        }
                                }
+                               bp->b_runningbufspace = bp->b_bufsize;
+                               atomic_add_int(&runningbufspace,
+                                              bp->b_runningbufspace);
                        } else {
                                error = ffs_copyonwrite(vp, bp);
                                if (error != 0 && error != EOPNOTSUPP) {
Index: ufs/ffs/ffs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vnops.c,v
retrieving revision 1.157.2.1
diff -u -r1.157.2.1 ffs_vnops.c
--- ufs/ffs/ffs_vnops.c 29 Oct 2005 06:43:55 -0000      1.157.2.1
+++ ufs/ffs/ffs_vnops.c 7 May 2006 16:27:56 -0000
@@ -338,7 +338,54 @@
                struct thread *a_td;
        } */ *ap;
 {
+#ifndef NO_FFS_SNAPSHOT
+       struct vnode *vp;
+       int flags;
+       struct lock *lkp;
+       int result;
+       
+       switch (ap->a_flags & LK_TYPE_MASK) {
+       case LK_SHARED:
+       case LK_UPGRADE:
+       case LK_EXCLUSIVE:
+               vp = ap->a_vp;
+               flags = ap->a_flags;
+               for (;;) {
+                       /*
+                        * vnode interlock must be held to ensure that
+                        * the possibly external lock isn't freed,
+                        * e.g. when mutating from snapshot file vnode
+                        * to regular file vnode.
+                        */
+                       if ((flags & LK_INTERLOCK) == 0) {
+                               VI_LOCK(vp);
+                               flags |= LK_INTERLOCK;
+                       }
+                       lkp = vp->v_vnlock;
+                       result = lockmgr(lkp, flags, VI_MTX(vp), ap->a_td);
+                       if (lkp == vp->v_vnlock || result != 0)
+                               break;
+                       /*
+                        * Apparent success, except that the vnode
+                        * mutated between snapshot file vnode and
+                        * regular file vnode while this process
+                        * slept.  The lock currently held is not the
+                        * right lock.  Release it, and try to get the
+                        * new lock.
+                        */
+                       (void) lockmgr(lkp, LK_RELEASE, VI_MTX(vp), ap->a_td);
+                       if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
+                               flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
+                       flags &= ~LK_INTERLOCK;
+               }
+               break;
+       default:
+               result = VOP_LOCK_APV(&ufs_vnodeops, ap);
+       }
+       return (result);
+#else
        return (VOP_LOCK_APV(&ufs_vnodeops, ap));
+#endif
 }
 
 /*
Index: ufs/ufs/ufs_quota.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_quota.c,v
retrieving revision 1.74.2.2.2.1
diff -u -r1.74.2.2.2.1 ufs_quota.c
--- ufs/ufs/ufs_quota.c 26 Apr 2006 01:23:59 -0000      1.74.2.2.2.1
+++ ufs/ufs/ufs_quota.c 7 May 2006 16:27:56 -0000
@@ -35,6 +35,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_ffs.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
@@ -46,6 +48,7 @@
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 
@@ -97,6 +100,13 @@
        struct vnode *vp = ITOV(ip);
        int error;
 
+#ifndef NO_FFS_SNAPSHOT
+       /*
+        * Disk quotas must be turned off for snapshot files.
+        */
+       if ((ip->i_flags & SF_SNAPSHOT) != 0)
+               return (0);
+#endif
        ump = VFSTOUFS(vp->v_mount);
        /*
         * Set up the user quota based on file uid.
@@ -375,6 +385,13 @@
        struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount);
        int i;
 
+#ifndef NO_FFS_SNAPSHOT
+       /*
+        * Disk quotas must be turned off for snapshot files.
+        */
+       if ((ip->i_flags & SF_SNAPSHOT) != 0)
+               return;
+#endif
        for (i = 0; i < MAXQUOTAS; i++) {
                if (ump->um_quotas[i] == NULLVP ||
                    (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING)))
_______________________________________________
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to