Author: mckusick
Date: Sun Jun 12 19:27:05 2011
New Revision: 223020
URL: http://svn.freebsd.org/changeset/base/223020

Log:
  Update to soft updates journaling to properly track freed blocks
  that get claimed by snapshots.
  
  Submitted by: Jeff Roberson
  Tested by:    Peter Holm

Modified:
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/ffs_extern.h
  head/sys/ufs/ffs/ffs_snapshot.c
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ufs/ufs_vnops.c

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Sun Jun 12 18:52:39 2011        (r223019)
+++ head/sys/ufs/ffs/ffs_alloc.c        Sun Jun 12 19:27:05 2011        (r223020)
@@ -2035,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
         */
        if (devvp->v_type != VREG &&
            (devvp->v_vflag & VV_COPYONWRITE) &&
-           ffs_snapblkfree(fs, devvp, bno, size, inum)) {
+           ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) {
                return;
        }
        if (!ump->um_candelete) {

Modified: head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- head/sys/ufs/ffs/ffs_extern.h       Sun Jun 12 18:52:39 2011        (r223019)
+++ head/sys/ufs/ffs/ffs_extern.h       Sun Jun 12 19:27:05 2011        (r223020)
@@ -80,12 +80,14 @@ int ffs_realloccg(struct inode *, ufs2_d
            ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
 int    ffs_sbupdate(struct ufsmount *, int, int);
 void   ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
-int    ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
+int    ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
+           struct workhead *);
 void   ffs_snapremove(struct vnode *vp);
 int    ffs_snapshot(struct mount *mp, char *snapfile);
 void   ffs_snapshot_mount(struct mount *mp);
 void   ffs_snapshot_unmount(struct mount *mp);
 void   process_deferred_inactive(struct mount *mp);
+void   ffs_sync_snap(struct mount *, int);
 int    ffs_syncvnode(struct vnode *vp, int waitfor);
 int    ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
 int    ffs_update(struct vnode *, int);
@@ -149,6 +151,9 @@ int softdep_prealloc(struct vnode *, int
 int    softdep_journal_lookup(struct mount *, struct vnode **);
 void   softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
 void   softdep_journal_fsync(struct inode *);
+void   softdep_buf_append(struct buf *, struct workhead *);
+void   softdep_inode_append(struct inode *, struct ucred *, struct workhead *);
+void   softdep_freework(struct workhead *);
 
 
 /*
@@ -161,4 +166,14 @@ void       softdep_journal_fsync(struct inode 
 
 int    ffs_rdonly(struct inode *);
 
+TAILQ_HEAD(snaphead, inode);
+
+struct snapdata {
+       LIST_ENTRY(snapdata) sn_link;
+       struct snaphead sn_head;
+       daddr_t sn_listsize;
+       daddr_t *sn_blklist;
+       struct lock sn_lock;
+};
+
 #endif /* !_UFS_FFS_EXTERN_H */

Modified: head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- head/sys/ufs/ffs/ffs_snapshot.c     Sun Jun 12 18:52:39 2011        (r223019)
+++ head/sys/ufs/ffs/ffs_snapshot.c     Sun Jun 12 19:27:05 2011        (r223020)
@@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile)
 }
 
 int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
        struct fs *fs;
        struct vnode *devvp;
        ufs2_daddr_t bno;
        long size;
        ino_t inum;
+       struct workhead *wkhd;
 {
        return (EINVAL);
 }
@@ -123,19 +124,16 @@ ffs_copyonwrite(devvp, bp)
        return (EINVAL);
 }
 
+void
+ffs_sync_snap(mp, waitfor)
+       struct mount *mp;
+       int waitfor;
+{
+}
+
 #else
 FEATURE(ffs_snapshot, "FFS snapshot support");
 
-TAILQ_HEAD(snaphead, inode);
-
-struct snapdata {
-       LIST_ENTRY(snapdata) sn_link;
-       struct snaphead sn_head;
-       daddr_t sn_listsize;
-       daddr_t *sn_blklist;
-       struct lock sn_lock;
-};
-
 LIST_HEAD(, snapdata) snapfree;
 static struct mtx snapfree_lock;
 MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
@@ -1635,7 +1633,7 @@ ffs_snapremove(vp)
                        DIP_SET(ip, i_db[blkno], 0);
                else if ((dblk == blkstofrags(fs, blkno) &&
                     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
-                    ip->i_number))) {
+                    ip->i_number, NULL))) {
                        DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
                            btodb(fs->fs_bsize));
                        DIP_SET(ip, i_db[blkno], 0);
@@ -1660,7 +1658,7 @@ ffs_snapremove(vp)
                                        ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
                                else if ((dblk == blkstofrags(fs, blkno) &&
                                     ffs_snapblkfree(fs, ip->i_devvp, dblk,
-                                    fs->fs_bsize, ip->i_number))) {
+                                    fs->fs_bsize, ip->i_number, NULL))) {
                                        ip->i_din1->di_blocks -=
                                            btodb(fs->fs_bsize);
                                        ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
@@ -1674,7 +1672,7 @@ ffs_snapremove(vp)
                                ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
                        else if ((dblk == blkstofrags(fs, blkno) &&
                             ffs_snapblkfree(fs, ip->i_devvp, dblk,
-                            fs->fs_bsize, ip->i_number))) {
+                            fs->fs_bsize, ip->i_number, NULL))) {
                                ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
                                ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
                        }
@@ -1722,12 +1720,13 @@ ffs_snapremove(vp)
  * must always have been allocated from a BLK_NOCOPY location.
  */
 int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
        struct fs *fs;
        struct vnode *devvp;
        ufs2_daddr_t bno;
        long size;
        ino_t inum;
+       struct workhead *wkhd;
 {
        struct buf *ibp, *cbp, *savedcbp = 0;
        struct thread *td = curthread;
@@ -1825,6 +1824,17 @@ retry:
                                    "Grabonremove: snapino", ip->i_number,
                                    (intmax_t)lbn, inum);
 #endif
+                       /*
+                        * If journaling is tracking this write we must add
+                        * the work to the inode or indirect being written.
+                        */
+                       if (wkhd != NULL) {
+                               if (lbn < NDADDR)
+                                       softdep_inode_append(ip,
+                                           curthread->td_ucred, wkhd);
+                               else
+                                       softdep_buf_append(ibp, wkhd);
+                       }
                        if (lbn < NDADDR) {
                                DIP_SET(ip, i_db[lbn], bno);
                        } else if (ip->i_ump->um_fstype == UFS1) {
@@ -1902,6 +1912,8 @@ retry:
         * not be freed. Although space will be lost, the snapshot
         * will stay consistent.
         */
+       if (error != 0 && wkhd != NULL)
+               softdep_freework(wkhd);
        lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
        return (error);
 }
@@ -2400,6 +2412,42 @@ ffs_copyonwrite(devvp, bp)
 }
 
 /*
+ * sync snapshots to force freework records waiting on snapshots to claim
+ * blocks to free.
+ */
+void
+ffs_sync_snap(mp, waitfor)
+       struct mount *mp;
+       int waitfor;
+{
+       struct snapdata *sn;
+       struct vnode *devvp;
+       struct vnode *vp;
+       struct inode *ip;
+
+       devvp = VFSTOUFS(mp)->um_devvp;
+       if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
+               return;
+       for (;;) {
+               VI_LOCK(devvp);
+               sn = devvp->v_rdev->si_snapdata;
+               if (sn == NULL) {
+                       VI_UNLOCK(devvp);
+                       return;
+               }
+               if (lockmgr(&sn->sn_lock,
+                   LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
+                   VI_MTX(devvp)) == 0)
+                       break;
+       }
+       TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
+               vp = ITOV(ip);
+               ffs_syncvnode(vp, waitfor);
+       }
+       lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
+}
+
+/*
  * Read the specified block into the given buffer.
  * Much of this boiler-plate comes from bwrite().
  */

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c      Sun Jun 12 18:52:39 2011        (r223019)
+++ head/sys/ufs/ffs/ffs_softdep.c      Sun Jun 12 19:27:05 2011        (r223020)
@@ -584,6 +584,33 @@ softdep_get_depcounts(struct mount *mp,
        *softdepactiveaccp = 0;
 }
 
+void
+softdep_buf_append(bp, wkhd)
+       struct buf *bp;
+       struct workhead *wkhd;
+{
+
+       panic("softdep_buf_appendwork called");
+}
+
+void
+softdep_inode_append(ip, cred, wkhd)
+       struct inode *ip;
+       struct ucred *cred;
+       struct workhead *wkhd;
+{
+
+       panic("softdep_inode_appendwork called");
+}
+
+void
+softdep_freework(wkhd)
+       struct workhead *wkhd;
+{
+
+       panic("softdep_freework called");
+}
+
 #else
 
 FEATURE(softupdates, "FFS soft-updates support");
@@ -867,7 +894,7 @@ static      void freework_enqueue(struct free
 static int handle_workitem_freeblocks(struct freeblks *, int);
 static int handle_complete_freeblocks(struct freeblks *, int);
 static void handle_workitem_indirblk(struct freework *);
-static void handle_written_freework(struct freework *, int);
+static void handle_written_freework(struct freework *);
 static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
 static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
            struct workhead *);
@@ -1632,6 +1659,7 @@ process_truncates(vp)
                if (cgwait) {
                        FREE_LOCK(&lk);
                        sync_cgs(mp, MNT_WAIT);
+                       ffs_sync_snap(mp, MNT_WAIT);
                        ACQUIRE_LOCK(&lk);
                        continue;
                }
@@ -5922,7 +5950,7 @@ complete_trunc_indir(freework)
         */
        if (bp == NULL)  {
                if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
-                       handle_written_freework(freework, 0);
+                       handle_written_freework(freework);
                else
                        WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
                           &freework->fw_list);
@@ -5974,7 +6002,7 @@ out:
         */
        if (totblks > datablocks)
                return (0);
-       return (totblks - datablocks);
+       return (datablocks - totblks);
 }
 
 /*
@@ -7228,6 +7256,7 @@ freework_freeblock(freework)
                cancel_jnewblk(jnewblk, &wkhd);
                needj = 0;
        } else if (needj) {
+               freework->fw_state |= DELAYEDFREE;
                freeblks->fb_cgwait++;
                WORKLIST_INSERT(&wkhd, &freework->fw_list);
        }
@@ -7241,7 +7270,7 @@ freework_freeblock(freework)
         * made it to disk.  We can immediately free the freeblk.
         */
        if (needj == 0)
-               handle_written_freework(freework, 0);
+               handle_written_freework(freework);
 }
 
 /*
@@ -7256,7 +7285,8 @@ freework_enqueue(freework)
        struct freeblks *freeblks;
 
        freeblks = freework->fw_freeblks;
-       WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
+       if ((freework->fw_state & INPROGRESS) == 0)
+               WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
        if ((freeblks->fb_state &
            (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE &&
            LIST_EMPTY(&freeblks->fb_jblkdephd))
@@ -7282,13 +7312,14 @@ handle_workitem_indirblk(freework)
        ump = VFSTOUFS(freeblks->fb_list.wk_mp);
        fs = ump->um_fs;
        if (freework->fw_state & DEPCOMPLETE) {
-               handle_written_freework(freework, 0);
+               handle_written_freework(freework);
                return;
        }
        if (freework->fw_off == NINDIR(fs)) {
                freework_freeblock(freework);
                return;
        }
+       freework->fw_state |= INPROGRESS;
        FREE_LOCK(&lk);
        indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
            freework->fw_lbn);
@@ -7301,16 +7332,16 @@ handle_workitem_indirblk(freework)
  * the freeblks is added back to the worklist if there is more work to do.
  */
 static void
-handle_written_freework(freework, cgwrite)
+handle_written_freework(freework)
        struct freework *freework;
-       int cgwrite;
 {
        struct freeblks *freeblks;
        struct freework *parent;
 
        freeblks = freework->fw_freeblks;
        parent = freework->fw_parent;
-       freeblks->fb_cgwait -= cgwrite;
+       if (freework->fw_state & DELAYEDFREE)
+               freeblks->fb_cgwait--;
        freework->fw_state |= COMPLETE;
        if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
                WORKITEM_FREE(freework, D_FREEWORK);
@@ -7552,6 +7583,8 @@ indir_trunc(freework, dbn, lbn)
                return;
        }
        ACQUIRE_LOCK(&lk);
+       /* Protects against a race with complete_trunc_indir(). */
+       freework->fw_state &= ~INPROGRESS;
        /*
         * If we have an indirdep we need to enforce the truncation order
         * and discard it when it is complete.
@@ -7675,7 +7708,7 @@ indir_trunc(freework, dbn, lbn)
        if (freework->fw_blkno == dbn) {
                freework->fw_state |= ALLCOMPLETE;
                ACQUIRE_LOCK(&lk);
-               handle_written_freework(freework, 0);
+               handle_written_freework(freework);
                FREE_LOCK(&lk);
        }
        return;
@@ -10368,8 +10401,7 @@ softdep_disk_write_complete(bp)
                        continue;
 
                case D_FREEWORK:
-                       /* Freework on an indirect block, not bmsafemap. */
-                       handle_written_freework(WK_FREEWORK(wk), 0);
+                       handle_written_freework(WK_FREEWORK(wk));
                        break;
 
                case D_JSEGDEP:
@@ -10540,7 +10572,7 @@ handle_jwork(wkhd)
                        free_freedep(WK_FREEDEP(wk));
                        continue;
                case D_FREEWORK:
-                       handle_written_freework(WK_FREEWORK(wk), 1);
+                       handle_written_freework(WK_FREEWORK(wk));
                        continue;
                default:
                        panic("handle_jwork: Unknown type %s\n",
@@ -12738,6 +12770,53 @@ clear_inodedeps(td)
        }
 }
 
+void
+softdep_buf_append(bp, wkhd)
+       struct buf *bp;
+       struct workhead *wkhd;
+{
+       struct worklist *wk;
+
+       ACQUIRE_LOCK(&lk);
+       while ((wk = LIST_FIRST(wkhd)) != NULL) {
+               WORKLIST_REMOVE(wk);
+               WORKLIST_INSERT(&bp->b_dep, wk);
+       }
+       FREE_LOCK(&lk);
+
+}
+
+void
+softdep_inode_append(ip, cred, wkhd)
+       struct inode *ip;
+       struct ucred *cred;
+       struct workhead *wkhd;
+{
+       struct buf *bp;
+       struct fs *fs;
+       int error;
+
+       fs = ip->i_fs;
+       error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+           (int)fs->fs_bsize, cred, &bp);
+       if (error) {
+               softdep_freework(wkhd);
+               return;
+       }
+       softdep_buf_append(bp, wkhd);
+       bqrelse(bp);
+}
+
+void
+softdep_freework(wkhd)
+       struct workhead *wkhd;
+{
+
+       ACQUIRE_LOCK(&lk);
+       handle_jwork(wkhd);
+       FREE_LOCK(&lk);
+}
+
 /*
  * Function to determine if the buffer has outstanding dependencies
  * that will cause a roll-back if the buffer is written. If wantcount

Modified: head/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- head/sys/ufs/ufs/ufs_vnops.c        Sun Jun 12 18:52:39 2011        (r223019)
+++ head/sys/ufs/ufs/ufs_vnops.c        Sun Jun 12 19:27:05 2011        (r223020)
@@ -1838,6 +1838,8 @@ ufs_mkdir(ap)
 #ifdef QUOTA
                if ((error = getinoquota(ip)) ||
                    (error = chkiq(ip, 1, ucp, 0))) {
+                       if (DOINGSOFTDEP(tvp))
+                               softdep_revert_link(dp, ip);
                        UFS_VFREE(tvp, ip->i_number, dmode);
                        vput(tvp);
                        return (error);
@@ -1850,6 +1852,8 @@ ufs_mkdir(ap)
 #ifdef QUOTA
        if ((error = getinoquota(ip)) ||
            (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+               if (DOINGSOFTDEP(tvp))
+                       softdep_revert_link(dp, ip);
                UFS_VFREE(tvp, ip->i_number, dmode);
                vput(tvp);
                return (error);
@@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 #ifdef QUOTA
                if ((error = getinoquota(ip)) ||
                    (error = chkiq(ip, 1, ucp, 0))) {
+                       if (DOINGSOFTDEP(tvp))
+                               softdep_revert_link(pdir, ip);
                        UFS_VFREE(tvp, ip->i_number, mode);
                        vput(tvp);
                        return (error);
@@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 #ifdef QUOTA
        if ((error = getinoquota(ip)) ||
            (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+               if (DOINGSOFTDEP(tvp))
+                       softdep_revert_link(pdir, ip);
                UFS_VFREE(tvp, ip->i_number, mode);
                vput(tvp);
                return (error);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to