Author: scottl
Date: Tue Jan 22 07:40:38 2013
New Revision: 245779
URL: http://svnweb.freebsd.org/changeset/base/245779

Log:
  MFC r243018:
  
   - Fix a truncation bug with softdep journaling that could leak blocks on
     crash.  When truncating a file that never made it to disk we use the
     canceled allocation dependencies to hold the journal records until
     the truncation completes.  Previously allocdirect dependencies on
     the id_bufwait list were not considered and their journal space
     could expire before the bitmaps were written.  Cancel them and attach
     them to the freeblks as we do for other allocdirects.
   - Add KTR traces that were used to debug this problem.
   - When adding jsegdeps, always use jwork_insert() so we don't have more
     than one segdep on a given jwork list.

Modified:
  stable/9/sys/ufs/ffs/ffs_softdep.c
Directory Properties:
  stable/9/sys/   (props changed)

Modified: stable/9/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- stable/9/sys/ufs/ffs/ffs_softdep.c  Tue Jan 22 07:38:43 2013        (r245778)
+++ stable/9/sys/ufs/ffs/ffs_softdep.c  Tue Jan 22 07:40:38 2013        (r245779)
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/buf.h>
 #include <sys/kdb.h>
 #include <sys/kthread.h>
+#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -92,6 +93,8 @@ __FBSDID("$FreeBSD$");
 
 #include <ddb/ddb.h>
 
+#define        KTR_SUJ 0       /* Define to KTR_SPARE. */
+
 #ifndef SOFTUPDATES
 
 int
@@ -769,6 +772,34 @@ struct pagedep_hashhead;
 struct bmsafemap_hashhead;
 
 /*
+ * Private journaling structures.
+ */
+struct jblocks {
+       struct jseglst  jb_segs;        /* TAILQ of current segments. */
+       struct jseg     *jb_writeseg;   /* Next write to complete. */
+       struct jseg     *jb_oldestseg;  /* Oldest segment with valid entries. */
+       struct jextent  *jb_extent;     /* Extent array. */
+       uint64_t        jb_nextseq;     /* Next sequence number. */
+       uint64_t        jb_oldestwrseq; /* Oldest written sequence number. */
+       uint8_t         jb_needseg;     /* Need a forced segment. */
+       uint8_t         jb_suspended;   /* Did journal suspend writes? */
+       int             jb_avail;       /* Available extents. */
+       int             jb_used;        /* Last used extent. */
+       int             jb_head;        /* Allocator head. */
+       int             jb_off;         /* Allocator extent offset. */
+       int             jb_blocks;      /* Total disk blocks covered. */
+       int             jb_free;        /* Total disk blocks free. */
+       int             jb_min;         /* Minimum free space. */
+       int             jb_low;         /* Low on space. */
+       int             jb_age;         /* Insertion time of oldest rec. */
+};
+
+struct jextent {
+       ufs2_daddr_t    je_daddr;       /* Disk block address. */
+       int             je_blocks;      /* Disk block count. */
+};
+
+/*
  * Internal function prototypes.
  */
 static void softdep_error(char *, int);
@@ -2273,19 +2304,15 @@ static void
 indirblk_insert(freework)
        struct freework *freework;
 {
-       struct freeblks *freeblks;
-       struct jsegdep *jsegdep;
-       struct worklist *wk;
+       struct jblocks *jblocks;
+       struct jseg *jseg;
 
-       freeblks = freework->fw_freeblks;
-       LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list)
-               if (wk->wk_type == D_JSEGDEP)
-                       break;
-       if (wk == NULL)
+       jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+       jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
+       if (jseg == NULL)
                return;
        
-       jsegdep = WK_JSEGDEP(wk);
-       LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs);
+       LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
        TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
            freework->fw_blkno), freework, fw_next);
        freework->fw_state &= ~DEPCOMPLETE;
@@ -2438,31 +2465,6 @@ softdep_unmount(mp)
        journal_unmount(mp);
 }
 
-struct jblocks {
-       struct jseglst  jb_segs;        /* TAILQ of current segments. */
-       struct jseg     *jb_writeseg;   /* Next write to complete. */
-       struct jseg     *jb_oldestseg;  /* Oldest segment with valid entries. */
-       struct jextent  *jb_extent;     /* Extent array. */
-       uint64_t        jb_nextseq;     /* Next sequence number. */
-       uint64_t        jb_oldestwrseq; /* Oldest written sequence number. */
-       uint8_t         jb_needseg;     /* Need a forced segment. */
-       uint8_t         jb_suspended;   /* Did journal suspend writes? */
-       int             jb_avail;       /* Available extents. */
-       int             jb_used;        /* Last used extent. */
-       int             jb_head;        /* Allocator head. */
-       int             jb_off;         /* Allocator extent offset. */
-       int             jb_blocks;      /* Total disk blocks covered. */
-       int             jb_free;        /* Total disk blocks free. */
-       int             jb_min;         /* Minimum free space. */
-       int             jb_low;         /* Low on space. */
-       int             jb_age;         /* Insertion time of oldest rec. */
-};
-
-struct jextent {
-       ufs2_daddr_t    je_daddr;       /* Disk block address. */
-       int             je_blocks;      /* Disk block count. */
-};
-
 static struct jblocks *
 jblocks_create(void)
 {
@@ -3668,7 +3670,7 @@ handle_written_jnewblk(jnewblk)
                 */
                freefrag = WK_FREEFRAG(jnewblk->jn_dep);
                freefrag->ff_jdep = NULL;
-               WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list);
+               jwork_insert(&freefrag->ff_jwork, jsegdep);
                break;
        case D_FREEWORK:
                /*
@@ -3676,8 +3678,7 @@ handle_written_jnewblk(jnewblk)
                 */
                freework = WK_FREEWORK(jnewblk->jn_dep);
                freework->fw_jnewblk = NULL;
-               WORKLIST_INSERT(&freework->fw_freeblks->fb_jwork,
-                   &jsegdep->jd_list);
+               jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep);
                break;
        default:
                panic("handle_written_jnewblk: Unknown type %d.",
@@ -3707,6 +3708,7 @@ cancel_jfreefrag(jfreefrag)
        jfreefrag->fr_freefrag = NULL;
        free_jfreefrag(jfreefrag);
        freefrag->ff_state |= DEPCOMPLETE;
+       CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno);
 }
 
 /*
@@ -3770,7 +3772,7 @@ handle_written_jblkdep(jblkdep)
        jblkdep->jb_jsegdep = NULL;
        freeblks = jblkdep->jb_freeblks;
        LIST_REMOVE(jblkdep, jb_deps);
-       WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list);
+       jwork_insert(&freeblks->fb_jwork, jsegdep);
        /*
         * If the freeblks is all journaled, we can add it to the worklist.
         */
@@ -3973,6 +3975,7 @@ cancel_jfreeblk(freeblks, blkno)
        }
        if (jblkdep == NULL)
                return;
+       CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno);
        free_jsegdep(jblkdep->jb_jsegdep);
        LIST_REMOVE(jblkdep, jb_deps);
        WORKITEM_FREE(jfreeblk, D_JFREEBLK);
@@ -4213,6 +4216,7 @@ cancel_jnewblk(jnewblk, wkhd)
 {
        struct jsegdep *jsegdep;
 
+       CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno);
        jsegdep = jnewblk->jn_jsegdep;
        if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL)
                panic("cancel_jnewblk: Invalid state");
@@ -4904,6 +4908,10 @@ softdep_setup_blkmapdep(bp, mp, newblkno
                }
 #endif
        }
+
+       CTR3(KTR_SUJ,
+           "softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d",
+           newblkno, frags, oldfrags);
        ACQUIRE_LOCK(&lk);
        if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0)
                panic("softdep_setup_blkmapdep: found block");
@@ -5065,6 +5073,10 @@ softdep_setup_allocdirect(ip, off, newbl
        else
                freefrag = NULL;
 
+       CTR6(KTR_SUJ,
+           "softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd "
+           "off %jd newsize %ld oldsize %d",
+           ip->i_number, newblkno, oldblkno, off, newsize, oldsize);
        ACQUIRE_LOCK(&lk);
        if (off >= NDADDR) {
                if (lbn > 0)
@@ -5343,6 +5355,8 @@ newfreefrag(ip, blkno, size, lbn)
        struct freefrag *freefrag;
        struct fs *fs;
 
+       CTR4(KTR_SUJ, "newfreefrag: ino %d blkno %jd size %ld lbn %jd",
+           ip->i_number, blkno, size, lbn);
        fs = ip->i_fs;
        if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
                panic("newfreefrag: frag size");
@@ -5378,6 +5392,9 @@ handle_workitem_freefrag(freefrag)
        struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp);
        struct workhead wkhd;
 
+       CTR3(KTR_SUJ,
+           "handle_workitem_freefrag: ino %d blkno %jd size %ld",
+           freefrag->ff_inum, freefrag->ff_blkno, freefrag->ff_fragsize);
        /*
         * It would be illegal to add new completion items to the
         * freefrag after it was schedule to be done so it must be
@@ -5596,6 +5613,9 @@ softdep_setup_allocindir_page(ip, lbn, b
        if (lbn != nbp->b_lblkno)
                panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
                    lbn, bp->b_lblkno);
+       CTR4(KTR_SUJ,
+           "softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd "
+           "lbn %jd", ip->i_number, newblkno, oldblkno, lbn);
        ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
        mp = UFSTOVFS(ip->i_ump);
        aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
@@ -5634,6 +5654,9 @@ softdep_setup_allocindir_meta(nbp, ip, b
        ufs_lbn_t lbn;
        int dflags;
 
+       CTR3(KTR_SUJ,
+           "softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d",
+           ip->i_number, newblkno, ptrno);
        lbn = nbp->b_lblkno;
        ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
        aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
@@ -6238,6 +6261,7 @@ softdep_journal_freeblocks(ip, cred, len
        int flags;              /* IO_EXT and/or IO_NORMAL */
 {
        struct freeblks *freeblks, *fbn;
+       struct worklist *wk, *wkn;
        struct inodedep *inodedep;
        struct jblkdep *jblkdep;
        struct allocdirect *adp, *adpn;
@@ -6272,6 +6296,8 @@ softdep_journal_freeblocks(ip, cred, len
        if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
            length == 0)
                needj = 0;
+       CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d",
+           ip->i_number, length, needj);
        FREE_LOCK(&lk);
        /*
         * Calculate the lbn that we are truncating to.  This results in -1
@@ -6425,6 +6451,21 @@ softdep_journal_freeblocks(ip, cred, len
                        cancel_allocdirect(&inodedep->id_extupdt, adp,
                            freeblks);
        /*
+        * Scan the bufwait list for newblock dependencies that will never
+        * make it to disk.
+        */
+       LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) {
+               if (wk->wk_type != D_ALLOCDIRECT)
+                       continue;
+               adp = WK_ALLOCDIRECT(wk);
+               if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) ||
+                   ((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) {
+                       cancel_jfreeblk(freeblks, adp->ad_newblkno);
+                       cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork);
+                       WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk);
+               }
+       }
+       /*
         * Add journal work.
         */
        LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps)
@@ -6563,6 +6604,8 @@ softdep_setup_freeblocks(ip, length, fla
        ufs_lbn_t tmpval;
        ufs_lbn_t lbn;
 
+       CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld",
+           ip->i_number, length);
        fs = ip->i_fs;
        mp = UFSTOVFS(ip->i_ump);
        if (length != 0)
@@ -7088,6 +7131,8 @@ cancel_newblk(newblk, wk, wkhd)
 {
        struct jnewblk *jnewblk;
 
+       CTR1(KTR_SUJ, "cancel_newblk: blkno %jd", newblk->nb_newblkno);
+           
        newblk->nb_state |= GOINGAWAY;
        /*
         * Previously we traversed the completedhd on each indirdep
@@ -7456,6 +7501,9 @@ freework_freeblock(freework)
        }
        FREE_LOCK(&lk);
        freeblks_free(ump, freeblks, btodb(bsize));
+       CTR4(KTR_SUJ,
+           "freework_freeblock: ino %d blkno %jd lbn %jd size %ld",
+           freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize);
        ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
            freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
        ACQUIRE_LOCK(&lk);
@@ -7889,6 +7937,9 @@ indir_trunc(freework, dbn, lbn)
                                    &freedep->fd_list);
                                freedeps++;
                        }
+                       CTR3(KTR_SUJ,
+                           "indir_trunc: ino %d blkno %jd size %ld",
+                           freeblks->fb_inum, nb, fs->fs_bsize);
                        ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
                            fs->fs_bsize, freeblks->fb_inum,
                            freeblks->fb_vtype, &wkhd);
@@ -7924,6 +7975,9 @@ indir_trunc(freework, dbn, lbn)
         * If we're not journaling we can free the indirect now.
         */
        dbn = dbtofsb(fs, dbn);
+       CTR3(KTR_SUJ,
+           "indir_trunc 2: ino %d blkno %jd size %ld",
+           freeblks->fb_inum, dbn, fs->fs_bsize);
        ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
            freeblks->fb_inum, freeblks->fb_vtype, NULL);
        /* Non SUJ softdep does single-threaded truncations. */
@@ -10359,6 +10413,10 @@ softdep_setup_blkfree(mp, bp, blkno, fra
        int i;
 #endif
 
+       CTR3(KTR_SUJ,
+           "softdep_setup_blkfree: blkno %jd frags %d wk head %p",
+           blkno, frags, wkhd);
+
        ACQUIRE_LOCK(&lk);
        /* Lookup the bmsafemap so we track when it is dirty. */
        fs = VFSTOUFS(mp)->um_fs;
@@ -10370,6 +10428,9 @@ softdep_setup_blkfree(mp, bp, blkno, fra
         */
        if (wkhd) {
                while ((wk = LIST_FIRST(wkhd)) != NULL) {
+                       CTR2(KTR_SUJ,
+                           "softdep_setup_blkfree: blkno %jd wk type %d",
+                           blkno, wk->wk_type);
                        WORKLIST_REMOVE(wk);
                        if (wk->wk_type != D_JNEWBLK) {
                                WORKLIST_INSERT(&bmsafemap->sm_freehd, wk);
_______________________________________________
svn-src-stable-9@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "svn-src-stable-9-unsubscribe@freebsd.org"

Reply via email to