Author: mckusick
Date: Sat Mar 23 21:56:19 2013
New Revision: 248665
URL: http://svnweb.freebsd.org/changeset/base/248665

Log:
  MFC of 246876 and 246877
  
  MFC 246876:
  
  Add barrier write capability to the VFS buffer interface. A barrier
  write is a disk write request that tells the disk that the buffer
  being written must be committed to the media along with any writes
  that preceded it before any future blocks may be written to the drive.
  
  Barrier writes are provided by adding the functions bbarrierwrite
  (bwrite with barrier) and babarrierwrite (bawrite with barrier).
  
  Following a bbarrierwrite the client knows that the requested buffer
  is on the media. It does not ensure that buffers written before that
  buffer are on the media. It only ensures that buffers written before
  that buffer will get to the media before any buffers written after
  that buffer. A flush command must be sent to the disk to ensure that
  all earlier written buffers are on the media.
  
  Reviewed by: kib
  Tested by:   Peter Holm
  
  MFC 246877:
  
  The UFS2 filesystem allocates new blocks of inodes as they are needed.
  When a cylinder group runs short of inodes, a new block for inodes is
  allocated, zero'ed, and written to the disk. The zero'ed inodes must
  be on the disk before the cylinder group can be updated to claim them.
  If the cylinder group claiming the new inodes were written before the
  zero'ed block of inodes, the system could crash with the filesystem in
  an unrecoverable state.
  
  Rather than adding a soft updates dependency to ensure that the new
  inode block is written before it is claimed by the cylinder group
  map, we just do a barrier write of the zero'ed inode block to ensure
  that it will get written before the updated cylinder group map can
  be written. This change should only slow down bulk loading of newly
  created filesystems since that is the primary time that new inode
  blocks need to be created.
  
  Reported by: Robert Watson
  Reviewed by: kib
  Tested by:   Peter Holm

Modified:
  stable/9/sys/geom/geom_vfs.c
  stable/9/sys/kern/vfs_bio.c
  stable/9/sys/kern/vfs_cluster.c
  stable/9/sys/sys/buf.h
  stable/9/sys/ufs/ffs/ffs_alloc.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)
  stable/9/sys/dev/   (props changed)
  stable/9/sys/dev/e1000/   (props changed)
  stable/9/sys/dev/isp/   (props changed)
  stable/9/sys/dev/ixgbe/   (props changed)
  stable/9/sys/dev/puc/   (props changed)
  stable/9/sys/fs/   (props changed)
  stable/9/sys/fs/ntfs/   (props changed)
  stable/9/sys/modules/   (props changed)
  stable/9/sys/net/   (props changed)
  stable/9/sys/sys/   (props changed)

Modified: stable/9/sys/geom/geom_vfs.c
==============================================================================
--- stable/9/sys/geom/geom_vfs.c        Sat Mar 23 21:34:10 2013        (r248664)
+++ stable/9/sys/geom/geom_vfs.c        Sat Mar 23 21:56:19 2013        (r248665)
@@ -168,6 +168,10 @@ g_vfs_strategy(struct bufobj *bo, struct
        bip->bio_done = g_vfs_done;
        bip->bio_caller2 = bp;
        bip->bio_length = bp->b_bcount;
+       if (bp->b_flags & B_BARRIER) {
+               bip->bio_flags |= BIO_ORDERED;
+               bp->b_flags &= ~B_BARRIER;
+       }
        g_io_request(bip, cp);
 }
 

Modified: stable/9/sys/kern/vfs_bio.c
==============================================================================
--- stable/9/sys/kern/vfs_bio.c Sat Mar 23 21:34:10 2013        (r248664)
+++ stable/9/sys/kern/vfs_bio.c Sat Mar 23 21:56:19 2013        (r248665)
@@ -206,6 +206,9 @@ SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarg
 static long notbufdflashes;
 SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, &notbufdflashes, 0,
     "Number of dirty buffer flushes done by the bufdaemon helpers");
+static long barrierwrites;
+SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0,
+    "Number of barrier writes");
 
 /*
  * Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -914,6 +917,9 @@ bufwrite(struct buf *bp)
                return (0);
        }
 
+       if (bp->b_flags & B_BARRIER)
+               barrierwrites++;
+
        oldflags = bp->b_flags;
 
        BUF_ASSERT_HELD(bp);
@@ -1033,6 +1039,8 @@ bdwrite(struct buf *bp)
 
        CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
        KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
+       KASSERT((bp->b_flags & B_BARRIER) == 0,
+           ("Barrier request in delayed write %p", bp));
        BUF_ASSERT_HELD(bp);
 
        if (bp->b_flags & B_INVAL) {
@@ -1193,6 +1201,40 @@ bawrite(struct buf *bp)
 }
 
 /*
+ *     babarrierwrite:
+ *
+ *     Asynchronous barrier write.  Start output on a buffer, but do not
+ *     wait for it to complete.  Place a write barrier after this write so
+ *     that this buffer and all buffers written before it are committed to
+ *     the disk before any buffers written after this write are committed
+ *     to the disk.  The buffer is released when the output completes.
+ */
+void
+babarrierwrite(struct buf *bp)
+{
+
+       bp->b_flags |= B_ASYNC | B_BARRIER;
+       (void) bwrite(bp);
+}
+
+/*
+ *     bbarrierwrite:
+ *
+ *     Synchronous barrier write.  Start output on a buffer and wait for
+ *     it to complete.  Place a write barrier after this write so that
+ *     this buffer and all buffers written before it are committed to 
+ *     the disk before any buffers written after this write are committed
+ *     to the disk.  The buffer is released when the output completes.
+ */
+int
+bbarrierwrite(struct buf *bp)
+{
+
+       bp->b_flags |= B_BARRIER;
+       return (bwrite(bp));
+}
+
+/*
  *     bwillwrite:
  *
  *     Called prior to the locking of any vnodes when we are expecting to

Modified: stable/9/sys/kern/vfs_cluster.c
==============================================================================
--- stable/9/sys/kern/vfs_cluster.c     Sat Mar 23 21:34:10 2013        (r248664)
+++ stable/9/sys/kern/vfs_cluster.c     Sat Mar 23 21:56:19 2013        (r248665)
@@ -944,11 +944,17 @@ cluster_wbuild(vp, size, start_lbn, len)
                        }
                        bp->b_bcount += size;
                        bp->b_bufsize += size;
-                       bundirty(tbp);
-                       tbp->b_flags &= ~B_DONE;
-                       tbp->b_ioflags &= ~BIO_ERROR;
+                       /*
+                        * If any of the clustered buffers have their
+                        * B_BARRIER flag set, transfer that request to
+                        * the cluster.
+                        */
+                       bp->b_flags |= (tbp->b_flags & B_BARRIER);
+                       tbp->b_flags &= ~(B_DONE | B_BARRIER);
                        tbp->b_flags |= B_ASYNC;
+                       tbp->b_ioflags &= ~BIO_ERROR;
                        tbp->b_iocmd = BIO_WRITE;
+                       bundirty(tbp);
                        reassignbuf(tbp);               /* put on clean list */
                        bufobj_wref(tbp->b_bufobj);
                        BUF_KERNPROC(tbp);

Modified: stable/9/sys/sys/buf.h
==============================================================================
--- stable/9/sys/sys/buf.h      Sat Mar 23 21:34:10 2013        (r248664)
+++ stable/9/sys/sys/buf.h      Sat Mar 23 21:56:19 2013        (r248665)
@@ -205,7 +205,7 @@ struct buf {
 #define        B_00000800      0x00000800      /* Available flag. */
 #define        B_00001000      0x00001000      /* Available flag. */
 #define        B_INVAL         0x00002000      /* Does not contain valid info. */
-#define        B_00004000      0x00004000      /* Available flag. */
+#define        B_BARRIER       0x00004000      /* Write this and all preceeding first. */
 #define        B_NOCACHE       0x00008000      /* Do not cache block after use. */
 #define        B_MALLOC        0x00010000      /* malloced b_data */
 #define        B_CLUSTEROK     0x00020000      /* Pagein op, so swap() can count it. */
@@ -491,6 +491,8 @@ int breadn_flags(struct vnode *, daddr_t
            struct ucred *, int, struct buf **);
 void   bdwrite(struct buf *);
 void   bawrite(struct buf *);
+void   babarrierwrite(struct buf *);
+int    bbarrierwrite(struct buf *);
 void   bdirty(struct buf *);
 void   bundirty(struct buf *);
 void   bufstrategy(struct bufobj *, struct buf *);

Modified: stable/9/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- stable/9/sys/ufs/ffs/ffs_alloc.c    Sat Mar 23 21:34:10 2013        (r248664)
+++ stable/9/sys/ufs/ffs/ffs_alloc.c    Sat Mar 23 21:56:19 2013        (r248665)
@@ -1801,7 +1801,6 @@ gotit:
        /*
         * Check to see if we need to initialize more inodes.
         */
-       ibp = NULL;
        if (fs->fs_magic == FS_UFS2_MAGIC &&
            ipref + INOPB(fs) > cgp->cg_initediblk &&
            cgp->cg_initediblk < cgp->cg_niblk) {
@@ -1814,6 +1813,16 @@ gotit:
                        dp2->di_gen = arc4random() / 2 + 1;
                        dp2++;
                }
+               /*
+                * Rather than adding a soft updates dependency to ensure
+                * that the new inode block is written before it is claimed
+                * by the cylinder group map, we just do a barrier write
+                * here. The barrier write will ensure that the inode block
+                * gets written before the updated cylinder group map can be
+                * written. The barrier write should only slow down bulk
+                * loading of newly created filesystems.
+                */
+               babarrierwrite(ibp);
                cgp->cg_initediblk += INOPB(fs);
        }
        UFS_LOCK(ump);
@@ -1832,8 +1841,6 @@ gotit:
        if (DOINGSOFTDEP(ITOV(ip)))
                softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode);
        bdwrite(bp);
-       if (ibp != NULL)
-               bawrite(ibp);
        return ((ino_t)(cg * fs->fs_ipg + ipref));
 }
 
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to