OK, I think I've whacked this one.  Try this patch and see if it
    fixes your buf_daemon() lockups.  The patch also fixes the
    double caching of data that occurs with file-backed MD.

    If this works for you, Mark, I'll commit it and probably also MFC it.
    I'll also be able to apply the same IO_NOWDRAIN fixes to the NFS server
    code for loopback mounts.

                                                -Matt

Index: dev/md/md.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/md/md.c,v
retrieving revision 1.47
diff -u -r1.47 md.c
--- dev/md/md.c 2001/10/11 23:38:13     1.47
+++ dev/md/md.c 2001/11/04 23:54:18
@@ -388,13 +388,18 @@
                auio.uio_td = curthread;
                if (VOP_ISLOCKED(sc->vnode, NULL))
                        vprint("unexpected md driver lock", sc->vnode);
+               /*
+                * When reading set IO_DIRECT to try to avoid double-caching
+                * the data.  When writing IO_DIRECT is not optimal, but we
+                * must set IO_NOWDRAIN to avoid a wdrain deadlock.
+                */
                if (bp->bio_cmd == BIO_READ) {
                        vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
-                       error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
+                       error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
                } else {
                        (void) vn_start_write(sc->vnode, &mp, V_WAIT);
                        vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
-                       error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
+                       error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
                        vn_finished_write(mp);
                }
                VOP_UNLOCK(sc->vnode, 0, curthread);
Index: kern/vfs_bio.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.291
diff -u -r1.291 vfs_bio.c
--- kern/vfs_bio.c      2001/10/21 06:26:55     1.291
+++ kern/vfs_bio.c      2001/11/04 23:41:19
@@ -758,11 +758,15 @@
                int rtval = bufwait(bp);
                brelse(bp);
                return (rtval);
-       } else {
+       } else if ((oldflags & B_NOWDRAIN) == 0) {
                /*
                 * don't allow the async write to saturate the I/O
-                * system.  There is no chance of deadlock here because
-                * we are blocking on I/O that is already in-progress.
+                * system.  Deadlocks can occur only if a device strategy
+                * routine (like in MD) turns around and issues another
+                * high-level write, in which case B_NOWDRAIN is expected
+                * to be set.  Otherwise we will not deadlock here because
+                * we are blocking waiting for I/O that is already in-progress
+                * to complete.
                 */
                waitrunningbufspace();
        }
@@ -1286,7 +1290,8 @@
 
        /* unlock */
        BUF_UNLOCK(bp);
-       bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT);
+       bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | 
+                       B_DIRECT | B_NOWDRAIN);
        bp->b_ioflags &= ~BIO_ORDERED;
        if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
                panic("brelse: not dirty");
Index: sys/buf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/buf.h,v
retrieving revision 1.121
diff -u -r1.121 buf.h
--- sys/buf.h   2001/09/12 08:38:04     1.121
+++ sys/buf.h   2001/11/04 23:30:25
@@ -192,6 +192,11 @@
  *                     the pages underlying the buffer.  B_DIRECT is
  *                     sticky until the buffer is released and typically
  *                     only has an effect when B_RELBUF is also set.
+ *
+ *     B_NOWDRAIN      This flag should be set when a device (like MD)
+ *                     does a turn-around VOP_WRITE from its strategy
+ *                     routine.  This flag prevents bwrite() from blocking
+ *                     in wdrain, avoiding a deadlock situation.
  */
 
 #define        B_AGE           0x00000001      /* Move to age queue when I/O done. */
@@ -204,7 +209,7 @@
 #define        B_DELWRI        0x00000080      /* Delay I/O until buffer reused. */
 #define        B_DONE          0x00000200      /* I/O completed. */
 #define        B_EINTR         0x00000400      /* I/O was interrupted */
-#define        B_00000800      0x00000800      /* Available flag. */
+#define        B_NOWDRAIN      0x00000800      /* Avoid wdrain deadlock */
 #define        B_SCANNED       0x00001000      /* VOP_FSYNC funcs mark written bufs */
 #define        B_INVAL         0x00002000      /* Does not contain valid info. */
 #define        B_LOCKED        0x00004000      /* Locked in core (not reusable). */
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.162
diff -u -r1.162 vnode.h
--- sys/vnode.h 2001/10/27 19:58:55     1.162
+++ sys/vnode.h 2001/11/04 23:27:40
@@ -222,6 +222,7 @@
 #define        IO_INVAL        0x40            /* invalidate after I/O */
 #define        IO_ASYNC        0x80            /* bawrite rather then bdwrite */
 #define IO_DIRECT      0x100           /* attempt to bypass buffer cache */
+#define IO_NOWDRAIN    0x200           /* do not block on wdrain */
 
 /*
  *  Modes.  Some values same as Ixxx entries from inode.h for now.
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.82
diff -u -r1.82 ufs_readwrite.c
--- ufs/ufs/ufs_readwrite.c     2001/09/12 08:38:10     1.82
+++ ufs/ufs/ufs_readwrite.c     2001/11/04 23:29:15
@@ -511,6 +511,8 @@
                        break;
                if (ioflag & IO_DIRECT)
                        bp->b_flags |= B_DIRECT;
+               if (ioflag & IO_NOWDRAIN)
+                       bp->b_flags |= B_NOWDRAIN;
 
                if (uio->uio_offset + xfersize > ip->i_size) {
                        ip->i_size = uio->uio_offset + xfersize;

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to