On Tue, Apr 19, 2005 at 12:16:16PM -0400, Brian Fundakowski Feldman wrote:
> On Tue, Apr 19, 2005 at 06:09:00PM +0200, Marc Olzheim wrote:
> > On Tue, Apr 19, 2005 at 06:02:58PM +0200, Marc Olzheim wrote:
> > > On Tue, Apr 19, 2005 at 11:18:00AM -0400, Brian Fundakowski Feldman wrote:
> > > > Does this work for you?
> > > 
> > > ...
> > > 
> > > cc -c -O -pipe  -Wall -Wredundant-decls -Wnested-externs 
> > > -Wstrict-prototypes  -Wmissing-prototypes -Wpointer-arith -Winline 
> > > -Wcast-qual  -fformat-extensions -std=c99 -g -nostdinc -I-  -I. 
> > > -I/usr/src/sys -I/usr/src/sys/contrib/dev/acpica 
> > > -I/usr/src/sys/contrib/altq -I/usr/src/sys/contrib/ipfilter 
> > > -I/usr/src/sys/contrib/pf -I/usr/src/sys/contrib/dev/ath 
> > > -I/usr/src/sys/contrib/dev/ath/freebsd -I/usr/src/sys/contrib/ngatm 
> > > -I/usr/src/sys/dev/twa -D_KERNEL -include opt_global.h -fno-common 
> > > -finline-limit=8000 --param inline-unit-growth=100 --param 
> > > large-function-growth=1000  -mno-align-long-strings 
> > > -mpreferred-stack-boundary=2  -mno-mmx -mno-3dnow -mno-sse -mno-sse2 
> > > -ffreestanding -Werror  /usr/src/sys/nfsclient/nfs_bio.c
> > > /usr/src/sys/nfsclient/nfs_bio.c: In function `nfs_write':
> > > /usr/src/sys/nfsclient/nfs_bio.c:976: error: invalid type argument of `->'
> > > /usr/src/sys/nfsclient/nfs_bio.c:976: error: invalid type argument of `->'
> > > /usr/src/sys/nfsclient/nfs_bio.c:984: error: invalid type argument of `->'
> > > /usr/src/sys/nfsclient/nfs_bio.c:984: error: invalid type argument of `->'
> > > *** Error code 1
> > 
> > That's about the BO_LOCK(vp->v_bufobj); and the BO_UNLOCK(vp->v_bufobj);
> 
> Yeah, I get that too.  I have no clue why it's doing that, I was figuring
> it was just my compile setup...

Okay, stupid me.  This compiles.

cvs diff: Diffing sys
Index: sys/buf.h
===================================================================
RCS file: /usr/ncvs/src/sys/sys/buf.h,v
retrieving revision 1.185
diff -u -u -r1.185 buf.h
--- sys/buf.h   10 Feb 2005 12:17:48 -0000      1.185
+++ sys/buf.h   19 Apr 2005 14:30:54 -0000
@@ -465,6 +465,7 @@
 extern int     maxswzone;              /* Max KVA for swap structures */
 extern int     maxbcache;              /* Max KVA for buffer cache */
 extern int     runningbufspace;
+extern int     hibufspace;
 extern int      buf_maxio;              /* nominal maximum I/O for buffer */
 extern struct  buf *buf;               /* The buffer headers. */
 extern char    *buffers;               /* The buffer contents. */
Index: sys/bufobj.h
===================================================================
RCS file: /usr/ncvs/src/sys/sys/bufobj.h,v
retrieving revision 1.13
diff -u -u -r1.13 bufobj.h
--- sys/bufobj.h        19 Feb 2005 11:44:57 -0000      1.13
+++ sys/bufobj.h        19 Apr 2005 20:39:10 -0000
@@ -109,13 +109,13 @@
 
 #define        BO_LOCK(bo) \
        do { \
-               KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \
+               KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
                mtx_lock((bo)->bo_mtx); \
        } while (0)
 
 #define BO_UNLOCK(bo) \
        do { \
-               KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \
+               KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
                mtx_unlock((bo)->bo_mtx); \
        } while (0)
 
cvs diff: Diffing kern
Index: kern/vfs_bio.c
===================================================================
RCS file: /usr/ncvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.482
diff -u -u -r1.482 vfs_bio.c
--- kern/vfs_bio.c      25 Mar 2005 00:20:37 -0000      1.482
+++ kern/vfs_bio.c      19 Apr 2005 14:30:54 -0000
@@ -127,7 +127,7 @@
 static int lobufspace;
 SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
     "Minimum amount of buffers we want to have");
-static int hibufspace;
+int hibufspace;
 SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
     "Maximum allowed value of bufspace (excluding buf_daemon)");
 static int bufreusecnt;
cvs diff: Diffing nfsclient
Index: nfsclient/nfs_bio.c
===================================================================
RCS file: /usr/ncvs/src/sys/nfsclient/nfs_bio.c,v
retrieving revision 1.150
diff -u -u -r1.150 nfs_bio.c
--- nfsclient/nfs_bio.c 18 Mar 2005 21:23:32 -0000      1.150
+++ nfsclient/nfs_bio.c 19 Apr 2005 20:38:34 -0000
@@ -844,6 +844,7 @@
        struct vattr vattr;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        daddr_t lbn;
+       off_t commitleft; /* if non-zero, the amount left we may write */
        int bcount;
        int n, on, error = 0;
        int haverslock = 0;
@@ -873,6 +874,14 @@
         */
        if (ioflag & (IO_APPEND | IO_SYNC)) {
                if (np->n_flag & NMODIFIED) {
+flush_and_restart:
+                       /*
+                        * Require non-blocking, synchronous writes to
+                        * dirty files to inform the program it needs
+                        * to fsync(2) explicitly.
+                        */
+                       if (ioflag & IO_NDELAY)
+                               return (EAGAIN);
                        np->n_attrstamp = 0;
                        error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
                        if (error)
@@ -953,12 +962,65 @@
        }
 
        biosize = vp->v_mount->mnt_stat.f_iosize;
+       commitleft = 0;
+       /*
+        * If there are possible modifications, find all of this file's
+        * B_NEEDCOMMIT buffers.  If our writes would exceed the local
+        * maximum per-file write commit size when combined with those,
+        * we decide to flush and/or do a short write.
+        */
+       if ((ioflag & (IO_SYNC | IO_INVAL)) != (IO_SYNC | IO_INVAL)) {
+               commitleft = nmp->nm_wcommitsize;
+               if (np->n_flag & NMODIFIED) {
+                       int wouldcommit = 0;
+                       BO_LOCK(&vp->v_bufobj);
+                       if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
+                               TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
+                                   b_bobufs) {
+                                       if (bp->b_flags & B_NEEDCOMMIT)
+                                               wouldcommit += bp->b_bcount;
+                               }
+                       }
+                       BO_UNLOCK(&vp->v_bufobj);
+                       /*
+                        * Since we're not operating synchronously and
+                        * bypassing the buffer cache, we are in a commit
+                        * and holding all of these buffers whether
+                        * transmitted or not.  If not limited, this
+                        * will lead to the buffer cache deadlocking,
+                        * as no one else can flush our uncommitted buffers.
+                        */
+                       wouldcommit += uio->uio_resid;
+                       /*
+                        * If we would initially exceed the maximum
+                        * outstanding write commit size, flush and restart.
+                        */
+                       if (wouldcommit > commitleft) {
+                               if (haverslock) {
+                                       nfs_rsunlock(np, td);
+                                       haverslock = 0;
+                               }
+                               goto flush_and_restart;
+                       }
+               } else {
+                       /*
+                        * With no outstanding commits, we are limited only
+                        * by commitleft as to how far we can go.
+                        */
+               }
+       }
 
        do {
                nfsstats.biocache_writes++;
                lbn = uio->uio_offset / biosize;
                on = uio->uio_offset & (biosize-1);
                n = min((unsigned)(biosize - on), uio->uio_resid);
+               /* Always allow at least the very first write. */
+               if (commitleft > 0) {
+                       commitleft -= n;
+                       if (commitleft == 0)
+                               commitleft = -1;
+               }
 again:
                /*
                 * Handle direct append and file extension cases, calculate
@@ -1146,10 +1208,21 @@
                } else {
                        bdwrite(bp);
                }
-       } while (uio->uio_resid > 0 && n > 0);
+       } while (uio->uio_resid > 0 && n > 0 && commitleft >= 0);
 
-       if (haverslock)
+       if (haverslock) {
+               haverslock = 0;
                nfs_rsunlock(np, td);
+       }
+
+       /*
+        * On the off chance that we're trying to do a very large
+        * and non-atomic write, go ahead and let that just continue
+        * within the same call after flushing out what's been written.
+        */
+       if (error == 0 && uio->uio_resid > 0 && n > 0 && commitleft < 0 &&
+           !(ioflag & IO_UNIT))
+               goto flush_and_restart;
 
        return (error);
 }
Index: nfsclient/nfs_vfsops.c
===================================================================
RCS file: /usr/ncvs/src/sys/nfsclient/nfs_vfsops.c,v
retrieving revision 1.172
diff -u -u -r1.172 nfs_vfsops.c
--- nfsclient/nfs_vfsops.c      24 Mar 2005 07:37:22 -0000      1.172
+++ nfsclient/nfs_vfsops.c      19 Apr 2005 14:32:19 -0000
@@ -41,6 +41,8 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -626,6 +628,12 @@
                else
                        nmp->nm_readahead = NFS_MAXRAHEAD;
        }
+       if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
+               if (argp->wcommitsize < nmp->nm_wsize)
+                       nmp->nm_wcommitsize = nmp->nm_wsize;
+               else
+                       nmp->nm_wcommitsize = argp->wcommitsize;
+       }
        if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
                if (argp->deadthresh <= NFS_MAXDEADTHRESH)
                        nmp->nm_deadthresh = argp->deadthresh;
@@ -808,6 +816,7 @@
                nmp->nm_wsize = NFS_WSIZE;
                nmp->nm_rsize = NFS_RSIZE;
        }
+       nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
        nmp->nm_readdirsize = NFS_READDIRSIZE;
        nmp->nm_numgrps = NFS_MAXGRPS;
        nmp->nm_readahead = NFS_DEFRAHEAD;
Index: nfsclient/nfsargs.h
===================================================================
RCS file: /usr/ncvs/src/sys/nfsclient/nfsargs.h,v
retrieving revision 1.67
diff -u -u -r1.67 nfsargs.h
--- nfsclient/nfsargs.h 7 Jan 2005 01:45:51 -0000       1.67
+++ nfsclient/nfsargs.h 19 Apr 2005 14:30:54 -0000
@@ -56,7 +56,7 @@
        int             retrans;        /* times to retry send */
        int             maxgrouplist;   /* Max. size of group list */
        int             readahead;      /* # of blocks to readahead */
-       int             __pad1;         /* was "leaseterm" */
+       int             wcommitsize;    /* Max. write commit size in bytes */
        int             deadthresh;     /* Retrans threshold */
        char            *hostname;      /* server's name */
        int             acregmin;       /* cache attrs for reg files min time */
@@ -80,7 +80,7 @@
 #define        NFSMNT_NFSV3            0x00000200  /* Use NFS Version 3 
protocol */
 /* 0x400 free, was NFSMNT_KERB */
 #define        NFSMNT_DUMBTIMR         0x00000800  /* Don't estimate rtt 
dynamically */
-/* 0x1000 free, was NFSMNT_LEASETERM */
+#define NFSMNT_WCOMMITSIZE     0x00001000  /* set max write commit size */
 #define        NFSMNT_READAHEAD        0x00002000  /* set read ahead */
 #define        NFSMNT_DEADTHRESH       0x00004000  /* set dead server retry 
thresh */
 #define        NFSMNT_RESVPORT         0x00008000  /* Allocate a reserved port 
*/
Index: nfsclient/nfsmount.h
===================================================================
RCS file: /usr/ncvs/src/sys/nfsclient/nfsmount.h,v
retrieving revision 1.29
diff -u -u -r1.29 nfsmount.h
--- nfsclient/nfsmount.h        7 Jan 2005 01:45:51 -0000       1.29
+++ nfsclient/nfsmount.h        19 Apr 2005 14:30:54 -0000
@@ -74,6 +74,7 @@
        int     nm_wsize;               /* Max size of write rpc */
        int     nm_readdirsize;         /* Size of a readdir rpc */
        int     nm_readahead;           /* Num. of blocks to readahead */
+       int     nm_wcommitsize;         /* Max size of commit for write */
        int     nm_acdirmin;            /* Directory attr cache min lifetime */
        int     nm_acdirmax;            /* Directory attr cache max lifetime */
        int     nm_acregmin;            /* Reg file attr cache min lifetime */

-- 
Brian Fundakowski Feldman                           \'[ FreeBSD ]''''''''''\
  <> [EMAIL PROTECTED]                               \  The Power to Serve! \
 Opinions expressed are my own.                       \,,,,,,,,,,,,,,,,,,,,,,\
_______________________________________________
freebsd-hackers@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to