Author: jhb
Date: Thu Dec  1 18:46:28 2011
New Revision: 228185
URL: http://svn.freebsd.org/changeset/base/228185

Log:
  Enhance the sequential access heuristic used to perform readahead in the
  NFS server and reuse it for writes as well to allow writes to the backing
  store to be clustered.
  - Use a prime number for the size of the heuristic table (1017 is not
    prime).
  - Move the logic to locate a heuristic entry from the table and compute
    the sequential count out of VOP_READ() and into a separate routine.
  - Use the logic from sequential_heuristic() in vfs_vnops.c to update the
    seqcount when a sequential access is performed rather than just
    increasing seqcount by 1.  This lets the clustering count ramp up
    faster.
  - Allow for some reordering of RPCs and if it is detected leave the current
    seqcount as-is rather than dropping back to a seqcount of 1.  Also,
    when out of order access is encountered, cut seqcount in half rather than
    dropping it all the way back to 1 to further aid with reordering.
  - Fix the new NFS server to properly update the next offset after a
    successful VOP_READ() so that the readahead actually works.
  
  Some of these changes came from an earlier patch by Bjorn Gronwall that was
  forwarded to me by bde@.
  
  Discussed with:       bde, rmacklem, fs@
  Submitted by: Bjorn Gronwall (1, 4)
  MFC after:    2 weeks

Modified:
  head/sys/fs/nfsserver/nfs_nfsdport.c
  head/sys/nfsserver/nfs_serv.c

Modified: head/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c        Thu Dec  1 15:33:58 2011        (r228184)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c        Thu Dec  1 18:46:28 2011        (r228185)
@@ -90,20 +90,78 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_de
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 
-#define        NUM_HEURISTIC           1017
+#define        MAX_REORDERED_RPC       16
+#define        NUM_HEURISTIC           1031
 #define        NHUSE_INIT              64
 #define        NHUSE_INC               16
 #define        NHUSE_MAX               2048
 
 static struct nfsheur {
        struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
-       off_t nh_nextr;         /* next offset for sequential detection */
+       off_t nh_nextoff;       /* next offset for sequential detection */
        int nh_use;             /* use count for selection */
        int nh_seqcount;        /* heuristic */
 } nfsheur[NUM_HEURISTIC];
 
 
 /*
+ * Heuristic to detect sequential operation.
+ */
+static struct nfsheur *
+nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
+{
+       struct nfsheur *nh;
+       int hi, try;
+
+       /* Locate best candidate. */
+       try = 32;
+       hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
+       nh = &nfsheur[hi];
+       while (try--) {
+               if (nfsheur[hi].nh_vp == vp) {
+                       nh = &nfsheur[hi];
+                       break;
+               }
+               if (nfsheur[hi].nh_use > 0)
+                       --nfsheur[hi].nh_use;
+               hi = (hi + 1) % NUM_HEURISTIC;
+               if (nfsheur[hi].nh_use < nh->nh_use)
+                       nh = &nfsheur[hi];
+       }
+
+       /* Initialize hint if this is a new file. */
+       if (nh->nh_vp != vp) {
+               nh->nh_vp = vp;
+               nh->nh_nextoff = uio->uio_offset;
+               nh->nh_use = NHUSE_INIT;
+               if (uio->uio_offset == 0)
+                       nh->nh_seqcount = 4;
+               else
+                       nh->nh_seqcount = 1;
+       }
+
+       /* Calculate heuristic. */
+       if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
+           uio->uio_offset == nh->nh_nextoff) {
+               /* See comments in vfs_vnops.c:sequential_heuristic(). */
+               nh->nh_seqcount += howmany(uio->uio_resid, 16384);
+               if (nh->nh_seqcount > IO_SEQMAX)
+                       nh->nh_seqcount = IO_SEQMAX;
+       } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
+           imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
+               /* Probably a reordered RPC, leave seqcount alone. */
+       } else if (nh->nh_seqcount > 1) {
+               nh->nh_seqcount /= 2;
+       } else {
+               nh->nh_seqcount = 0;
+       }
+       nh->nh_use += NHUSE_INC;
+       if (nh->nh_use > NHUSE_MAX)
+               nh->nh_use = NHUSE_MAX;
+       return (nh);
+}
+
+/*
  * Get attributes into nfsvattr structure.
  */
 int
@@ -567,60 +625,11 @@ nfsvno_read(struct vnode *vp, off_t off,
        int i;
        struct iovec *iv;
        struct iovec *iv2;
-       int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
+       int error = 0, len, left, siz, tlen, ioflag = 0;
        struct mbuf *m2 = NULL, *m3;
        struct uio io, *uiop = &io;
        struct nfsheur *nh;
 
-       /*
-        * Calculate seqcount for heuristic
-        */
-       /*
-        * Locate best candidate
-        */
-
-       hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
-       nh = &nfsheur[hi];
-
-       while (try--) {
-               if (nfsheur[hi].nh_vp == vp) {
-                       nh = &nfsheur[hi];
-                       break;
-               }
-               if (nfsheur[hi].nh_use > 0)
-                       --nfsheur[hi].nh_use;
-               hi = (hi + 1) % NUM_HEURISTIC;
-               if (nfsheur[hi].nh_use < nh->nh_use)
-                       nh = &nfsheur[hi];
-       }
-
-       if (nh->nh_vp != vp) {
-               nh->nh_vp = vp;
-               nh->nh_nextr = off;
-               nh->nh_use = NHUSE_INIT;
-               if (off == 0)
-                       nh->nh_seqcount = 4;
-               else
-                       nh->nh_seqcount = 1;
-       }
-
-       /*
-        * Calculate heuristic
-        */
-
-       if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
-               if (++nh->nh_seqcount > IO_SEQMAX)
-                       nh->nh_seqcount = IO_SEQMAX;
-       } else if (nh->nh_seqcount > 1) {
-               nh->nh_seqcount = 1;
-       } else {
-               nh->nh_seqcount = 0;
-       }
-       nh->nh_use += NHUSE_INC;
-       if (nh->nh_use > NHUSE_MAX)
-               nh->nh_use = NHUSE_MAX;
-       ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
-
        len = left = NFSM_RNDUP(cnt);
        m3 = NULL;
        /*
@@ -665,6 +674,8 @@ nfsvno_read(struct vnode *vp, off_t off,
        uiop->uio_resid = len;
        uiop->uio_rw = UIO_READ;
        uiop->uio_segflg = UIO_SYSSPACE;
+       nh = nfsrv_sequential_heuristic(uiop, vp);
+       ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
        error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
        FREE((caddr_t)iv2, M_TEMP);
        if (error) {
@@ -672,6 +683,7 @@ nfsvno_read(struct vnode *vp, off_t off,
                *mpp = NULL;
                goto out;
        }
+       nh->nh_nextoff = uiop->uio_offset;
        tlen = len - uiop->uio_resid;
        cnt = cnt < tlen ? cnt : tlen;
        tlen = NFSM_RNDUP(cnt);
@@ -700,6 +712,7 @@ nfsvno_write(struct vnode *vp, off_t off
        struct iovec *iv;
        int ioflags, error;
        struct uio io, *uiop = &io;
+       struct nfsheur *nh;
 
        MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
            M_WAITOK);
@@ -733,7 +746,11 @@ nfsvno_write(struct vnode *vp, off_t off
        uiop->uio_segflg = UIO_SYSSPACE;
        NFSUIOPROC(uiop, p);
        uiop->uio_offset = off;
+       nh = nfsrv_sequential_heuristic(uiop, vp);
+       ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
        error = VOP_WRITE(vp, uiop, ioflags, cred);
+       if (error == 0)
+               nh->nh_nextoff = uiop->uio_offset;
        FREE((caddr_t)iv, M_TEMP);
 
        NFSEXITCODE(error);

Modified: head/sys/nfsserver/nfs_serv.c
==============================================================================
--- head/sys/nfsserver/nfs_serv.c       Thu Dec  1 15:33:58 2011        (r228184)
+++ head/sys/nfsserver/nfs_serv.c       Thu Dec  1 18:46:28 2011        (r228185)
@@ -107,14 +107,15 @@ FEATURE(nfsserver, "NFS server");
 
 #define MAX_COMMIT_COUNT       (1024 * 1024)
 
-#define NUM_HEURISTIC          1017
+#define        MAX_REORDERED_RPC       16
+#define NUM_HEURISTIC          1031
 #define NHUSE_INIT             64
 #define NHUSE_INC              16
 #define NHUSE_MAX              2048
 
 static struct nfsheur {
        struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
-       off_t nh_nextr;         /* next offset for sequential detection */
+       off_t nh_nextoff;       /* next offset for sequential detection */
        int nh_use;             /* use count for selection */
        int nh_seqcount;        /* heuristic */
 } nfsheur[NUM_HEURISTIC];
@@ -187,6 +188,63 @@ nfsrv_lockedpair_nd(int vfs1, struct nam
 }
 
 /*
+ * Heuristic to detect sequential operation.
+ */
+static struct nfsheur *
+nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
+{
+       struct nfsheur *nh;
+       int hi, try;
+
+       /* Locate best candidate. */
+       try = 32;
+       hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
+       nh = &nfsheur[hi];
+       while (try--) {
+               if (nfsheur[hi].nh_vp == vp) {
+                       nh = &nfsheur[hi];
+                       break;
+               }
+               if (nfsheur[hi].nh_use > 0)
+                       --nfsheur[hi].nh_use;
+               hi = (hi + 1) % NUM_HEURISTIC;
+               if (nfsheur[hi].nh_use < nh->nh_use)
+                       nh = &nfsheur[hi];
+       }
+
+       /* Initialize hint if this is a new file. */
+       if (nh->nh_vp != vp) {
+               nh->nh_vp = vp;
+               nh->nh_nextoff = uio->uio_offset;
+               nh->nh_use = NHUSE_INIT;
+               if (uio->uio_offset == 0)
+                       nh->nh_seqcount = 4;
+               else
+                       nh->nh_seqcount = 1;
+       }
+
+       /* Calculate heuristic. */
+       if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
+           uio->uio_offset == nh->nh_nextoff) {
+               /* See comments in vfs_vnops.c:sequential_heuristic(). */
+               nh->nh_seqcount += howmany(uio->uio_resid, 16384);
+               if (nh->nh_seqcount > IO_SEQMAX)
+                       nh->nh_seqcount = IO_SEQMAX;
+       } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
+           imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
+               /* Probably a reordered RPC, leave seqcount alone. */
+       } else if (nh->nh_seqcount > 1) {
+               nh->nh_seqcount /= 2;
+       } else {
+               nh->nh_seqcount = 0;
+       }
+       nh->nh_use += NHUSE_INC;
+       if (nh->nh_use > NHUSE_MAX)
+               nh->nh_use = NHUSE_MAX;
+       return (nh);
+}
+
+/*
  * nfs v3 access service
  */
 int
@@ -843,7 +901,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
        /*
         * Calculate byte count to read
         */
-
        if (off >= vap->va_size)
                cnt = 0;
        else if ((off + reqlen) > vap->va_size)
@@ -851,61 +908,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
        else
                cnt = reqlen;
 
-       /*
-        * Calculate seqcount for heuristic
-        */
-
-       {
-               int hi;
-               int try = 32;
-
-               /*
-                * Locate best candidate
-                */
-
-               hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
-               nh = &nfsheur[hi];
-
-               while (try--) {
-                       if (nfsheur[hi].nh_vp == vp) {
-                               nh = &nfsheur[hi];
-                               break;
-                       }
-                       if (nfsheur[hi].nh_use > 0)
-                               --nfsheur[hi].nh_use;
-                       hi = (hi + 1) % NUM_HEURISTIC;
-                       if (nfsheur[hi].nh_use < nh->nh_use)
-                               nh = &nfsheur[hi];
-               }
-
-               if (nh->nh_vp != vp) {
-                       nh->nh_vp = vp;
-                       nh->nh_nextr = off;
-                       nh->nh_use = NHUSE_INIT;
-                       if (off == 0)
-                               nh->nh_seqcount = 4;
-                       else
-                               nh->nh_seqcount = 1;
-               }
-
-               /*
-                * Calculate heuristic
-                */
-
-               if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
-                       if (++nh->nh_seqcount > IO_SEQMAX)
-                               nh->nh_seqcount = IO_SEQMAX;
-               } else if (nh->nh_seqcount > 1) {
-                       nh->nh_seqcount = 1;
-               } else {
-                       nh->nh_seqcount = 0;
-               }
-               nh->nh_use += NHUSE_INC;
-               if (nh->nh_use > NHUSE_MAX)
-                       nh->nh_use = NHUSE_MAX;
-               ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
-        }
-
        nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
        if (v3) {
                tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
@@ -963,9 +965,11 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
                uiop->uio_resid = len;
                uiop->uio_rw = UIO_READ;
                uiop->uio_segflg = UIO_SYSSPACE;
+               nh = nfsrv_sequential_heuristic(uiop, vp);
+               ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
                error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
-               off = uiop->uio_offset;
-               nh->nh_nextr = off;
+               if (error == 0)
+                       nh->nh_nextoff = uiop->uio_offset;
                free((caddr_t)iv2, M_TEMP);
                if (error || (getret = VOP_GETATTR(vp, vap, cred))) {
                        if (!error)
@@ -1030,6 +1034,7 @@ nfsrv_write(struct nfsrv_descript *nfsd,
        int v3 = (nfsd->nd_flag & ND_NFSV3);
        struct mbuf *mb, *mreq;
        struct vnode *vp = NULL;
+       struct nfsheur *nh;
        nfsfh_t nfh;
        fhandle_t *fhp;
        struct uio io, *uiop = &io;
@@ -1170,7 +1175,11 @@ nfsrv_write(struct nfsrv_descript *nfsd,
            uiop->uio_segflg = UIO_SYSSPACE;
            uiop->uio_td = NULL;
            uiop->uio_offset = off;
+           nh = nfsrv_sequential_heuristic(uiop, vp);
+           ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
            error = VOP_WRITE(vp, uiop, ioflags, cred);
+           if (error == 0)
+                   nh->nh_nextoff = uiop->uio_offset;
            /* Unlocked write. */
            nfsrvstats.srvvop_writes++;
            free((caddr_t)iv, M_TEMP);
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to