Author: rmacklem
Date: Mon Oct 16 23:28:12 2017
New Revision: 324676
URL: https://svnweb.freebsd.org/changeset/base/324676

Log:
  Use taskqueue(9) to do writes/commits to mirrored DSs concurrently.
  
  When the NFSv4.1 pNFS client is using a Flexible File Layout specifying
  mirrored Data Servers, it must do the writes and commits to all mirrors.
  This patch modifies the client to use a taskqueue to perform these writes
  and commits concurrently.
  The number of threads can't be changed for taskqueue(9), so it is set
  to 4 * mp_ncpus by default, but this can be overridden by setting the
  sysctl vfs.nfs.pnfsiothreads.
  
  Differential Revision:        https://reviews.freebsd.org/D12632

Modified:
  head/sys/fs/nfs/nfs_commonport.c
  head/sys/fs/nfsclient/nfs_clrpcops.c

Modified: head/sys/fs/nfs/nfs_commonport.c
==============================================================================
--- head/sys/fs/nfs/nfs_commonport.c    Mon Oct 16 21:55:31 2017        
(r324675)
+++ head/sys/fs/nfs/nfs_commonport.c    Mon Oct 16 23:28:12 2017        
(r324676)
@@ -40,7 +40,9 @@ __FBSDID("$FreeBSD$");
  * to this BSD variant.
  */
 #include <fs/nfs/nfsport.h>
+#include <sys/smp.h>
 #include <sys/sysctl.h>
+#include <sys/taskqueue.h>
 #include <rpc/rpc_com.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
@@ -68,6 +70,8 @@ int nfsrv_lughashsize = 100;
 void (*nfsd_call_servertimer)(void) = NULL;
 void (*ncl_call_invalcaches)(struct vnode *) = NULL;
 
+int nfs_pnfsio(task_fn_t *, void *);
+
 static int nfs_realign_test;
 static int nfs_realign_count;
 static struct ext_nfsstats oldnfsstats;
@@ -84,6 +88,9 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW,
     0, "Debug level for NFS client");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize,
     0, "Size of hash tables for uid/name mapping");
+int nfs_pnfsiothreads = 0;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsiothreads, CTLFLAG_RW, &nfs_pnfsiothreads,
+    0, "Number of pNFS mirror I/O threads");
 
 /*
  * Defines for malloc
@@ -689,6 +696,50 @@ nfs_supportsnfsv4acls(struct vnode *vp)
        if (error == 0 && retval != 0)
                return (1);
        return (0);
+}
+
+/*
+ * These are the first fields of all the context structures passed into
+ * nfs_pnfsio().
+ */
+struct pnfsio {
+       int             done;
+       int             inprog;
+       struct task     tsk;
+};
+
+/*
+ * Do a mirror I/O on a pNFS thread.
+ */
+int
+nfs_pnfsio(task_fn_t *func, void *context)
+{
+       struct pnfsio *pio;
+       int ret;
+       static struct taskqueue *pnfsioq = NULL;
+
+       pio = (struct pnfsio *)context;
+       if (pnfsioq == NULL) {
+               if (nfs_pnfsiothreads == 0)
+                       nfs_pnfsiothreads = mp_ncpus * 4;
+               pnfsioq = taskqueue_create("pnfsioq", M_WAITOK,
+                   taskqueue_thread_enqueue, &pnfsioq);
+               if (pnfsioq == NULL)
+                       return (ENOMEM);
+               ret = taskqueue_start_threads(&pnfsioq, nfs_pnfsiothreads,
+                   0, "pnfsiot");
+               if (ret != 0) {
+                       taskqueue_free(pnfsioq);
+                       pnfsioq = NULL;
+                       return (ret);
+               }
+       }
+       pio->inprog = 1;
+       TASK_INIT(&pio->tsk, 0, func, context);
+       ret = taskqueue_enqueue(pnfsioq, &pio->tsk);
+       if (ret != 0)
+               pio->inprog = 0;
+       return (ret);
 }
 
 extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *);

Modified: head/sys/fs/nfsclient/nfs_clrpcops.c
==============================================================================
--- head/sys/fs/nfsclient/nfs_clrpcops.c        Mon Oct 16 21:55:31 2017        
(r324675)
+++ head/sys/fs/nfsclient/nfs_clrpcops.c        Mon Oct 16 23:28:12 2017        
(r324676)
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 
 #include <fs/nfs/nfsport.h>
 #include <sys/sysctl.h>
+#include <sys/taskqueue.h>
 
 SYSCTL_DECL(_vfs_nfs);
 
@@ -64,6 +65,7 @@ extern nfstype nfsv34_type[9];
 extern int nfsrv_useacl;
 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
 extern int nfscl_debuglevel;
+extern int nfs_pnfsiothreads;
 NFSCLSTATEMUTEX;
 int nfstest_outofseq = 0;
 int nfscl_assumeposixlocks = 1;
@@ -87,6 +89,30 @@ enum nfsclds_state {
        NFSDSP_NOTFOUND = 2,
 };
 
+/*
+ * Do a write RPC on a DS data file, using this structure for the arguments,
+ * so that this function can be executed by a separate kernel process.
+ */
+struct nfsclwritedsdorpc {
+       int                     done;
+       int                     inprog;
+       struct task             tsk;
+       struct vnode            *vp;
+       int                     iomode;
+       int                     must_commit;
+       nfsv4stateid_t          *stateidp;
+       struct nfsclds          *dsp;
+       uint64_t                off;
+       int                     len;
+       struct nfsfh            *fhp;
+       struct mbuf             *m;
+       int                     vers;
+       int                     minorvers;
+       struct ucred            *cred;
+       NFSPROC_T               *p;
+       int                     err;
+};
+
 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
@@ -119,7 +145,7 @@ static int nfscl_doflayoutio(vnode_t, struct uio *, in
 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
-    struct ucred *, NFSPROC_T *);
+    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
 static struct mbuf *nfsm_copym(struct mbuf *, int, int);
 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
@@ -127,11 +153,17 @@ static int nfsrpc_readds(vnode_t, struct uio *, nfsv4s
 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
+static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
+    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
+    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
     struct ucred *, NFSPROC_T *);
 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
     struct nfsclds *, struct nfsclds **);
+static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
+    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
+    NFSPROC_T *);
 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
@@ -163,6 +195,8 @@ static int nfsrpc_layoutgetres(struct nfsmount *, vnod
     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
 
+int nfs_pnfsio(task_fn_t *, void *);
+
 /*
  * nfs null call from vfs.
  */
@@ -5544,10 +5578,11 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
        struct nfscldevinfo *dip;
        struct nfsclflayout *rflp;
        struct mbuf *m;
+       struct nfsclwritedsdorpc *drpc, *tdrpc;
        nfsv4stateid_t stateid;
        struct ucred *newcred;
        uint64_t lastbyte, len, off, oresid, xfer;
-       int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled;
+       int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
        void *lckp;
        uint8_t *dev;
        void *iovbase;
@@ -5627,6 +5662,7 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
                         * do all mirrors.
                         */
                        m = NULL;
+                       drpc = NULL;
                        firstmirror = 0;
                        mirrorcnt = 1;
                        if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
@@ -5634,17 +5670,24 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
                                if (rwaccess == NFSV4OPEN_ACCESSREAD) {
                                        firstmirror = arc4random() % mirrorcnt;
                                        mirrorcnt = firstmirror + 1;
-                               } else if (docommit == 0) {
-                                       /*
-                                        * Save values, so uiop can be rolled
-                                        * back upon a write error.
-                                        */
-                                       offs = uiop->uio_offset;
-                                       resid = uiop->uio_resid;
-                                       iovbase = uiop->uio_iov->iov_base;
-                                       iovlen = uiop->uio_iov->iov_len;
-                                       m = nfsm_uiombuflist(uiop, len, NULL,
-                                           NULL);
+                               } else {
+                                       if (docommit == 0) {
+                                               /*
+                                                * Save values, so uiop can be
+                                                * rolled back upon a write
+                                                * error.
+                                                */
+                                               offs = uiop->uio_offset;
+                                               resid = uiop->uio_resid;
+                                               iovbase =
+                                                   uiop->uio_iov->iov_base;
+                                               iovlen = uiop->uio_iov->iov_len;
+                                               m = nfsm_uiombuflist(uiop, len,
+                                                   NULL, NULL);
+                                       }
+                                       tdrpc = drpc = malloc(sizeof(*drpc) *
+                                           (mirrorcnt - 1), M_TEMP, M_WAITOK |
+                                           M_ZERO);
                                }
                        }
                        for (i = firstmirror; i < mirrorcnt && error == 0; i++){
@@ -5661,8 +5704,8 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
                                                    uiop, iomode, must_commit,
                                                    &eof, &stateid, rwaccess,
                                                    dip, layp, rflp, off, xfer,
-                                                   i, docommit, m, newcred,
-                                                   p);
+                                                   i, docommit, m, tdrpc,
+                                                   newcred, p);
                                        else
                                                error = nfscl_doflayoutio(vp,
                                                    uiop, iomode, must_commit,
@@ -5672,9 +5715,27 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
                                        nfscl_reldevinfo(dip);
                                } else
                                        error = EIO;
+                               tdrpc++;
                        }
                        if (m != NULL)
                                m_freem(m);
+                       tdrpc = drpc;
+                       timo = hz / 50;         /* Wait for 20msec. */
+                       if (timo < 1)
+                               timo = 1;
+                       for (i = firstmirror; i < mirrorcnt - 1 &&
+                           tdrpc != NULL; i++, tdrpc++) {
+                               /*
+                                * For the unused drpc entries, both inprog and
+                                * err == 0, so this loop won't break.
+                                */
+                               while (tdrpc->inprog != 0 && tdrpc->done == 0)
+                                       tsleep(&tdrpc->tsk, PVFS, "clrpcio",
+                                           timo);
+                               if (error == 0 && tdrpc->err != 0)
+                                       error = tdrpc->err;
+                       }
+                       free(drpc, M_TEMP);
                        if (error == 0) {
                                if (mirrorcnt > 1 && rwaccess ==
                                    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
@@ -5898,8 +5959,8 @@ static int
 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
-    uint64_t len, int mirror, int docommit, struct mbuf *mp, struct ucred 
*cred,
-    NFSPROC_T *p)
+    uint64_t len, int mirror, int docommit, struct mbuf *mp,
+    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
 {
        uint64_t transfer, xfer;
        int error, rel_off;
@@ -5940,13 +6001,23 @@ nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *
                else
                        xfer = len;
                if (docommit != 0) {
-                       if (error == 0)
-                               error = nfsrpc_commitds(vp, off, xfer, *dspp,
-                                   fhp, dp->nfsdi_vers, dp->nfsdi_minorvers,
-                                   tcred, p);
-                       NFSCL_DEBUG(4, "aft nfsrpc_commitds=%d\n", error);
                        if (error == 0) {
                                /*
+                                * Do last mirrored DS commit with this thread.
+                                */
+                               if (mirror < flp->nfsfl_mirrorcnt - 1)
+                                       error = nfsio_commitds(vp, off, xfer,
+                                           *dspp, fhp, dp->nfsdi_vers,
+                                           dp->nfsdi_minorvers, drpc, tcred,
+                                           p);
+                               else
+                                       error = nfsrpc_commitds(vp, off, xfer,
+                                           *dspp, fhp, dp->nfsdi_vers,
+                                           dp->nfsdi_minorvers, tcred, p);
+                       }
+                       NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
+                       if (error == 0) {
+                               /*
                                 * Set both eof and uio_resid = 0 to end any
                                 * loops.
                                 */
@@ -5976,11 +6047,22 @@ nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *
                                m = nfsm_copym(mp, rel_off, xfer);
                                NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n",
                                    rel_off, (uintmax_t)xfer);
-                               error = nfsrpc_writedsmir(vp, iomode,
-                                   must_commit, stateidp, *dspp, off, xfer,
-                                   fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers,
-                                   tcred, p);
-                               NFSCL_DEBUG(4, "nfsrpc_writedsmir=%d\n", error);
+                               /*
+                                * Do last write to a mirrored DS with this
+                                * thread.
+                                */
+                               if (mirror < flp->nfsfl_mirrorcnt - 1)
+                                       error = nfsio_writedsmir(vp, iomode,
+                                           must_commit, stateidp, *dspp, off,
+                                           xfer, fhp, m, dp->nfsdi_vers,
+                                           dp->nfsdi_minorvers, drpc, tcred,
+                                           p);
+                               else
+                                       error = nfsrpc_writedsmir(vp, iomode,
+                                           must_commit, stateidp, *dspp, off,
+                                           xfer, fhp, m, dp->nfsdi_vers,
+                                           dp->nfsdi_minorvers, tcred, p);
+                               NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
                        }
                }
                NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
@@ -6307,6 +6389,62 @@ nfsmout:
 }
 
 /*
+ * Start up the thread that will execute nfsrpc_writedsmir().
+ */
+static void
+start_writedsmir(void *arg, int pending)
+{
+       struct nfsclwritedsdorpc *drpc;
+
+       drpc = (struct nfsclwritedsdorpc *)arg;
+       drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
+           &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
+           drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
+           drpc->p);
+       drpc->done = 1;
+       NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
+}
+
+/*
+ * Set up the write DS mirror call for the pNFS I/O thread.
+ */
+static int
+nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
+    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
+    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
+    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
+{
+       int error, ret;
+
+       error = 0;
+       drpc->done = 0;
+       drpc->vp = vp;
+       drpc->iomode = *iomode;
+       drpc->must_commit = *must_commit;
+       drpc->stateidp = stateidp;
+       drpc->dsp = dsp;
+       drpc->off = off;
+       drpc->len = len;
+       drpc->fhp = fhp;
+       drpc->m = m;
+       drpc->vers = vers;
+       drpc->minorvers = minorvers;
+       drpc->cred = cred;
+       drpc->p = p;
+       drpc->inprog = 0;
+       ret = EIO;
+       if (nfs_pnfsiothreads > 0) {
+               ret = nfs_pnfsio(start_writedsmir, drpc);
+               NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
+       }
+       if (ret != 0)
+               error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
+                   dsp, off, len, fhp, m, vers, minorvers, cred, p);
+       NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
+       return (error);
+}
+
+/*
  * Free up the nfsclds structure.
  */
 void
@@ -6432,6 +6570,56 @@ nfsmout:
        if (error == 0 && nd->nd_repstat != 0)
                error = nd->nd_repstat;
        mbuf_freem(nd->nd_mrep);
+       return (error);
+}
+
+/*
+ * Start up the thread that will execute nfsrpc_commitds().
+ */
+static void
+start_commitds(void *arg, int pending)
+{
+       struct nfsclwritedsdorpc *drpc;
+
+       drpc = (struct nfsclwritedsdorpc *)arg;
+       drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
+           drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
+           drpc->p);
+       drpc->done = 1;
+       NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
+}
+
+/*
+ * Set up the commit DS mirror call for the pNFS I/O thread.
+ */
+static int
+nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
+    struct nfsfh *fhp, int vers, int minorvers,
+    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
+{
+       int error, ret;
+
+       error = 0;
+       drpc->done = 0;
+       drpc->vp = vp;
+       drpc->off = offset;
+       drpc->len = cnt;
+       drpc->dsp = dsp;
+       drpc->fhp = fhp;
+       drpc->vers = vers;
+       drpc->minorvers = minorvers;
+       drpc->cred = cred;
+       drpc->p = p;
+       drpc->inprog = 0;
+       ret = EIO;
+       if (nfs_pnfsiothreads > 0) {
+               ret = nfs_pnfsio(start_commitds, drpc);
+               NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
+       }
+       if (ret != 0)
+               error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
+                   minorvers, cred, p);
+       NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
        return (error);
 }
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to