Author: cem
Date: Fri Jul 24 17:34:04 2020
New Revision: 363482
URL: https://svnweb.freebsd.org/changeset/base/363482

Log:
  Add unlocked/SMR fast path to getblk()
  
  Convert the bufobj tries to an SMR zone/PCTRIE and add a gbincore_unlocked()
  API wrapping this functionality.  Use it for a fast path in getblkx(),
  falling back to locked lookup if we raced a thread changing the buf's
  identity.
  
  Reported by:  Attilio
  Reviewed by:  kib, markj
  Testing:      pho (in progress)
  Sponsored by: Isilon
  Differential Revision:        https://reviews.freebsd.org/D25782

Modified:
  head/sys/kern/vfs_bio.c
  head/sys/kern/vfs_subr.c
  head/sys/sys/buf.h

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c     Fri Jul 24 17:32:10 2020        (r363481)
+++ head/sys/kern/vfs_bio.c     Fri Jul 24 17:34:04 2020        (r363482)
@@ -3849,7 +3849,7 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn
        struct buf *bp;
        struct bufobj *bo;
        daddr_t d_blkno;
-       int bsize, error, maxsize, vmio;
+       int bsize, error, maxsize, vmio, lockflags;
        off_t offset;
 
        CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size);
@@ -3864,11 +3864,33 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn
 
        bo = &vp->v_bufobj;
        d_blkno = dblkno;
+
+       /* Attempt lockless lookup first. */
+       bp = gbincore_unlocked(bo, blkno);
+       if (bp == NULL)
+               goto newbuf_unlocked;
+
+       lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL |
+           ((flags & GB_LOCK_NOWAIT) ? LK_NOWAIT : 0);
+
+       error = BUF_TIMELOCK(bp, lockflags, NULL, "getblku", slpflag,
+           slptimeo);
+       if (error == EINTR || error == ERESTART)
+               return (error);
+       else if (error != 0)
+               goto loop;
+
+       /* Verify buf identity has not changed since lookup. */
+       if (bp->b_bufobj == bo && bp->b_lblkno == blkno)
+               goto foundbuf_fastpath;
+
+       /* It changed, fall back to locked lookup. */
+       BUF_UNLOCK_RAW(bp);
+
 loop:
        BO_RLOCK(bo);
        bp = gbincore(bo, blkno);
        if (bp != NULL) {
-               int lockflags;
                /*
                 * Buffer is in-core.  If the buffer is not busy nor managed,
                 * it must be on a queue.
@@ -3890,8 +3912,10 @@ loop:
                /* We timed out or were interrupted. */
                else if (error != 0)
                        return (error);
+
+foundbuf_fastpath:
                /* If recursed, assume caller knows the rules. */
-               else if (BUF_LOCKRECURSED(bp))
+               if (BUF_LOCKRECURSED(bp))
                        goto end;
 
                /*
@@ -3989,6 +4013,7 @@ loop:
                 * buffer is also considered valid (not marked B_INVAL).
                 */
                BO_RUNLOCK(bo);
+newbuf_unlocked:
                /*
                 * If the user does not want us to create the buffer, bail out
                 * here.

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Fri Jul 24 17:32:10 2020        (r363481)
+++ head/sys/kern/vfs_subr.c    Fri Jul 24 17:34:04 2020        (r363482)
@@ -234,6 +234,7 @@ static struct mtx __exclusive_cache_line vnode_list_mt
 struct nfs_public nfs_pub;
 
 static uma_zone_t buf_trie_zone;
+static smr_t buf_trie_smr;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static uma_zone_t vnode_zone;
@@ -491,17 +492,16 @@ static int vnsz2log;
 static void *
 buf_trie_alloc(struct pctrie *ptree)
 {
-
-       return uma_zalloc(buf_trie_zone, M_NOWAIT);
+       return (uma_zalloc_smr(buf_trie_zone, M_NOWAIT));
 }
 
 static void
 buf_trie_free(struct pctrie *ptree, void *node)
 {
-
-       uma_zfree(buf_trie_zone, node);
+       uma_zfree_smr(buf_trie_zone, node);
 }
-PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free);
+PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free,
+    buf_trie_smr);
 
 /*
  * Initialize the vnode management data structures.
@@ -675,7 +675,8 @@ vntblinit(void *dummy __unused)
         */
        buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
            NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 
-           UMA_ZONE_NOFREE);
+           UMA_ZONE_NOFREE | UMA_ZONE_SMR);
+       buf_trie_smr = uma_zone_get_smr(buf_trie_zone);
        uma_prealloc(buf_trie_zone, nbuf);
 
        vnodes_created = counter_u64_alloc(M_WAITOK);
@@ -2330,7 +2331,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno)
        bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno);
        if (bp != NULL)
                return (bp);
-       return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno);
+       return (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno));
+}
+
+/*
+ * Look up a buf using the buffer tries, without the bufobj lock.  This relies
+ * on SMR for safe lookup, and bufs being in a no-free zone to provide type
+ * stability of the result.  Like other lockless lookups, the found buf may
+ * already be invalid by the time this function returns.
+ */
+struct buf *
+gbincore_unlocked(struct bufobj *bo, daddr_t lblkno)
+{
+       struct buf *bp;
+
+       ASSERT_BO_UNLOCKED(bo);
+       bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno);
+       if (bp != NULL)
+               return (bp);
+       return (BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno));
 }
 
 /*

Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h  Fri Jul 24 17:32:10 2020        (r363481)
+++ head/sys/sys/buf.h  Fri Jul 24 17:34:04 2020        (r363482)
@@ -326,6 +326,9 @@ extern const char *buf_wmesg;               /* Default buffer lock
        KASSERT(((bp)->b_flags & B_REMFREE) == 0,                       \
            ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp)));     \
                                                                        \
+       BUF_UNLOCK_RAW((bp));                                           \
+} while (0)
#define        BUF_UNLOCK_RAW(bp) do {                                         \
        (void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL,            \
            LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,         \
            LOCK_FILE, LOCK_LINE);                                      \
@@ -547,6 +550,7 @@ void        vfs_busy_pages_acquire(struct buf *bp);
 void   vfs_busy_pages_release(struct buf *bp);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
+struct buf *gbincore_unlocked(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
 int    getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size,
            int slpflag, int slptimeo, int flags, struct buf **bpp);
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to