The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=739ecbcf1c4fd22b5f6ee0bb180a67644046a3e0

commit 739ecbcf1c4fd22b5f6ee0bb180a67644046a3e0
Author:     Mateusz Guzik <m...@freebsd.org>
AuthorDate: 2021-01-23 13:40:48 +0000
Commit:     Mateusz Guzik <m...@freebsd.org>
CommitDate: 2021-01-23 15:04:43 +0000

    cache: add symlink support to lockless lookup
    
    Reviewed by:    kib (previous version)
    Tested by:      pho (previous version)
    Differential Revision:  https://reviews.freebsd.org/D27488
---
 sys/fs/deadfs/dead_vnops.c |   1 +
 sys/kern/vfs_cache.c       | 214 ++++++++++++++++++++++++++++++++++++++-------
 sys/kern/vfs_lookup.c      |  11 ++-
 sys/kern/vfs_subr.c        |  13 +++
 sys/kern/vnode_if.src      |  10 +++
 sys/sys/namei.h            |   4 +-
 sys/sys/param.h            |   2 +-
 sys/sys/vnode.h            |   9 ++
 8 files changed, 227 insertions(+), 37 deletions(-)

diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
index 49f852791387..09c3c996ee0e 100644
--- a/sys/fs/deadfs/dead_vnops.c
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = {
        .vop_unset_text =       dead_unset_text,
        .vop_write =            dead_write,
        .vop_fplookup_vexec =   VOP_EOPNOTSUPP,
+       .vop_fplookup_symlink = VOP_EAGAIN,
 };
 VFS_VOP_VECTOR_REGISTER(dead_vnodeops);
 
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index cfa25dc59ee4..770c8ebf061b 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -562,6 +562,36 @@ static uma_zone_t __read_mostly cache_zone_small_ts;
 static uma_zone_t __read_mostly cache_zone_large;
 static uma_zone_t __read_mostly cache_zone_large_ts;
 
+char *
+cache_symlink_alloc(size_t size, int flags)
+{
+
+       if (size < CACHE_ZONE_SMALL_SIZE) {
+               return (uma_zalloc_smr(cache_zone_small, flags));
+       }
+       if (size < CACHE_ZONE_LARGE_SIZE) {
+               return (uma_zalloc_smr(cache_zone_large, flags));
+       }
+       return (NULL);
+}
+
+void
+cache_symlink_free(char *string, size_t size)
+{
+
+       MPASS(string != NULL);
+
+       if (size < CACHE_ZONE_SMALL_SIZE) {
+               uma_zfree_smr(cache_zone_small, string);
+               return;
+       }
+       if (size < CACHE_ZONE_LARGE_SIZE) {
+               uma_zfree_smr(cache_zone_large, string);
+               return;
+       }
+       __assert_unreachable();
+}
+
 static struct namecache *
 cache_alloc_uma(int len, bool ts)
 {
@@ -3584,6 +3614,7 @@ cache_fast_lookup_enabled_recalc(void)
 
 #ifdef MAC
        mac_on = mac_vnode_check_lookup_enabled();
+       mac_on |= mac_vnode_check_readlink_enabled();
 #else
        mac_on = 0;
 #endif
@@ -3615,6 +3646,7 @@ SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MP
  * need restoring in case fast path lookup fails.
  */
 struct nameidata_outer {
+       size_t ni_pathlen;
        int cn_flags;
 };
 
@@ -3656,8 +3688,10 @@ static bool cache_fplookup_is_mp(struct cache_fpl *fpl);
 static int cache_fplookup_cross_mount(struct cache_fpl *fpl);
 static int cache_fplookup_partial_setup(struct cache_fpl *fpl);
 static int cache_fplookup_skip_slashes(struct cache_fpl *fpl);
+static int cache_fplookup_preparse(struct cache_fpl *fpl);
 static void cache_fpl_pathlen_dec(struct cache_fpl *fpl);
 static void cache_fpl_pathlen_inc(struct cache_fpl *fpl);
+static void cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n);
 static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n);
 
 static void
@@ -3698,6 +3732,7 @@ static void
 cache_fpl_checkpoint_outer(struct cache_fpl *fpl)
 {
 
+       fpl->snd_outer.ni_pathlen = fpl->ndp->ni_pathlen;
        fpl->snd_outer.cn_flags = fpl->ndp->ni_cnd.cn_flags;
 }
 
@@ -3731,6 +3766,7 @@ cache_fpl_restore_abort(struct cache_fpl *fpl)
         */
        fpl->ndp->ni_resflags = 0;
        fpl->ndp->ni_cnd.cn_nameptr = fpl->ndp->ni_cnd.cn_pnbuf;
+       fpl->ndp->ni_pathlen = fpl->snd_outer.ni_pathlen;
 }
 
 #ifdef INVARIANTS
@@ -3752,6 +3788,7 @@ cache_fpl_assert_status(struct cache_fpl *fpl)
        case CACHE_FPL_STATUS_UNSET:
                __assert_unreachable();
                break;
+       case CACHE_FPL_STATUS_DESTROYED:
        case CACHE_FPL_STATUS_ABORTED:
        case CACHE_FPL_STATUS_PARTIAL:
        case CACHE_FPL_STATUS_HANDLED:
@@ -3804,6 +3841,11 @@ cache_fpl_aborted_early_impl(struct cache_fpl *fpl, int line)
 static int __noinline
 cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
 {
+       struct nameidata *ndp;
+       struct componentname *cnp;
+
+       ndp = fpl->ndp;
+       cnp = fpl->cnp;
 
        if (fpl->status != CACHE_FPL_STATUS_UNSET) {
                KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL,
@@ -3815,6 +3857,14 @@ cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
        if (fpl->in_smr)
                cache_fpl_smr_exit(fpl);
        cache_fpl_restore_abort(fpl);
+       /*
+        * Resolving symlinks overwrites data passed by the caller.
+        * Let namei know.
+        */
+       if (ndp->ni_loopcnt > 0) {
+               fpl->status = CACHE_FPL_STATUS_DESTROYED;
+               cache_fpl_cleanup_cnp(cnp);
+       }
        return (CACHE_FPL_FAILED);
 }
 
@@ -3955,13 +4005,6 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
        return (0);
 }
 
-static bool
-cache_fplookup_vnode_supported(struct vnode *vp)
-{
-
-       return (vp->v_type != VLNK);
-}
-
 static int __noinline
 cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
     uint32_t hash)
@@ -4242,8 +4285,7 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
         * Since we expect this to be the terminal vnode it should
         * almost never be true.
         */
-       if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
-           cache_fplookup_is_mp(fpl))) {
+       if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
                vput(dvp);
                vput(tvp);
                return (cache_fpl_aborted(fpl));
@@ -4546,8 +4588,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
                return (cache_fpl_handled(fpl));
        }
 
-       if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
-           cache_fplookup_is_mp(fpl))) {
+       if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
                vput(dvp);
                vput(tvp);
                return (cache_fpl_aborted(fpl));
@@ -4688,6 +4729,102 @@ cache_fplookup_neg(struct cache_fpl *fpl, struct namecache *ncp, uint32_t hash)
        return (cache_fpl_handled_error(fpl, ENOENT));
 }
 
+/*
+ * Resolve a symlink. Called by filesystem-specific routines.
+ *
+ * Code flow is:
+ * ... -> cache_fplookup_symlink -> VOP_FPLOOKUP_SYMLINK -> cache_symlink_resolve
+ */
+int
+cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len)
+{
+       struct nameidata *ndp;
+       struct componentname *cnp;
+
+       ndp = fpl->ndp;
+       cnp = fpl->cnp;
+
+       if (__predict_false(len == 0)) {
+               return (ENOENT);
+       }
+
+       ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr - cnp->cn_namelen + 1;
+#ifdef INVARIANTS
+       if (ndp->ni_pathlen != fpl->debug.ni_pathlen) {
+               panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
+                   __func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
+                   cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
+       }
+#endif
+
+       if (__predict_false(len + ndp->ni_pathlen > MAXPATHLEN)) {
+               return (ENAMETOOLONG);
+       }
+
+       if (__predict_false(ndp->ni_loopcnt++ >= MAXSYMLINKS)) {
+               return (ELOOP);
+       }
+
+       if (ndp->ni_pathlen > 1) {
+               bcopy(ndp->ni_next, cnp->cn_pnbuf + len, ndp->ni_pathlen);
+       } else {
+               cnp->cn_pnbuf[len] = '\0';
+       }
+       bcopy(string, cnp->cn_pnbuf, len);
+
+       ndp->ni_pathlen += len;
+       cache_fpl_pathlen_add(fpl, len);
+       cnp->cn_nameptr = cnp->cn_pnbuf;
+       fpl->nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
+
+       return (0);
+}
+
+static int __noinline
+cache_fplookup_symlink(struct cache_fpl *fpl)
+{
+       struct nameidata *ndp;
+       struct componentname *cnp;
+       struct vnode *dvp, *tvp;
+       int error;
+
+       ndp = fpl->ndp;
+       cnp = fpl->cnp;
+       dvp = fpl->dvp;
+       tvp = fpl->tvp;
+
+       if (cache_fpl_islastcn(ndp)) {
+               if ((cnp->cn_flags & FOLLOW) == 0) {
+                       return (cache_fplookup_final(fpl));
+               }
+       }
+
+       error = VOP_FPLOOKUP_SYMLINK(tvp, fpl);
+       if (__predict_false(error != 0)) {
+               switch (error) {
+               case EAGAIN:
+                       return (cache_fpl_partial(fpl));
+               case ENOENT:
+               case ENAMETOOLONG:
+               case ELOOP:
+                       cache_fpl_smr_exit(fpl);
+                       return (cache_fpl_handled_error(fpl, error));
+               default:
+                       return (cache_fpl_aborted(fpl));
+               }
+       }
+
+       if (*(cnp->cn_nameptr) == '/') {
+               fpl->dvp = cache_fpl_handle_root(fpl);
+               fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp);
+               if (seqc_in_modify(fpl->dvp_seqc)) {
+                       return (cache_fpl_aborted(fpl));
+               }
+       }
+
+       return (cache_fplookup_preparse(fpl));
+}
+
 static int
 cache_fplookup_next(struct cache_fpl *fpl)
 {
@@ -4743,10 +4880,6 @@ cache_fplookup_next(struct cache_fpl *fpl)
                return (cache_fpl_partial(fpl));
        }
 
-       if (!cache_fplookup_vnode_supported(tvp)) {
-               return (cache_fpl_partial(fpl));
-       }
-
        counter_u64_add(numposhits, 1);
        SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp);
 
@@ -4931,7 +5064,16 @@ static void
 cache_fpl_pathlen_inc(struct cache_fpl *fpl)
 {
 
-       fpl->debug.ni_pathlen++;
+       cache_fpl_pathlen_add(fpl, 1);
+}
+
+static void
+cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
+{
+
+       fpl->debug.ni_pathlen += n;
+       KASSERT(fpl->debug.ni_pathlen <= PATH_MAX,
+           ("%s: pathlen overflow to %zd\n", __func__, fpl->debug.ni_pathlen));
 }
 
 static void
@@ -4953,13 +5095,18 @@ cache_fpl_pathlen_inc(struct cache_fpl *fpl)
 {
 }
 
+static void
+cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
+{
+}
+
 static void
 cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n)
 {
 }
 #endif
 
-static int
+static int __always_inline
 cache_fplookup_preparse(struct cache_fpl *fpl)
 {
        struct componentname *cnp;
@@ -5238,8 +5385,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
                return (cache_fpl_aborted(fpl));
        }
 
-       VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
-
        error = cache_fplookup_preparse(fpl);
        if (__predict_false(cache_fpl_terminated(fpl))) {
                return (error);
@@ -5251,8 +5396,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
                        break;
                }
 
-               VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
-
                error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred);
                if (__predict_false(error != 0)) {
                        error = cache_fplookup_failed_vexec(fpl, error);
@@ -5266,20 +5409,27 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
 
                VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
 
-               if (cache_fpl_islastcn(ndp)) {
-                       error = cache_fplookup_final(fpl);
-                       break;
-               }
+               if (fpl->tvp->v_type == VLNK) {
+                       error = cache_fplookup_symlink(fpl);
+                       if (cache_fpl_terminated(fpl)) {
+                               break;
+                       }
+               } else {
+                       if (cache_fpl_islastcn(ndp)) {
+                               error = cache_fplookup_final(fpl);
+                               break;
+                       }
 
-               if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
-                       error = cache_fpl_aborted(fpl);
-                       break;
-               }
+                       if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
+                               error = cache_fpl_aborted(fpl);
+                               break;
+                       }
 
-               fpl->dvp = fpl->tvp;
-               fpl->dvp_seqc = fpl->tvp_seqc;
+                       fpl->dvp = fpl->tvp;
+                       fpl->dvp_seqc = fpl->tvp_seqc;
+                       cache_fplookup_parse_advance(fpl);
+               }
 
-               cache_fplookup_parse_advance(fpl);
                cache_fpl_checkpoint(fpl);
        }
 
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index b6529623ecbb..ad65ab11bb1d 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -578,6 +578,7 @@ namei(struct nameidata *ndp)
            ndp->ni_startdir->v_type == VBAD);
 
        ndp->ni_lcf = 0;
+       ndp->ni_loopcnt = 0;
        ndp->ni_vp = NULL;
 
        error = namei_getpath(ndp);
@@ -611,8 +612,16 @@ namei(struct nameidata *ndp)
                TAILQ_INIT(&ndp->ni_cap_tracker);
                dp = ndp->ni_startdir;
                break;
+       case CACHE_FPL_STATUS_DESTROYED:
+               ndp->ni_loopcnt = 0;
+               error = namei_getpath(ndp);
+               if (__predict_false(error != 0)) {
+                       return (error);
+               }
+               /* FALLTHROUGH */
        case CACHE_FPL_STATUS_ABORTED:
                TAILQ_INIT(&ndp->ni_cap_tracker);
+               MPASS(ndp->ni_lcf == 0);
                error = namei_setup(ndp, &dp, &pwd);
                if (error != 0) {
                        namei_cleanup_cnp(cnp);
@@ -621,8 +630,6 @@ namei(struct nameidata *ndp)
                break;
        }
 
-       ndp->ni_loopcnt = 0;
-
        /*
         * Locked lookup.
         */
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 8461fd0d49b5..047e4c54f0c5 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -5452,6 +5452,19 @@ vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused)
        VFS_SMR_ASSERT_ENTERED();
 }
 
+void
+vop_fplookup_symlink_debugpre(void *ap __unused)
+{
+
+       VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_symlink_debugpost(void *ap __unused, int rc __unused)
+{
+
+       VFS_SMR_ASSERT_ENTERED();
+}
 void
 vop_strategy_debugpre(void *ap)
 {
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index ed6f3bf9d61b..5d15d4a0c863 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -156,6 +156,16 @@ vop_fplookup_vexec {
 };
 
 
+%% fplookup_symlink    vp      - - -
+%! fplookup_symlink    debugpre        vop_fplookup_symlink_debugpre
+%! fplookup_symlink    debugpost       vop_fplookup_symlink_debugpost
+
+vop_fplookup_symlink {
+       IN struct vnode *vp;
+       IN struct cache_fpl *fpl;
+};
+
+
 %% access      vp      L L L
 
 vop_access {
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index c045bc7c6bc5..1f9bae9e2753 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -116,8 +116,8 @@ struct nameidata {
 
 #ifdef _KERNEL
 
-enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL,
-    CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
+enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED,
+    CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
 int    cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
     struct pwd **pwdp);
 
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 19d6d3fb2d35..f540b240e848 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -60,7 +60,7 @@
  *             in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1400000      /* Master, propagated to newvers */
+#define __FreeBSD_version 1400001      /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 0eadfec02313..ffd0bdad940d 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -66,6 +66,7 @@ enum vgetstate        { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT };
  */
 
 struct namecache;
+struct cache_fpl;
 
 struct vpollinfo {
        struct  mtx vpi_lock;           /* lock to protect below */
@@ -646,6 +647,10 @@ void       cache_purge(struct vnode *vp);
 void   cache_purge_vgone(struct vnode *vp);
 void   cache_purge_negative(struct vnode *vp);
 void   cache_purgevfs(struct mount *mp);
+char   *cache_symlink_alloc(size_t size, int flags);
+void   cache_symlink_free(char *string, size_t size);
+int    cache_symlink_resolve(struct cache_fpl *fpl, const char *string,
+           size_t len);
 void   cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
     struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
 void   cache_vop_rmdir(struct vnode *dvp, struct vnode *vp);
@@ -900,6 +905,8 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
 #ifdef DEBUG_VFS_LOCKS
 void   vop_fplookup_vexec_debugpre(void *a);
 void   vop_fplookup_vexec_debugpost(void *a, int rc);
+void   vop_fplookup_symlink_debugpre(void *a);
+void   vop_fplookup_symlink_debugpost(void *a, int rc);
 void   vop_strategy_debugpre(void *a);
 void   vop_lock_debugpre(void *a);
 void   vop_lock_debugpost(void *a, int rc);
@@ -910,6 +917,8 @@ void        vop_mkdir_debugpost(void *a, int rc);
 #else
 #define        vop_fplookup_vexec_debugpre(x)          do { } while (0)
 #define        vop_fplookup_vexec_debugpost(x, y)      do { } while (0)
+#define        vop_fplookup_symlink_debugpre(x)        do { } while (0)
+#define        vop_fplookup_symlink_debugpost(x, y)    do { } while (0)
 #define        vop_strategy_debugpre(x)                do { } while (0)
 #define        vop_lock_debugpre(x)                    do { } while (0)
 #define        vop_lock_debugpost(x, y)                do { } while (0)
_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to