Author: mjg
Date: Thu Oct 22 19:28:12 2020
New Revision: 366950
URL: https://svnweb.freebsd.org/changeset/base/366950

Log:
  vfs: prevent avoidable evictions on mkdir of existing directories
  
  mkdir -p /foo/bar/baz will mkdir each path component and ignore EEXIST.
  
  The NOCACHE lookup will make the namecache unnecessarily evict the existing 
entry,
  and then fallback to the fs lookup routine eventually leading namei to return 
an
  error as the directory is already there.
  
  For invocations like mkdir -p /usr/obj/usr/src/sys/GENERIC/modules this 
triggers
  fallbacks to the slowpath for concurrently executing lookups.
  
  Tested by:    pho
  Discussed with:       kib

Modified:
  head/sys/kern/vfs_cache.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vnode_if.src
  head/sys/sys/namei.h
  head/sys/sys/vnode.h

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c   Thu Oct 22 19:25:01 2020        (r366949)
+++ head/sys/kern/vfs_cache.c   Thu Oct 22 19:28:12 2020        (r366950)
@@ -1676,7 +1676,8 @@ cache_lookup_fallback(struct vnode *dvp, struct vnode 
        int error;
        bool whiteout;
 
-       MPASS((cnp->cn_flags & (MAKEENTRY | ISDOTDOT)) == MAKEENTRY);
+       MPASS((cnp->cn_flags & ISDOTDOT) == 0);
+       MPASS((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) != 0);
 
 retry:
        hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
@@ -1768,7 +1769,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, st
 
        MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 
-       if ((cnp->cn_flags & MAKEENTRY) == 0) {
+       if ((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) == 0) {
                cache_remove_cnp(dvp, cnp);
                return (0);
        }
@@ -2595,6 +2596,35 @@ cache_rename(struct vnode *fdvp, struct vnode *fvp, st
                cache_remove_cnp(tdvp, tcnp);
        }
 }
+
+#ifdef INVARIANTS
+/*
+ * Validate that if an entry exists it matches.
+ */
+void
+cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
+{
+       struct namecache *ncp;
+       struct mtx *blp;
+       uint32_t hash;
+
+       hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
+       if (CK_SLIST_EMPTY(NCHHASH(hash)))
+               return;
+       blp = HASH2BUCKETLOCK(hash);
+       mtx_lock(blp);
+       CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+               if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
+                   !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) {
+                       if (ncp->nc_vp != vp)
+                               panic("%s: mismatch (%p != %p); ncp %p [%s] dvp 
%p vp %p\n",
+                                   __func__, vp, ncp->nc_vp, ncp, 
ncp->nc_name, ncp->nc_dvp,
+                                   ncp->nc_vp);
+               }
+       }
+       mtx_unlock(blp);
+}
+#endif
 
 /*
  * Flush all entries referencing a particular filesystem.

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Thu Oct 22 19:25:01 2020        (r366949)
+++ head/sys/kern/vfs_subr.c    Thu Oct 22 19:28:12 2020        (r366950)
@@ -5587,6 +5587,18 @@ vop_mkdir_post(void *ap, int rc)
                VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
 }
 
+#ifdef DEBUG_VFS_LOCKS
+void
+vop_mkdir_debugpost(void *ap, int rc)
+{
+       struct vop_mkdir_args *a;
+
+       a = ap;
+       if (!rc)
+               cache_validate(a->a_dvp, *a->a_vpp, a->a_cnp);
+}
+#endif
+
 void
 vop_mknod_pre(void *ap)
 {

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Thu Oct 22 19:25:01 2020        
(r366949)
+++ head/sys/kern/vfs_syscalls.c        Thu Oct 22 19:28:12 2020        
(r366950)
@@ -3758,8 +3758,8 @@ kern_mkdirat(struct thread *td, int fd, const char *pa
 restart:
        bwillwrite();
        NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-           NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
-           td);
+           NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
+           &cap_mkdirat_rights, td);
        nd.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&nd)) != 0)
                return (error);

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src  Thu Oct 22 19:25:01 2020        (r366949)
+++ head/sys/kern/vnode_if.src  Thu Oct 22 19:28:12 2020        (r366950)
@@ -336,6 +336,7 @@ vop_rename {
 %% mkdir       vpp     - E -
 %! mkdir       pre     vop_mkdir_pre
 %! mkdir       post    vop_mkdir_post
+%! mkdir       debugpost vop_mkdir_debugpost
 
 vop_mkdir {
        IN struct vnode *dvp;

Modified: head/sys/sys/namei.h
==============================================================================
--- head/sys/sys/namei.h        Thu Oct 22 19:25:01 2020        (r366949)
+++ head/sys/sys/namei.h        Thu Oct 22 19:28:12 2020        (r366950)
@@ -125,16 +125,19 @@ int       cache_fplookup(struct nameidata *ndp, enum 
cache_f
 /*
  * namei operational modifier flags, stored in ni_cnd.flags
  */
+#define        NC_NOMAKEENTRY  0x0001  /* name must not be added to cache */
+#define        NC_KEEPPOSENTRY 0x0002  /* don't evict a positive entry */
+#define        NOCACHE         NC_NOMAKEENTRY  /* for compatibility with older 
code */
 #define        LOCKLEAF        0x0004  /* lock vnode on return */
 #define        LOCKPARENT      0x0008  /* want parent vnode returned locked */
 #define        WANTPARENT      0x0010  /* want parent vnode returned unlocked 
*/
-#define        NOCACHE         0x0020  /* name must not be left in cache */
+/* UNUSED              0x0020 */
 #define        FOLLOW          0x0040  /* follow symbolic links */
 #define        BENEATH         0x0080  /* No escape from the start dir */
 #define        LOCKSHARED      0x0100  /* Shared lock leaf */
 #define        NOFOLLOW        0x0000  /* do not follow symbolic links 
(pseudo) */
 #define        RBENEATH        0x100000000ULL /* No escape, even tmp, from 
start dir */
-#define        MODMASK         0xf000001fcULL  /* mask of operational 
modifiers */
+#define        MODMASK         0xf000001ffULL  /* mask of operational 
modifiers */
 /*
  * Namei parameter descriptors.
  *

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Thu Oct 22 19:25:01 2020        (r366949)
+++ head/sys/sys/vnode.h        Thu Oct 22 19:28:12 2020        (r366950)
@@ -644,6 +644,15 @@ void       cache_purge_negative(struct vnode *vp);
 void   cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
     struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
 void   cache_purgevfs(struct mount *mp);
+#ifdef INVARIANTS
+void   cache_validate(struct vnode *dvp, struct vnode *vp,
+           struct componentname *cnp);
+#else
+static inline void
+cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
+{
+}
+#endif
 int    change_dir(struct vnode *vp, struct thread *td);
 void   cvtstat(struct stat *st, struct ostat *ost);
 void   freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
@@ -880,6 +889,7 @@ void        vop_lock_debugpost(void *a, int rc);
 void   vop_unlock_debugpre(void *a);
 void   vop_need_inactive_debugpre(void *a);
 void   vop_need_inactive_debugpost(void *a, int rc);
+void   vop_mkdir_debugpost(void *a, int rc);
 #else
 #define        vop_fplookup_vexec_debugpre(x)          do { } while (0)
 #define        vop_fplookup_vexec_debugpost(x, y)      do { } while (0)
@@ -889,6 +899,7 @@ void        vop_need_inactive_debugpost(void *a, int rc);
 #define        vop_unlock_debugpre(x)                  do { } while (0)
 #define        vop_need_inactive_debugpre(x)           do { } while (0)
 #define        vop_need_inactive_debugpost(x, y)       do { } while (0)
+#define        vop_mkdir_debugpost(x, y)               do { } while (0)
 #endif
 
 void   vop_rename_fail(struct vop_rename_args *ap);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to