Author: mjg
Date: Sun Mar  8 00:23:36 2020
New Revision: 358734
URL: https://svnweb.freebsd.org/changeset/base/358734

Log:
  fd: use smr for managing struct pwd
  
  This has a side effect of eliminating filedesc slock/sunlock during path
  lookup, which in turn removes contention vs concurrent modifications to the fd
  table.
  
  Reviewed by:  markj, kib
  Differential Revision:        https://reviews.freebsd.org/D23889

Modified:
  head/lib/libprocstat/libprocstat.c
  head/sys/kern/kern_descrip.c
  head/sys/kern/kern_linker.c
  head/sys/sys/filedesc.h
  head/sys/ufs/ffs/ffs_alloc.c

Modified: head/lib/libprocstat/libprocstat.c
==============================================================================
--- head/lib/libprocstat/libprocstat.c  Sun Mar  8 00:22:32 2020        
(r358733)
+++ head/lib/libprocstat/libprocstat.c  Sun Mar  8 00:23:36 2020        
(r358734)
@@ -460,6 +460,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
        struct file file;
        struct filedesc filed;
        struct pwd pwd;
+       unsigned long pwd_addr;
        struct vm_map_entry vmentry;
        struct vm_object object;
        struct vmspace vmspace;
@@ -488,10 +489,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
                return (NULL);
        }
        haspwd = false;
-       if (filed.fd_pwd != NULL) {
-               if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd,
-                   sizeof(pwd))) {
-                       warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd);
+       pwd_addr = (unsigned long)(FILEDESC_KVM_LOAD_PWD(&filed));
+       if (pwd_addr != 0) {
+               if (!kvm_read_all(kd, pwd_addr, &pwd, sizeof(pwd))) {
+                       warnx("can't read fd_pwd at %p", (void *)pwd_addr);
                        return (NULL);
                }
                haspwd = true;

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Sun Mar  8 00:22:32 2020        
(r358733)
+++ head/sys/kern/kern_descrip.c        Sun Mar  8 00:23:36 2020        
(r358734)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sbuf.h>
 #include <sys/signalvar.h>
 #include <sys/kdb.h>
+#include <sys/smr.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
@@ -101,6 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
 
 static __read_mostly uma_zone_t file_zone;
 static __read_mostly uma_zone_t filedesc0_zone;
+static __read_mostly uma_zone_t pwd_zone;
+static __read_mostly smr_t pwd_smr;
 
 static int     closefp(struct filedesc *fdp, int fd, struct file *fp,
                    struct thread *td, int holdleaders);
@@ -1985,6 +1988,7 @@ fdinit(struct filedesc *fdp, bool prepfiles)
 {
        struct filedesc0 *newfdp0;
        struct filedesc *newfdp;
+       struct pwd *newpwd;
 
        newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
        newfdp = &newfdp0->fd_fd;
@@ -2000,7 +2004,8 @@ fdinit(struct filedesc *fdp, bool prepfiles)
        newfdp->fd_files->fdt_nfiles = NDFILE;
 
        if (fdp == NULL) {
-               newfdp->fd_pwd = pwd_alloc();
+               newpwd = pwd_alloc();
+               smr_serialized_store(&newfdp->fd_pwd, newpwd, true);
                return (newfdp);
        }
 
@@ -2008,7 +2013,8 @@ fdinit(struct filedesc *fdp, bool prepfiles)
                fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 
        FILEDESC_SLOCK(fdp);
-       newfdp->fd_pwd = pwd_hold_filedesc(fdp);
+       newpwd = pwd_hold_filedesc(fdp);
+       smr_serialized_store(&newfdp->fd_pwd, newpwd, true);
 
        if (!prepfiles) {
                FILEDESC_SUNLOCK(fdp);
@@ -2328,7 +2334,7 @@ fdescfree(struct thread *td)
                return;
 
        FILEDESC_XLOCK(fdp);
-       pwd = fdp->fd_pwd;
+       pwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
        pwd_set(fdp, NULL);
        FILEDESC_XUNLOCK(fdp);
 
@@ -2341,7 +2347,7 @@ void
 fdescfree_remapped(struct filedesc *fdp)
 {
 
-       pwd_drop(fdp->fd_pwd);
+       pwd_drop(smr_serialized_load(&fdp->fd_pwd, true));
        fdescfree_fds(curthread, fdp, 0);
 }
 
@@ -3277,7 +3283,7 @@ pwd_hold_filedesc(struct filedesc *fdp)
        struct pwd *pwd;
 
        FILEDESC_LOCK_ASSERT(fdp);
-       pwd = fdp->fd_pwd;
+       pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
        if (pwd != NULL)
                refcount_acquire(&pwd->pwd_refcount);
        return (pwd);
@@ -3291,11 +3297,14 @@ pwd_hold(struct thread *td)
 
        fdp = td->td_proc->p_fd;
 
-       FILEDESC_SLOCK(fdp);
-       pwd = fdp->fd_pwd;
-       MPASS(pwd != NULL);
-       refcount_acquire(&pwd->pwd_refcount);
-       FILEDESC_SUNLOCK(fdp);
+       smr_enter(pwd_smr);
+       for (;;) {
+               pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
+               MPASS(pwd != NULL);
+               if (refcount_acquire_if_not_zero(&pwd->pwd_refcount))
+                       break;
+       }
+       smr_exit(pwd_smr);
        return (pwd);
 }
 
@@ -3304,7 +3313,8 @@ pwd_alloc(void)
 {
        struct pwd *pwd;
 
-       pwd = malloc(sizeof(*pwd), M_PWD, M_WAITOK | M_ZERO);
+       pwd = uma_zalloc_smr(pwd_zone, M_WAITOK);
+       bzero(pwd, sizeof(*pwd));
        refcount_init(&pwd->pwd_refcount, 1);
        return (pwd);
 }
@@ -3322,7 +3332,7 @@ pwd_drop(struct pwd *pwd)
                vrele(pwd->pwd_rdir);
        if (pwd->pwd_jdir != NULL)
                vrele(pwd->pwd_jdir);
-       free(pwd, M_PWD);
+       uma_zfree_smr(pwd_zone, pwd);
 }
 
 /*
@@ -3340,7 +3350,7 @@ pwd_chroot(struct thread *td, struct vnode *vp)
        fdp = td->td_proc->p_fd;
        newpwd = pwd_alloc();
        FILEDESC_XLOCK(fdp);
-       oldpwd = fdp->fd_pwd;
+       oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
        if (chroot_allow_open_directories == 0 ||
            (chroot_allow_open_directories == 1 &&
            oldpwd->pwd_rdir != rootvnode)) {
@@ -3376,7 +3386,7 @@ pwd_chdir(struct thread *td, struct vnode *vp)
        newpwd = pwd_alloc();
        fdp = td->td_proc->p_fd;
        FILEDESC_XLOCK(fdp);
-       oldpwd = fdp->fd_pwd;
+       oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
        newpwd->pwd_cdir = vp;
        pwd_fill(oldpwd, newpwd);
        pwd_set(fdp, newpwd);
@@ -3392,7 +3402,7 @@ pwd_ensure_dirs(void)
 
        fdp = curproc->p_fd;
        FILEDESC_XLOCK(fdp);
-       oldpwd = fdp->fd_pwd;
+       oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
        if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) {
                FILEDESC_XUNLOCK(fdp);
                return;
@@ -3401,7 +3411,7 @@ pwd_ensure_dirs(void)
 
        newpwd = pwd_alloc();
        FILEDESC_XLOCK(fdp);
-       oldpwd = fdp->fd_pwd;
+       oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
        pwd_fill(oldpwd, newpwd);
        if (newpwd->pwd_cdir == NULL) {
                vrefact(rootvnode);
@@ -3441,7 +3451,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newd
                if (fdp == NULL)
                        continue;
                FILEDESC_XLOCK(fdp);
-               oldpwd = fdp->fd_pwd;
+               oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
                if (oldpwd == NULL ||
                    (oldpwd->pwd_cdir != olddp &&
                    oldpwd->pwd_rdir != olddp &&
@@ -4074,6 +4084,7 @@ int
 kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ssize_t maxlen)
 {
        struct filedesc *fdp;
+       struct pwd *pwd;
        struct export_fd_buf *efbuf;
        struct vnode *cdir;
        int error;
@@ -4091,7 +4102,8 @@ kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ss
        efbuf->remainder = maxlen;
 
        FILEDESC_SLOCK(fdp);
-       cdir = fdp->fd_pwd->pwd_cdir;
+       pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
+       cdir = pwd->pwd_cdir;
        if (cdir == NULL) {
                error = EINVAL;
        } else {
@@ -4279,6 +4291,9 @@ filelistinit(void *dummy)
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
        filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+       pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
+           NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
+       pwd_smr = uma_zone_get_smr(pwd_zone);
        mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 }
 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);

Modified: head/sys/kern/kern_linker.c
==============================================================================
--- head/sys/kern/kern_linker.c Sun Mar  8 00:22:32 2020        (r358733)
+++ head/sys/kern/kern_linker.c Sun Mar  8 00:23:36 2020        (r358734)
@@ -2063,6 +2063,22 @@ linker_hwpmc_list_objects(void)
 }
 #endif
 
+/* check if root file system is not mounted */
+static bool
+linker_root_mounted(void)
+{
+       struct pwd *pwd;
+       bool ret;
+
+       if (rootvnode == NULL)
+               return (false);
+
+       pwd = pwd_hold(curthread);
+       ret = pwd->pwd_rdir != NULL;
+       pwd_drop(pwd);
+       return (ret);
+}
+
 /*
  * Find a file which contains given module and load it, if "parent" is not
  * NULL, register a reference to it.
@@ -2084,15 +2100,13 @@ linker_load_module(const char *kldname, const char *mo
                 */
                KASSERT(verinfo == NULL, ("linker_load_module: verinfo"
                    " is not NULL"));
-               /* check if root file system is not mounted */
-               if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == 
NULL)
+               if (!linker_root_mounted())
                        return (ENXIO);
                pathname = linker_search_kld(kldname);
        } else {
                if (modlist_lookup2(modname, verinfo) != NULL)
                        return (EEXIST);
-               /* check if root file system is not mounted */
-               if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == 
NULL)
+               if (!linker_root_mounted())
                        return (ENXIO);
                if (kldname != NULL)
                        pathname = strdup(kldname, M_LINKER);

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h     Sun Mar  8 00:22:32 2020        (r358733)
+++ head/sys/sys/filedesc.h     Sun Mar  8 00:23:36 2020        (r358734)
@@ -42,6 +42,8 @@
 #include <sys/priority.h>
 #include <sys/seqc.h>
 #include <sys/sx.h>
+#include <sys/_smr.h>
+#include <sys/smr_types.h>
 
 #include <machine/_limits.h>
 
@@ -76,16 +78,23 @@ struct fdescenttbl {
  */
 #define NDSLOTTYPE     u_long
 
+/*
+ * This struct is copy-on-write and allocated from an SMR zone.
+ * All fields are constant after initialization apart from the reference count.
+ *
+ * Check pwd_* routines for usage.
+ */
 struct pwd {
        volatile u_int pwd_refcount;
        struct  vnode *pwd_cdir;                /* current directory */
        struct  vnode *pwd_rdir;                /* root directory */
        struct  vnode *pwd_jdir;                /* jail root directory */
 };
+typedef SMR_POINTER(struct pwd *) smrpwd_t;
 
 struct filedesc {
        struct  fdescenttbl *fd_files;  /* open files table */
-       struct  pwd *fd_pwd;            /* directories */
+       smrpwd_t fd_pwd;                /* directories */
        NDSLOTTYPE *fd_map;             /* bitmap of free fds */
        int     fd_lastfile;            /* high-water mark of fd_ofiles */
        int     fd_freefile;            /* approx. next free file */
@@ -141,6 +150,38 @@ struct filedesc_to_leader {
                                            SX_NOTRECURSED)
 #define        FILEDESC_UNLOCK_ASSERT(fdp)     sx_assert(&(fdp)->fd_sx, 
SX_UNLOCKED)
 
+#define        FILEDESC_LOCKED_LOAD_PWD(fdp)   ({                              
        \
+       struct filedesc *_fdp = (fdp);                                          
\
+       struct pwd *_pwd;                                                       
\
+       _pwd = smr_serialized_load(&(_fdp)->fd_pwd,                             
\
+           (FILEDESC_LOCK_ASSERT(_fdp), true));                                
\
+       _pwd;                                                                   
\
+})
+
+#define        FILEDESC_XLOCKED_LOAD_PWD(fdp)  ({                              
        \
+       struct filedesc *_fdp = (fdp);                                          
\
+       struct pwd *_pwd;                                                       
\
+       _pwd = smr_serialized_load(&(_fdp)->fd_pwd,                             
\
+           (FILEDESC_XLOCK_ASSERT(_fdp), true));                               
\
+       _pwd;                                                                   
\
+})
+
+#else
+
+/*
+ * Accessor for libkvm et al.
+ */
+#define        FILEDESC_KVM_LOAD_PWD(fdp)      ({                              
        \
+       struct filedesc *_fdp = (fdp);                                          
\
+       struct pwd *_pwd;                                                       
\
+       _pwd = smr_kvm_load(&(_fdp)->fd_pwd);                                   
\
+       _pwd;                                                                   
\
+})
+
+#endif
+
+#ifdef _KERNEL
+
 /* Operation types for kern_dup(). */
 enum {
        FDDUP_NORMAL,           /* dup() behavior. */
@@ -265,8 +306,8 @@ static inline void
 pwd_set(struct filedesc *fdp, struct pwd *newpwd)
 {
 
-       FILEDESC_XLOCK_ASSERT(fdp);
-       fdp->fd_pwd = newpwd;
+       smr_serialized_store(&fdp->fd_pwd, newpwd,
+           (FILEDESC_XLOCK_ASSERT(fdp), true));
 }
 
 #endif /* _KERNEL */

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Sun Mar  8 00:22:32 2020        
(r358733)
+++ head/sys/ufs/ffs/ffs_alloc.c        Sun Mar  8 00:23:36 2020        
(r358734)
@@ -3590,6 +3590,7 @@ buffered_write(fp, uio, active_cred, flags, td)
        int flags;
        struct thread *td;
 {
+       struct pwd *pwd;
        struct vnode *devvp, *vp;
        struct inode *ip;
        struct buf *bp;
@@ -3610,7 +3611,8 @@ buffered_write(fp, uio, active_cred, flags, td)
                return (EINVAL);
        fdp = td->td_proc->p_fd;
        FILEDESC_SLOCK(fdp);
-       vp = fdp->fd_pwd->pwd_cdir;
+       pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
+       vp = pwd->pwd_cdir;
        vref(vp);
        FILEDESC_SUNLOCK(fdp);
        vn_lock(vp, LK_SHARED | LK_RETRY);
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to