Author: mjg
Date: Tue Jun  9 23:03:48 2020
New Revision: 361993
URL: https://svnweb.freebsd.org/changeset/base/361993

Log:
  cred: distribute reference count per thread
  
  This avoids dirtying creds in the common case, see the comment in kern_prot.c
  for details.
  
  Reviewed by:  kib
  Differential Revision:        https://reviews.freebsd.org/D24007

Modified:
  head/sys/kern/init_main.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_prot.c
  head/sys/kern/kern_thread.c
  head/sys/sys/proc.h
  head/sys/sys/ucred.h

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c   Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/kern/init_main.c   Tue Jun  9 23:03:48 2020        (r361993)
@@ -541,7 +541,10 @@ proc0_init(void *dummy __unused)
        /* End hack. creds get properly set later with thread_cow_get_proc */
        curthread->td_ucred = NULL;
        newcred->cr_prison = &prison0;
+       newcred->cr_users++; /* avoid assertion failure */
        proc_set_cred_init(p, newcred);
+       newcred->cr_users--;
+       crfree(newcred);
 #ifdef AUDIT
        audit_cred_kproc0(newcred);
 #endif
@@ -810,8 +813,9 @@ create_init(const void *udata __unused)
 #endif
        proc_set_cred(initproc, newcred);
        td = FIRST_THREAD_IN_PROC(initproc);
-       crfree(td->td_ucred);
-       td->td_ucred = crhold(initproc->p_ucred);
+       crcowfree(td);
+       td->td_realucred = crcowget(initproc->p_ucred);
+       td->td_ucred = td->td_realucred;
        PROC_UNLOCK(initproc);
        sx_xunlock(&proctree_lock);
        crfree(oldcred);

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c   Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/kern/kern_fork.c   Tue Jun  9 23:03:48 2020        (r361993)
@@ -961,7 +961,7 @@ fork1(struct thread *td, struct fork_req *fr)
         * XXX: This is ugly; when we copy resource usage, we need to bump
         *      per-cred resource counters.
         */
-       proc_set_cred_init(newproc, crhold(td->td_ucred));
+       proc_set_cred_init(newproc, td->td_ucred);
 
        /*
         * Initialize resource accounting for the child process.
@@ -998,8 +998,7 @@ fail0:
 #endif
        racct_proc_exit(newproc);
 fail1:
-       crfree(newproc->p_ucred);
-       newproc->p_ucred = NULL;
+       proc_unset_cred(newproc);
 fail2:
        if (vm2 != NULL)
                vmspace_free(vm2);

Modified: head/sys/kern/kern_prot.c
==============================================================================
--- head/sys/kern/kern_prot.c   Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/kern/kern_prot.c   Tue Jun  9 23:03:48 2020        (r361993)
@@ -1841,6 +1841,98 @@ p_canwait(struct thread *td, struct proc *p)
 }
 
 /*
+ * Credential management.
+ *
+ * struct ucred objects are rarely allocated but gain and lose references all
+ * the time (e.g., on struct file alloc/dealloc) turning refcount updates into
+ * a significant source of cache-line ping ponging. Common cases are worked
+ * around by modifying thread-local counter instead if the cred to operate on
+ * matches td_realucred.
+ *
+ * The counter is split into 2 parts:
+ * - cr_users -- total count of all struct proc and struct thread objects
+ *   which have given cred in p_ucred and td_ucred respectively
+ * - cr_ref -- the actual ref count, only valid if cr_users == 0
+ *
+ * If users == 0 then cr_ref behaves similarly to refcount(9), in particular if
+ * the count reaches 0 the object is freeable.
+ * If users > 0 and curthread->td_realucred == cred, then updates are performed
+ * against td_ucredref.
+ * In other cases updates are performed against cr_ref.
+ *
+ * Changing td_realucred into something else decrements cr_users and transfers
+ * accumulated updates.
+ */
+struct ucred *
+crcowget(struct ucred *cr)
+{
+
+       mtx_lock(&cr->cr_mtx);
+       KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+           __func__, cr->cr_users, cr));
+       cr->cr_users++;
+       cr->cr_ref++;
+       mtx_unlock(&cr->cr_mtx);
+       return (cr);
+}
+
+static struct ucred *
+crunuse(struct thread *td)
+{
+       struct ucred *cr, *crold;
+
+       cr = td->td_ucred;
+       mtx_lock(&cr->cr_mtx);
+       cr->cr_ref += td->td_ucredref;
+       td->td_ucredref = 0;
+       KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+           __func__, cr->cr_users, cr));
+       cr->cr_users--;
+       if (cr->cr_users == 0) {
+               KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+                   __func__, cr->cr_ref, cr));
+               crold = cr;
+       } else {
+               cr->cr_ref--;
+               crold = NULL;
+       }
+       mtx_unlock(&cr->cr_mtx);
+       return (crold);
+}
+
+void
+crcowfree(struct thread *td)
+{
+       struct ucred *cr;
+
+       cr = crunuse(td);
+       if (cr != NULL)
+               crfree(cr);
+}
+
+struct ucred *
+crcowsync(void)
+{
+       struct thread *td;
+       struct proc *p;
+       struct ucred *crnew, *crold;
+
+       td = curthread;
+       p = td->td_proc;
+       PROC_LOCK_ASSERT(p, MA_OWNED);
+
+       MPASS(td->td_realucred == td->td_ucred);
+       if (td->td_realucred == p->p_ucred)
+               return (NULL);
+
+       crnew = crcowget(p->p_ucred);
+       crold = crunuse(td);
+       td->td_realucred = crnew;
+       td->td_ucred = td->td_realucred;
+       return (crold);
+}
+
+/*
  * Allocate a zeroed cred structure.
  */
 struct ucred *
@@ -1849,7 +1941,8 @@ crget(void)
        struct ucred *cr;
 
        cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
-       refcount_init(&cr->cr_ref, 1);
+       mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF);
+       cr->cr_ref = 1;
 #ifdef AUDIT
        audit_cred_init(cr);
 #endif
@@ -1868,8 +1961,18 @@ crget(void)
 struct ucred *
 crhold(struct ucred *cr)
 {
+       struct thread *td;
 
-       refcount_acquire(&cr->cr_ref);
+       td = curthread;
+       if (__predict_true(td->td_realucred == cr)) {
+               KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+                   __func__, cr->cr_users, cr));
+               td->td_ucredref++;
+               return (cr);
+       }
+       mtx_lock(&cr->cr_mtx);
+       cr->cr_ref++;
+       mtx_unlock(&cr->cr_mtx);
        return (cr);
 }
 
@@ -1879,36 +1982,51 @@ crhold(struct ucred *cr)
 void
 crfree(struct ucred *cr)
 {
+       struct thread *td;
 
-       KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
-       KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
-       if (refcount_release(&cr->cr_ref)) {
-               /*
-                * Some callers of crget(), such as nfs_statfs(),
-                * allocate a temporary credential, but don't
-                * allocate a uidinfo structure.
-                */
-               if (cr->cr_uidinfo != NULL)
-                       uifree(cr->cr_uidinfo);
-               if (cr->cr_ruidinfo != NULL)
-                       uifree(cr->cr_ruidinfo);
-               /*
-                * Free a prison, if any.
-                */
-               if (cr->cr_prison != NULL)
-                       prison_free(cr->cr_prison);
-               if (cr->cr_loginclass != NULL)
-                       loginclass_free(cr->cr_loginclass);
+       td = curthread;
+       if (td->td_realucred == cr) {
+               KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+                   __func__, cr->cr_users, cr));
+               td->td_ucredref--;
+               return;
+       }
+       mtx_lock(&cr->cr_mtx);
+       KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
+           __func__, cr->cr_users, cr));
+       cr->cr_ref--;
+       if (cr->cr_users > 0) {
+               mtx_unlock(&cr->cr_mtx);
+               return;
+       }
+       KASSERT(cr->cr_ref >= 0, ("%s: ref %d not >= 0 on cred %p",
+           __func__, cr->cr_ref, cr));
+       if (cr->cr_ref > 0) {
+               mtx_unlock(&cr->cr_mtx);
+               return;
+       }
+       /*
+        * Some callers of crget(), such as nfs_statfs(), allocate a temporary
+        * credential, but don't allocate a uidinfo structure.
+        */
+       if (cr->cr_uidinfo != NULL)
+               uifree(cr->cr_uidinfo);
+       if (cr->cr_ruidinfo != NULL)
+               uifree(cr->cr_ruidinfo);
+       if (cr->cr_prison != NULL)
+               prison_free(cr->cr_prison);
+       if (cr->cr_loginclass != NULL)
+               loginclass_free(cr->cr_loginclass);
 #ifdef AUDIT
-               audit_cred_destroy(cr);
+       audit_cred_destroy(cr);
 #endif
 #ifdef MAC
-               mac_cred_destroy(cr);
+       mac_cred_destroy(cr);
 #endif
-               if (cr->cr_groups != cr->cr_smallgroups)
-                       free(cr->cr_groups, M_CRED);
-               free(cr, M_CRED);
-       }
+       mtx_destroy(&cr->cr_mtx);
+       if (cr->cr_groups != cr->cr_smallgroups)
+               free(cr->cr_groups, M_CRED);
+       free(cr, M_CRED);
 }
 
 /*
@@ -1982,7 +2100,7 @@ void
 proc_set_cred_init(struct proc *p, struct ucred *newcred)
 {
 
-       p->p_ucred = newcred;
+       p->p_ucred = crcowget(newcred);
 }
 
 /*
@@ -1998,10 +2116,20 @@ proc_set_cred_init(struct proc *p, struct ucred *newcr
 void
 proc_set_cred(struct proc *p, struct ucred *newcred)
 {
+       struct ucred *cr;
 
-       MPASS(p->p_ucred != NULL);
+       cr = p->p_ucred;
+       MPASS(cr != NULL);
        PROC_LOCK_ASSERT(p, MA_OWNED);
+       KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p",
+           __func__, newcred->cr_users, newcred));
+       mtx_lock(&cr->cr_mtx);
+       KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+           __func__, cr->cr_users, cr));
+       cr->cr_users--;
+       mtx_unlock(&cr->cr_mtx);
        p->p_ucred = newcred;
+       newcred->cr_users = 1;
        PROC_UPDATE_COW(p);
 }
 
@@ -2010,9 +2138,17 @@ proc_unset_cred(struct proc *p)
 {
        struct ucred *cr;
 
-       MPASS(p->p_state == PRS_ZOMBIE);
+       MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW);
        cr = p->p_ucred;
        p->p_ucred = NULL;
+       KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+           __func__, cr->cr_users, cr));
+       mtx_lock(&cr->cr_mtx);
+       cr->cr_users--;
+       if (cr->cr_users == 0)
+               KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+                   __func__, cr->cr_ref, cr));
+       mtx_unlock(&cr->cr_mtx);
        crfree(cr);
 }
 

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/kern/kern_thread.c Tue Jun  9 23:03:48 2020        (r361993)
@@ -82,9 +82,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0x
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x498,
+_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb0,
     "struct proc KBI p_flag");
@@ -102,9 +102,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0x
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x2fc,
+_Static_assert(offsetof(struct thread, td_frame) == 0x304,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x340,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x348,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0x68,
     "struct proc KBI p_flag");
@@ -463,7 +463,8 @@ thread_cow_get_proc(struct thread *newtd, struct proc 
 {
 
        PROC_LOCK_ASSERT(p, MA_OWNED);
-       newtd->td_ucred = crhold(p->p_ucred);
+       newtd->td_realucred = crcowget(p->p_ucred);
+       newtd->td_ucred = newtd->td_realucred;
        newtd->td_limit = lim_hold(p->p_limit);
        newtd->td_cowgen = p->p_cowgen;
 }
@@ -472,7 +473,9 @@ void
 thread_cow_get(struct thread *newtd, struct thread *td)
 {
 
-       newtd->td_ucred = crhold(td->td_ucred);
+       MPASS(td->td_realucred == td->td_ucred);
+       newtd->td_realucred = crcowget(td->td_realucred);
+       newtd->td_ucred = newtd->td_realucred;
        newtd->td_limit = lim_hold(td->td_limit);
        newtd->td_cowgen = td->td_cowgen;
 }
@@ -481,8 +484,8 @@ void
 thread_cow_free(struct thread *td)
 {
 
-       if (td->td_ucred != NULL)
-               crfree(td->td_ucred);
+       if (td->td_realucred != NULL)
+               crcowfree(td);
        if (td->td_limit != NULL)
                lim_free(td->td_limit);
 }
@@ -495,13 +498,9 @@ thread_cow_update(struct thread *td)
        struct plimit *oldlimit;
 
        p = td->td_proc;
-       oldcred = NULL;
        oldlimit = NULL;
        PROC_LOCK(p);
-       if (td->td_ucred != p->p_ucred) {
-               oldcred = td->td_ucred;
-               td->td_ucred = crhold(p->p_ucred);
-       }
+       oldcred = crcowsync();
        if (td->td_limit != p->p_limit) {
                oldlimit = td->td_limit;
                td->td_limit = lim_hold(p->p_limit);

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/sys/proc.h Tue Jun  9 23:03:48 2020        (r361993)
@@ -267,7 +267,8 @@ struct thread {
        struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
        int             td_intr_nesting_level; /* (k) Interrupt recursion. */
        int             td_pinned;      /* (k) Temporary cpu pin count. */
-       struct ucred    *td_ucred;      /* (k) Reference to credentials. */
+       struct ucred    *td_realucred;  /* (k) Reference to credentials. */
+       struct ucred    *td_ucred;      /* (k) Used credentials, temporarily 
switchable. */
        struct plimit   *td_limit;      /* (k) Resource limits. */
        int             td_slptick;     /* (t) Time at sleep. */
        int             td_blktick;     /* (t) Time spent blocked. */
@@ -305,6 +306,7 @@ struct thread {
        int             td_errno;       /* (k) Error from last syscall. */
        size_t          td_vslock_sz;   /* (k) amount of vslock-ed space */
        struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */
+       u_int           td_ucredref;    /* (k) references on td_realucred */
 #define        td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */

Modified: head/sys/sys/ucred.h
==============================================================================
--- head/sys/sys/ucred.h        Tue Jun  9 22:42:54 2020        (r361992)
+++ head/sys/sys/ucred.h        Tue Jun  9 23:03:48 2020        (r361993)
@@ -35,6 +35,10 @@
 #ifndef _SYS_UCRED_H_
 #define        _SYS_UCRED_H_
 
+#if defined(_KERNEL) || defined(_WANT_UCRED)
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#endif
 #include <bsm/audit.h>
 
 struct loginclass;
@@ -46,10 +50,19 @@ struct loginclass;
  *
  * Please do not inspect cr_uid directly to determine superuserness.  The
  * priv(9) interface should be used to check for privilege.
+ *
+ * Lock reference:
+ *      c - cr_mtx
+ *
+ * Unmarked fields are constant after creation.
+ *
+ * See "Credential management" comment in kern_prot.c for more information.
  */
 #if defined(_KERNEL) || defined(_WANT_UCRED)
 struct ucred {
-       u_int   cr_ref;                 /* reference count */
+       struct mtx cr_mtx;
+       u_int   cr_ref;                 /* (c) reference count */
+       u_int   cr_users;               /* (c) proc + thread using this cred */
 #define        cr_startcopy cr_uid
        uid_t   cr_uid;                 /* effective user id */
        uid_t   cr_ruid;                /* real user id */
@@ -115,8 +128,11 @@ void       proc_set_cred_init(struct proc *p, struct ucred 
*
 void   proc_set_cred(struct proc *p, struct ucred *cr);
 void   proc_unset_cred(struct proc *p);
 void   crfree(struct ucred *cr);
+struct ucred   *crcowsync(void);
 struct ucred   *crget(void);
 struct ucred   *crhold(struct ucred *cr);
+struct ucred   *crcowget(struct ucred *cr);
+void   crcowfree(struct thread *td);
 void   cru2x(struct ucred *cr, struct xucred *xcr);
 void   cru2xt(struct thread *td, struct xucred *xcr);
 void   crsetgroups(struct ucred *cr, int n, gid_t *groups);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to