Author: mjg
Date: Thu Nov 19 10:00:48 2020
New Revision: 367842
URL: https://svnweb.freebsd.org/changeset/base/367842

Log:
  thread: numa-aware zombie reaping
  
  The current global list is a significant problem; in particular, it induces a
  lot of cross-domain thread frees. When running poudriere on a 2-domain box,
  about half of all frees were of that nature.
  
  The patch below introduces per-domain thread data containing zombie lists,
  along with domain-aware reaping. By default it reaps only from the current
  domain, falling back to the other domains only on a shortage of free TIDs.
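
  Condensed from the diff below (the rate-limited printf is elided), the
  allocation path expresses that policy as:

      static bool
      thread_count_inc(void)
      {

              /* Reap the local domain, then try to take a thread slot. */
              thread_reap();
              if (thread_count_inc_try())
                      return (true);

              /* Shortage: fall back to reaping every domain and retry. */
              thread_reap_all();
              if (thread_count_inc_try())
                      return (true);

              return (false);
      }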
  
  A dedicated callout is introduced to reap lingering zombies if there happens
  to be no thread creation activity to trigger the regular reaping.
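
  Condensed from the diff below (the atomic_load_int() snapshot of ticks is
  simplified to a plain read here), the callout re-arms itself every 5 seconds
  and, since callouts must not sleep, defers the actual reaping to a taskqueue
  whenever some domain has held zombies for over 5 seconds:

      static void
      thread_reap_callout_cb(void *arg __unused)
      {
              struct thread_domain_data *tdd;
              bool wantreap;
              int i;

              wantreap = false;
              for (i = 0; i < vm_ndomains; i++) {
                      tdd = &thread_domain_data[i];
                      /* Any zombies lingering past the 5 second window? */
                      if (tdd->tdd_zombies != NULL &&
                          (u_int)(ticks - tdd->tdd_reapticks) > 5 * hz) {
                              wantreap = true;
                              break;
                      }
              }

              if (wantreap)
                      taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
              callout_reset(&thread_reap_callout, 5 * hz,
                  thread_reap_callout_cb, NULL);
      }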
  
  Reviewed by:  kib, markj
  Differential Revision:        https://reviews.freebsd.org/D27185

Modified:
  head/sys/kern/kern_thread.c

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c Thu Nov 19 09:26:51 2020        (r367841)
+++ head/sys/kern/kern_thread.c Thu Nov 19 10:00:48 2020        (r367842)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/turnstile.h>
+#include <sys/taskqueue.h>
 #include <sys/ktr.h>
 #include <sys/rwlock.h>
 #include <sys/umtx.h>
@@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$");
 
 #include <security/audit/audit.h>
 
+#include <vm/pmap.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
+#include <vm/vm_phys.h>
 #include <sys/eventhandler.h>
 
 /*
@@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit);
  */
 static uma_zone_t thread_zone;
 
-static __exclusive_cache_line struct thread *thread_zombies;
+struct thread_domain_data {
+       struct thread   *tdd_zombies;
+       int             tdd_reapticks;
+} __aligned(CACHE_LINE_SIZE);
 
+static struct thread_domain_data thread_domain_data[MAXMEMDOM];
+
+static struct task     thread_reap_task;
+static struct callout          thread_reap_callout;
+
 static void thread_zombie(struct thread *);
+static void thread_reap_all(void);
+static void thread_reap_task_cb(void *, int);
+static void thread_reap_callout_cb(void *);
 static int thread_unsuspend_one(struct thread *td, struct proc *p,
     bool boundary);
 static void thread_free_batched(struct thread *td);
@@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init);
 EVENTHANDLER_LIST_DEFINE(thread_fini);
 
 static bool
-thread_count_inc(void)
+thread_count_inc_try(void)
 {
-       static struct timeval lastfail;
-       static int curfail;
        int nthreads_new;
 
-       thread_reap();
-
        nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
        if (nthreads_new >= maxthread - 100) {
                if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
                    nthreads_new >= maxthread) {
                        atomic_subtract_int(&nthreads, 1);
-                       if (ppsratecheck(&lastfail, &curfail, 1)) {
-                               printf("maxthread limit exceeded by uid %u "
-                               "(pid %d); consider increasing kern.maxthread\n",
-                               curthread->td_ucred->cr_ruid, curproc->p_pid);
-                       }
                        return (false);
                }
        }
        return (true);
 }
 
+static bool
+thread_count_inc(void)
+{
+       static struct timeval lastfail;
+       static int curfail;
+
+       thread_reap();
+       if (thread_count_inc_try()) {
+               return (true);
+       }
+
+       thread_reap_all();
+       if (thread_count_inc_try()) {
+               return (true);
+       }
+
+       if (ppsratecheck(&lastfail, &curfail, 1)) {
+               printf("maxthread limit exceeded by uid %u "
+                   "(pid %d); consider increasing kern.maxthread\n",
+                   curthread->td_ucred->cr_ruid, curproc->p_pid);
+       }
+       return (false);
+}
+
 static void
 thread_count_sub(int n)
 {
@@ -500,6 +529,10 @@ threadinit(void)
            M_TIDHASH, M_WAITOK | M_ZERO);
        for (i = 0; i < tidhashlock + 1; i++)
                rw_init(&tidhashtbl_lock[i], "tidhash");
+
+       TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
+       callout_init(&thread_reap_callout, 1);
+       callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
 }
 
 /*
@@ -508,12 +541,14 @@ threadinit(void)
 void
 thread_zombie(struct thread *td)
 {
+       struct thread_domain_data *tdd;
        struct thread *ztd;
 
-       ztd = atomic_load_ptr(&thread_zombies);
+       tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
+       ztd = atomic_load_ptr(&tdd->tdd_zombies);
        for (;;) {
                td->td_zombie = ztd;
-               if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
+               if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
                    (uintptr_t *)&ztd, (uintptr_t)td))
                        break;
                continue;
@@ -531,10 +566,10 @@ thread_stash(struct thread *td)
 }
 
 /*
- * Reap zombie threads.
+ * Reap zombies from passed domain.
  */
-void
-thread_reap(void)
+static void
+thread_reap_domain(struct thread_domain_data *tdd)
 {
        struct thread *itd, *ntd;
        struct tidbatch tidbatch;
@@ -547,19 +582,26 @@ thread_reap(void)
         * Reading upfront is pessimal if followed by concurrent atomic_swap,
         * but most of the time the list is empty.
         */
-       if (thread_zombies == NULL)
+       if (tdd->tdd_zombies == NULL)
                return;
 
-       itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
+       itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
            (uintptr_t)NULL);
        if (itd == NULL)
                return;
 
+       /*
+        * Multiple CPUs can get here, the race is fine as ticks is only
+        * advisory.
+        */
+       tdd->tdd_reapticks = ticks;
+
        tidbatch_prep(&tidbatch);
        credbatch_prep(&credbatch);
        tdcount = 0;
        lim = NULL;
        limcount = 0;
+
        while (itd != NULL) {
                ntd = itd->td_zombie;
                EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
@@ -592,6 +634,68 @@ thread_reap(void)
        }
        MPASS(limcount != 0);
        lim_freen(lim, limcount);
+}
+
+/*
+ * Reap zombies from all domains.
+ */
+static void
+thread_reap_all(void)
+{
+       struct thread_domain_data *tdd;
+       int i, domain;
+
+       domain = PCPU_GET(domain);
+       for (i = 0; i < vm_ndomains; i++) {
+               tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+               thread_reap_domain(tdd);
+       }
+}
+
+/*
+ * Reap zombies from local domain.
+ */
+void
+thread_reap(void)
+{
+       struct thread_domain_data *tdd;
+       int domain;
+
+       domain = PCPU_GET(domain);
+       tdd = &thread_domain_data[domain];
+
+       thread_reap_domain(tdd);
+}
+
+static void
+thread_reap_task_cb(void *arg __unused, int pending __unused)
+{
+
+       thread_reap_all();
+}
+
+static void
+thread_reap_callout_cb(void *arg __unused)
+{
+       struct thread_domain_data *tdd;
+       int i, cticks, lticks;
+       bool wantreap;
+
+       wantreap = false;
+       cticks = atomic_load_int(&ticks);
+       for (i = 0; i < vm_ndomains; i++) {
+               tdd = &thread_domain_data[i];
+               lticks = tdd->tdd_reapticks;
+               if (tdd->tdd_zombies != NULL &&
+                   (u_int)(cticks - lticks) > 5 * hz) {
+                       wantreap = true;
+                       break;
+               }
+       }
+
+       if (wantreap)
+               taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
+       callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
 }
 
 /*