The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=a72ee355646c4a379f55ac32d7d8dfe6c0d366f9

commit a72ee355646c4a379f55ac32d7d8dfe6c0d366f9
Author:     John Baldwin <j...@freebsd.org>
AuthorDate: 2021-10-14 22:48:34 +0000
Commit:     John Baldwin <j...@freebsd.org>
CommitDate: 2021-10-14 22:48:34 +0000

    ktls: Defer creation of threads and zones until first use.
    
    Run ktls_init() when the first KTLS session is created rather than
    unconditionally during boot.  This avoids creating unused threads and
    allocating unused resources on systems which do not use KTLS.
    
    Reviewed by:    gallatin, markj
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D32487
---
 sys/kern/uipc_ktls.c | 173 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 115 insertions(+), 58 deletions(-)
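
The new ktls_start_kthreads() in the diff below is a deferred ("lazy")
initialization gate: a tri-state flag (0 = not initialized, 1 = initialized,
-1 = failed) is read with an acquire load on the fast path, and an exclusive
lock serializes the single call to ktls_init() on the slow path before the
result is published with a release store.  The following is a minimal
userland sketch of that pattern only, assuming C11 atomics and pthreads in
place of the kernel's atomic_load_acq_int()/atomic_store_rel_int() and sx(9)
lock; lazy_start() and do_init() are hypothetical names, not part of the
commit.

    /*
     * Sketch of a deferred-initialization gate (hypothetical names).
     * State: 0 = not yet initialized, 1 = initialized, -1 = init failed.
     */
    #include <errno.h>
    #include <pthread.h>
    #include <stdatomic.h>

    static atomic_int init_state;
    static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

    static int
    do_init(void)
    {
            /* One-time creation of threads/zones would go here. */
            return (0);
    }

    static int
    lazy_start(void)
    {
            int error, state;

    start:
            /* Fast path: no lock once the state has been published. */
            state = atomic_load_explicit(&init_state, memory_order_acquire);
            if (state > 0)
                    return (0);
            if (state < 0)
                    return (ENXIO);

            pthread_mutex_lock(&init_lock);
            if (atomic_load_explicit(&init_state, memory_order_relaxed) != 0) {
                    /* Lost the race; re-read the published state. */
                    pthread_mutex_unlock(&init_lock);
                    goto start;
            }
            error = do_init();
            /* The release store pairs with the acquire load above. */
            atomic_store_explicit(&init_state, error == 0 ? 1 : -1,
                memory_order_release);
            pthread_mutex_unlock(&init_lock);
            return (error);
    }

As in the commit, the gate is invoked on the session-creation path and its
error is simply propagated to the caller, so systems that never use KTLS
never pay the thread and zone allocation cost.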

diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
index 1e778dbf113a..bde9fca97f50 100644
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -109,6 +109,9 @@ static struct proc *ktls_proc;
 static uma_zone_t ktls_session_zone;
 static uma_zone_t ktls_buffer_zone;
 static uint16_t ktls_cpuid_lookup[MAXCPU];
+static int ktls_init_state;
+static struct sx ktls_init_lock;
+SX_SYSINIT(ktls_init_lock, &ktls_init_lock, "ktls init");
 
 SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Kernel TLS offload");
@@ -379,12 +382,11 @@ ktls_free_mext_contig(struct mbuf *m)
        uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0]));
 }
 
-static void
-ktls_init(void *dummy __unused)
+static int
+ktls_init(void)
 {
        struct thread *td;
        struct pcpu *pc;
-       cpuset_t mask;
        int count, domain, error, i;
 
        ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS,
@@ -410,36 +412,40 @@ ktls_init(void *dummy __unused)
                STAILQ_INIT(&ktls_wq[i].m_head);
                STAILQ_INIT(&ktls_wq[i].so_head);
                mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
-               error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
-                   &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
-               if (error)
-                       panic("Can't add KTLS thread %d error %d", i, error);
-
-               /*
-                * Bind threads to cores.  If ktls_bind_threads is >
-                * 1, then we bind to the NUMA domain.
-                */
-               if (ktls_bind_threads) {
-                       if (ktls_bind_threads > 1) {
-                               pc = pcpu_find(i);
-                               domain = pc->pc_domain;
-                               CPU_COPY(&cpuset_domain[domain], &mask);
-                               count = ktls_domains[domain].count;
-                               ktls_domains[domain].cpu[count] = i;
-                               ktls_domains[domain].count++;
-                       } else {
-                               CPU_SETOF(i, &mask);
-                       }
-                       error = cpuset_setthread(td->td_tid, &mask);
-                       if (error)
-                               panic(
-                           "Unable to bind KTLS thread for CPU %d error %d",
-                                    i, error);
+               if (ktls_bind_threads > 1) {
+                       pc = pcpu_find(i);
+                       domain = pc->pc_domain;
+                       count = ktls_domains[domain].count;
+                       ktls_domains[domain].cpu[count] = i;
+                       ktls_domains[domain].count++;
                }
                ktls_cpuid_lookup[ktls_number_threads] = i;
                ktls_number_threads++;
        }
 
+       /*
+        * If we somehow have an empty domain, fall back to choosing
+        * among all KTLS threads.
+        */
+       if (ktls_bind_threads > 1) {
+               for (i = 0; i < vm_ndomains; i++) {
+                       if (ktls_domains[i].count == 0) {
+                               ktls_bind_threads = 1;
+                               break;
+                       }
+               }
+       }
+
+       /* Start kthreads for each workqueue. */
+       CPU_FOREACH(i) {
+               error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
+                   &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
+               if (error) {
+                       printf("Can't add KTLS thread %d error %d\n", i, error);
+                       return (error);
+               }
+       }
+
        /*
         * Start an allocation thread per-domain to perform blocking allocations
         * of 16k physically contiguous TLS crypto destination buffers.
@@ -454,35 +460,46 @@ ktls_init(void *dummy __unused)
                            &ktls_domains[domain], &ktls_proc,
                            &ktls_domains[domain].alloc_td.td,
                            0, 0, "KTLS", "alloc_%d", domain);
-                       if (error)
-                               panic("Can't add KTLS alloc thread %d error %d",
+                       if (error) {
+                               printf("Can't add KTLS alloc thread %d error %d\n",
                                    domain, error);
-                       CPU_COPY(&cpuset_domain[domain], &mask);
-                       error = cpuset_setthread(ktls_domains[domain].alloc_td.td->td_tid,
-                           &mask);
-                       if (error)
-                               panic("Unable to bind KTLS alloc %d error %d",
-                                   domain, error);
-               }
-       }
-
-       /*
-        * If we somehow have an empty domain, fall back to choosing
-        * among all KTLS threads.
-        */
-       if (ktls_bind_threads > 1) {
-               for (i = 0; i < vm_ndomains; i++) {
-                       if (ktls_domains[i].count == 0) {
-                               ktls_bind_threads = 1;
-                               break;
+                               return (error);
                        }
                }
        }
 
        if (bootverbose)
                printf("KTLS: Initialized %d threads\n", ktls_number_threads);
+       return (0);
+}
+
+static int
+ktls_start_kthreads(void)
+{
+       int error, state;
+
+start:
+       state = atomic_load_acq_int(&ktls_init_state);
+       if (__predict_true(state > 0))
+               return (0);
+       if (state < 0)
+               return (ENXIO);
+
+       sx_xlock(&ktls_init_lock);
+       if (ktls_init_state != 0) {
+               sx_xunlock(&ktls_init_lock);
+               goto start;
+       }
+
+       error = ktls_init();
+       if (error == 0)
+               state = 1;
+       else
+               state = -1;
+       atomic_store_rel_int(&ktls_init_state, state);
+       sx_xunlock(&ktls_init_lock);
+       return (error);
 }
-SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL);
 
 #if defined(INET) || defined(INET6)
 static int
@@ -583,6 +600,10 @@ ktls_create_session(struct socket *so, struct tls_enable *en,
                return (EINVAL);
        }
 
+       error = ktls_start_kthreads();
+       if (error != 0)
+               return (error);
+
        tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
 
        counter_u64_add(ktls_offload_active, 1);
@@ -2457,6 +2478,18 @@ ktls_encrypt_async(struct ktls_wq *wq, struct mbuf *top)
        CURVNET_RESTORE();
 }
 
+static int
+ktls_bind_domain(int domain)
+{
+       int error;
+
+       error = cpuset_setthread(curthread->td_tid, &cpuset_domain[domain]);
+       if (error != 0)
+               return (error);
+       curthread->td_domain.dr_policy = DOMAINSET_PREF(domain);
+       return (0);
+}
+
 static void
 ktls_alloc_thread(void *ctx)
 {
@@ -2465,14 +2498,16 @@ ktls_alloc_thread(void *ctx)
        void **buf;
        struct sysctl_oid *oid;
        char name[80];
-       int i, nbufs;
+       int domain, error, i, nbufs;
 
-       curthread->td_domain.dr_policy =
-           DOMAINSET_PREF(PCPU_GET(domain));
-       snprintf(name, sizeof(name), "domain%d", PCPU_GET(domain));
+       domain = ktls_domain - ktls_domains;
        if (bootverbose)
-               printf("Starting KTLS alloc thread for domain %d\n",
-                   PCPU_GET(domain));
+               printf("Starting KTLS alloc thread for domain %d\n", domain);
+       error = ktls_bind_domain(domain);
+       if (error)
+               printf("Unable to bind KTLS alloc thread for domain %d: error %d\n",
+                   domain, error);
+       snprintf(name, sizeof(name), "domain%d", domain);
        oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls), OID_AUTO,
            name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
        SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs",
@@ -2527,10 +2562,32 @@ ktls_work_thread(void *ctx)
        struct socket *so, *son;
        STAILQ_HEAD(, mbuf) local_m_head;
        STAILQ_HEAD(, socket) local_so_head;
+       int cpu;
 
-       if (ktls_bind_threads > 1) {
-               curthread->td_domain.dr_policy =
-                       DOMAINSET_PREF(PCPU_GET(domain));
+       cpu = wq - ktls_wq;
+       if (bootverbose)
+               printf("Starting KTLS worker thread for CPU %d\n", cpu);
+
+       /*
+        * Bind to a core.  If ktls_bind_threads is > 1, then
+        * we bind to the NUMA domain instead.
+        */
+       if (ktls_bind_threads) {
+               int error;
+
+               if (ktls_bind_threads > 1) {
+                       struct pcpu *pc = pcpu_find(cpu);
+
+                       error = ktls_bind_domain(pc->pc_domain);
+               } else {
+                       cpuset_t mask;
+
+                       CPU_SETOF(cpu, &mask);
+                       error = cpuset_setthread(curthread->td_tid, &mask);
+               }
+               if (error)
+                       printf("Unable to bind KTLS worker thread for CPU %d: error %d\n",
+                               cpu, error);
        }
 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
        fpu_kern_thread(0);
