Author: kib
Date: Fri Jul 26 19:35:33 2019
New Revision: 350365
URL: https://svnweb.freebsd.org/changeset/base/350365

Log:
  MFC r347695, r347696, r347697, r347957, r349326:
  Lockless delayed invalidation for amd64 pmap.

Modified:
  stable/12/sys/amd64/amd64/machdep.c
  stable/12/sys/amd64/amd64/pmap.c
  stable/12/sys/amd64/amd64/trap.c
  stable/12/sys/amd64/amd64/vm_machdep.c
  stable/12/sys/amd64/include/pmap.h
  stable/12/sys/amd64/include/proc.h
  stable/12/sys/sys/proc.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/12/sys/amd64/amd64/machdep.c Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/amd64/machdep.c Fri Jul 26 19:35:33 2019        (r350365)
@@ -1618,6 +1618,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
        physfree += kstack0_sz;
 
        /*
+        * Initialize enough of thread0 for delayed invalidation to
+        * work very early.  Rely on thread0.td_base_pri
+        * zero-initialization, it is reset to PVM at proc0_init().
+        */
+       pmap_thread_init_invl_gen(&thread0);
+
+       /*
         * make gdt memory segments
         */
        for (x = 0; x < NGDT; x++) {

Modified: stable/12/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/12/sys/amd64/amd64/pmap.c    Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/amd64/pmap.c    Fri Jul 26 19:35:33 2019        (r350365)
@@ -107,6 +107,7 @@ __FBSDID("$FreeBSD$");
  *     and to when physical maps must be made correct.
  */
 
+#include "opt_ddb.h"
 #include "opt_pmap.h"
 #include "opt_vm.h"
 
@@ -130,6 +131,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
+#ifdef DDB
+#include <sys/kdb.h>
+#include <ddb/ddb.h>
+#endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -475,22 +480,100 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE
 static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
     LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
 static struct mtx invl_gen_mtx;
-static u_long pmap_invl_gen = 0;
 /* Fake lock object to satisfy turnstiles interface. */
 static struct lock_object invl_gen_ts = {
        .lo_name = "invlts",
 };
+static struct pmap_invl_gen pmap_invl_gen_head = {
+       .gen = 1,
+       .next = NULL,
+};
+static u_long pmap_invl_gen = 1;
+static int pmap_invl_waiters;
+static struct callout pmap_invl_callout;
+static bool pmap_invl_callout_inited;
 
+#define        PMAP_ASSERT_NOT_IN_DI() \
+    KASSERT(pmap_not_in_di(), ("DI already started"))
+
 static bool
-pmap_not_in_di(void)
+pmap_di_locked(void)
 {
+       int tun;
 
-       return (curthread->td_md.md_invl_gen.gen == 0);
+       if ((cpu_feature2 & CPUID2_CX16) == 0)
+               return (true);
+       tun = 0;
+       TUNABLE_INT_FETCH("vm.pmap.di_locked", &tun);
+       return (tun != 0);
 }
 
-#define        PMAP_ASSERT_NOT_IN_DI() \
-    KASSERT(pmap_not_in_di(), ("DI already started"))
+static int
+sysctl_pmap_di_locked(SYSCTL_HANDLER_ARGS)
+{
+       int locked;
 
+       locked = pmap_di_locked();
+       return (sysctl_handle_int(oidp, &locked, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, di_locked, CTLTYPE_INT | CTLFLAG_RDTUN |
+    CTLFLAG_MPSAFE, 0, 0, sysctl_pmap_di_locked, "",
+    "Locked delayed invalidation");
+
+static bool pmap_not_in_di_l(void);
+static bool pmap_not_in_di_u(void);
+DEFINE_IFUNC(, bool, pmap_not_in_di, (void), static)
+{
+
+       return (pmap_di_locked() ? pmap_not_in_di_l : pmap_not_in_di_u);
+}
+
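
For reference, a minimal user-space sketch of what the DEFINE_IFUNC dispatch above boils down to: a resolver runs once, decides between the locked and the lockless implementation from the CPU's cmpxchg16b support and the vm.pmap.di_locked tunable, and every later call goes directly to the chosen function.  Plain function pointers stand in for the kernel's ifunc machinery; the names and placeholder bodies are illustrative, not the pmap code.

    #include <stdbool.h>
    #include <stdio.h>

    static bool have_cmpxchg16b = true;       /* stand-in for cpu_feature2 & CPUID2_CX16 */
    static bool force_locked_tunable = false; /* stand-in for the vm.pmap.di_locked tunable */

    static bool
    use_locked_di(void)
    {
            if (!have_cmpxchg16b)
                    return (true);            /* the lockless path needs cmpxchg16b */
            return (force_locked_tunable);
    }

    static bool not_in_di_locked(void)   { return (true); } /* placeholder body */
    static bool not_in_di_lockless(void) { return (true); } /* placeholder body */

    static bool (*not_in_di)(void);

    int
    main(void)
    {
            /* The kernel resolves this once through DEFINE_IFUNC; here it
             * is an explicit one-time assignment. */
            not_in_di = use_locked_di() ? not_in_di_locked : not_in_di_lockless;
            printf("locked DI selected: %d, not_in_di: %d\n",
                use_locked_di(), not_in_di());
            return (0);
    }
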
+static bool
+pmap_not_in_di_l(void)
+{
+       struct pmap_invl_gen *invl_gen;
+
+       invl_gen = &curthread->td_md.md_invl_gen;
+       return (invl_gen->gen == 0);
+}
+
+static void
+pmap_thread_init_invl_gen_l(struct thread *td)
+{
+       struct pmap_invl_gen *invl_gen;
+
+       invl_gen = &td->td_md.md_invl_gen;
+       invl_gen->gen = 0;
+}
+
+static void
+pmap_delayed_invl_wait_block(u_long *m_gen, u_long *invl_gen)
+{
+       struct turnstile *ts;
+
+       ts = turnstile_trywait(&invl_gen_ts);
+       if (*m_gen > atomic_load_long(invl_gen))
+               turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
+       else
+               turnstile_cancel(ts);
+}
+
+static void
+pmap_delayed_invl_finish_unblock(u_long new_gen)
+{
+       struct turnstile *ts;
+
+       turnstile_chain_lock(&invl_gen_ts);
+       ts = turnstile_lookup(&invl_gen_ts);
+       if (new_gen != 0)
+               pmap_invl_gen = new_gen;
+       if (ts != NULL) {
+               turnstile_broadcast(ts, TS_SHARED_QUEUE);
+               turnstile_unpend(ts);
+       }
+       turnstile_chain_unlock(&invl_gen_ts);
+}
+
 /*
  * Start a new Delayed Invalidation (DI) block of code, executed by
  * the current thread.  Within a DI block, the current thread may
@@ -500,7 +583,7 @@ pmap_not_in_di(void)
  * pmap active.
  */
 static void
-pmap_delayed_invl_started(void)
+pmap_delayed_invl_start_l(void)
 {
        struct pmap_invl_gen *invl_gen;
        u_long currgen;
@@ -532,36 +615,311 @@ pmap_delayed_invl_started(void)
  * current thread's DI.
  */
 static void
-pmap_delayed_invl_finished(void)
+pmap_delayed_invl_finish_l(void)
 {
        struct pmap_invl_gen *invl_gen, *next;
-       struct turnstile *ts;
 
        invl_gen = &curthread->td_md.md_invl_gen;
-       KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+       KASSERT(invl_gen->gen != 0, ("missed invl_start"));
        mtx_lock(&invl_gen_mtx);
        next = LIST_NEXT(invl_gen, link);
-       if (next == NULL) {
-               turnstile_chain_lock(&invl_gen_ts);
-               ts = turnstile_lookup(&invl_gen_ts);
-               pmap_invl_gen = invl_gen->gen;
-               if (ts != NULL) {
-                       turnstile_broadcast(ts, TS_SHARED_QUEUE);
-                       turnstile_unpend(ts);
-               }
-               turnstile_chain_unlock(&invl_gen_ts);
-       } else {
+       if (next == NULL)
+               pmap_delayed_invl_finish_unblock(invl_gen->gen);
+       else
                next->gen = invl_gen->gen;
-       }
        LIST_REMOVE(invl_gen, link);
        mtx_unlock(&invl_gen_mtx);
        invl_gen->gen = 0;
 }
 
+static bool
+pmap_not_in_di_u(void)
+{
+       struct pmap_invl_gen *invl_gen;
+
+       invl_gen = &curthread->td_md.md_invl_gen;
+       return (((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) != 0);
+}
+
+static void
+pmap_thread_init_invl_gen_u(struct thread *td)
+{
+       struct pmap_invl_gen *invl_gen;
+
+       invl_gen = &td->td_md.md_invl_gen;
+       invl_gen->gen = 0;
+       invl_gen->next = (void *)PMAP_INVL_GEN_NEXT_INVALID;
+}
+
+static bool
+pmap_di_load_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *out)
+{
+       uint64_t new_high, new_low, old_high, old_low;
+       char res;
+
+       old_low = new_low = 0;
+       old_high = new_high = (uintptr_t)0;
+
+       __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+           : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+           : "b"(new_low), "c" (new_high)
+           : "memory", "cc");
+       if (res == 0) {
+               if ((old_high & PMAP_INVL_GEN_NEXT_INVALID) != 0)
+                       return (false);
+               out->gen = old_low;
+               out->next = (void *)old_high;
+       } else {
+               out->gen = new_low;
+               out->next = (void *)new_high;
+       }
+       return (true);
+}
+
+static bool
+pmap_di_store_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *old_val,
+    struct pmap_invl_gen *new_val)
+{
+       uint64_t new_high, new_low, old_high, old_low;
+       char res;
+
+       new_low = new_val->gen;
+       new_high = (uintptr_t)new_val->next;
+       old_low = old_val->gen;
+       old_high = (uintptr_t)old_val->next;
+
+       __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+           : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+           : "b"(new_low), "c" (new_high)
+           : "memory", "cc");
+       return (res);
+}
+
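
The two helpers above wrap lock cmpxchg16b so that the generation and the next pointer of a 16-byte-aligned pair are read or replaced as a single atomic unit.  A hedged sketch of the same double-width compare-and-swap using the GCC/Clang __int128 builtins (compile with -mcx16 on x86-64; illustrative only, not the kernel code):

    #include <stdbool.h>
    #include <stdint.h>

    struct invl_pair {
            uint64_t gen;
            uint64_t next;          /* bit 0 doubles as the "invalid" marker */
    } __attribute__((aligned(16)));

    static bool
    di_store_sketch(struct invl_pair *ptr, const struct invl_pair *oldv,
        const struct invl_pair *newv)
    {
            unsigned __int128 expected, desired;

            /* gen occupies the low 64 bits and next the high 64 bits,
             * matching the rax/rdx pairing in the inline assembly above. */
            expected = ((unsigned __int128)oldv->next << 64) | oldv->gen;
            desired  = ((unsigned __int128)newv->next << 64) | newv->gen;

            /* Succeeds only if *ptr still holds the expected gen/next pair. */
            return (__sync_bool_compare_and_swap((unsigned __int128 *)ptr,
                expected, desired));
    }
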
 #ifdef PV_STATS
+static long invl_start_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_start_restart, CTLFLAG_RD,
+    &invl_start_restart, 0,
+    "");
+static long invl_finish_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_finish_restart, CTLFLAG_RD,
+    &invl_finish_restart, 0,
+    "");
+static int invl_max_qlen;
+SYSCTL_INT(_vm_pmap, OID_AUTO, invl_max_qlen, CTLFLAG_RD,
+    &invl_max_qlen, 0,
+    "");
+#endif
+
+static struct lock_delay_config __read_frequently di_delay;
+LOCK_DELAY_SYSINIT_DEFAULT(di_delay);
+
+static void
+pmap_delayed_invl_start_u(void)
+{
+       struct pmap_invl_gen *invl_gen, *p, prev, new_prev;
+       struct thread *td;
+       struct lock_delay_arg lda;
+       uintptr_t prevl;
+       u_char pri;
+#ifdef PV_STATS
+       int i, ii;
+#endif
+
+       td = curthread;
+       invl_gen = &td->td_md.md_invl_gen;
+       PMAP_ASSERT_NOT_IN_DI();
+       lock_delay_arg_init(&lda, &di_delay);
+       invl_gen->saved_pri = 0;
+       pri = td->td_base_pri;
+       if (pri > PVM) {
+               thread_lock(td);
+               pri = td->td_base_pri;
+               if (pri > PVM) {
+                       invl_gen->saved_pri = pri;
+                       sched_prio(td, PVM);
+               }
+               thread_unlock(td);
+       }
+again:
+       PV_STAT(i = 0);
+       for (p = &pmap_invl_gen_head;; p = prev.next) {
+               PV_STAT(i++);
+               prevl = atomic_load_ptr(&p->next);
+               if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) {
+                       PV_STAT(atomic_add_long(&invl_start_restart, 1));
+                       lock_delay(&lda);
+                       goto again;
+               }
+               if (prevl == 0)
+                       break;
+               prev.next = (void *)prevl;
+       }
+#ifdef PV_STATS
+       if ((ii = invl_max_qlen) < i)
+               atomic_cmpset_int(&invl_max_qlen, ii, i);
+#endif
+
+       if (!pmap_di_load_invl(p, &prev) || prev.next != NULL) {
+               PV_STAT(atomic_add_long(&invl_start_restart, 1));
+               lock_delay(&lda);
+               goto again;
+       }
+
+       new_prev.gen = prev.gen;
+       new_prev.next = invl_gen;
+       invl_gen->gen = prev.gen + 1;
+
+       /* Formal fence between store to invl->gen and updating *p. */
+       atomic_thread_fence_rel();
+
+       /*
+        * After inserting an invl_gen element with invalid bit set,
+        * this thread blocks any other thread trying to enter the
+        * delayed invalidation block.  Do not allow to remove us from
+        * the CPU, because it causes starvation for other threads.
+        */
+       critical_enter();
+
+       /*
+        * ABA for *p is not possible there, since p->gen can only
+        * increase.  So if the *p thread finished its di, then
+        * started a new one and got inserted into the list at the
+        * same place, its gen will appear greater than the previously
+        * read gen.
+        */
+       if (!pmap_di_store_invl(p, &prev, &new_prev)) {
+               critical_exit();
+               PV_STAT(atomic_add_long(&invl_start_restart, 1));
+               lock_delay(&lda);
+               goto again;
+       }
+
+       /*
+        * There we clear PMAP_INVL_GEN_NEXT_INVALID in
+        * invl_gen->next, allowing other threads to iterate past us.
+        * pmap_di_store_invl() provides fence between the generation
+        * write and the update of next.
+        */
+       invl_gen->next = NULL;
+       critical_exit();
+}
+
+static bool
+pmap_delayed_invl_finish_u_crit(struct pmap_invl_gen *invl_gen,
+    struct pmap_invl_gen *p)
+{
+       struct pmap_invl_gen prev, new_prev;
+       u_long mygen;
+
+       /*
+        * Load invl_gen->gen after setting invl_gen->next
+        * PMAP_INVL_GEN_NEXT_INVALID.  This prevents larger
+        * generations to propagate to our invl_gen->gen.  Lock prefix
+        * in atomic_set_ptr() worked as seq_cst fence.
+        */
+       mygen = atomic_load_long(&invl_gen->gen);
+
+       if (!pmap_di_load_invl(p, &prev) || prev.next != invl_gen)
+               return (false);
+
+       KASSERT(prev.gen < mygen,
+           ("invalid di gen sequence %lu %lu", prev.gen, mygen));
+       new_prev.gen = mygen;
+       new_prev.next = (void *)((uintptr_t)invl_gen->next &
+           ~PMAP_INVL_GEN_NEXT_INVALID);
+
+       /* Formal fence between load of prev and storing update to it. */
+       atomic_thread_fence_rel();
+
+       return (pmap_di_store_invl(p, &prev, &new_prev));
+}
+
+static void
+pmap_delayed_invl_finish_u(void)
+{
+       struct pmap_invl_gen *invl_gen, *p;
+       struct thread *td;
+       struct lock_delay_arg lda;
+       uintptr_t prevl;
+
+       td = curthread;
+       invl_gen = &td->td_md.md_invl_gen;
+       KASSERT(invl_gen->gen != 0, ("missed invl_start: gen 0"));
+       KASSERT(((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) == 0,
+           ("missed invl_start: INVALID"));
+       lock_delay_arg_init(&lda, &di_delay);
+
+again:
+       for (p = &pmap_invl_gen_head; p != NULL; p = (void *)prevl) {
+               prevl = atomic_load_ptr(&p->next);
+               if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) {
+                       PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+                       lock_delay(&lda);
+                       goto again;
+               }
+               if ((void *)prevl == invl_gen)
+                       break;
+       }
+
+       /*
+        * It is legitimate to not find ourself on the list if a
+        * thread before us finished its DI and started it again.
+        */
+       if (__predict_false(p == NULL)) {
+               PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+               lock_delay(&lda);
+               goto again;
+       }
+
+       critical_enter();
+       atomic_set_ptr((uintptr_t *)&invl_gen->next,
+           PMAP_INVL_GEN_NEXT_INVALID);
+       if (!pmap_delayed_invl_finish_u_crit(invl_gen, p)) {
+               atomic_clear_ptr((uintptr_t *)&invl_gen->next,
+                   PMAP_INVL_GEN_NEXT_INVALID);
+               critical_exit();
+               PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+               lock_delay(&lda);
+               goto again;
+       }
+       critical_exit();
+       if (atomic_load_int(&pmap_invl_waiters) > 0)
+               pmap_delayed_invl_finish_unblock(0);
+       if (invl_gen->saved_pri != 0) {
+               thread_lock(td);
+               sched_prio(td, invl_gen->saved_pri);
+               thread_unlock(td);
+       }
+}
+
+#ifdef DDB
+DB_SHOW_COMMAND(di_queue, pmap_di_queue)
+{
+       struct pmap_invl_gen *p, *pn;
+       struct thread *td;
+       uintptr_t nextl;
+       bool first;
+
+       for (p = &pmap_invl_gen_head, first = true; p != NULL; p = pn,
+           first = false) {
+               nextl = atomic_load_ptr(&p->next);
+               pn = (void *)(nextl & ~PMAP_INVL_GEN_NEXT_INVALID);
+               td = first ? NULL : __containerof(p, struct thread,
+                   td_md.md_invl_gen);
+               db_printf("gen %lu inv %d td %p tid %d\n", p->gen,
+                   (nextl & PMAP_INVL_GEN_NEXT_INVALID) != 0, td,
+                   td != NULL ? td->td_tid : -1);
+       }
+}
+#endif
+
+#ifdef PV_STATS
 static long invl_wait;
 SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
     "Number of times DI invalidation blocked pmap_remove_all/write");
+static long invl_wait_slow;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLAG_RD, &invl_wait_slow, 0,
+    "Number of slow invalidation waits for lockless DI");
 #endif
 
 static u_long *
@@ -571,6 +929,27 @@ pmap_delayed_invl_genp(vm_page_t m)
        return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
 }
 
+static void
+pmap_delayed_invl_callout_func(void *arg __unused)
+{
+
+       if (atomic_load_int(&pmap_invl_waiters) == 0)
+               return;
+       pmap_delayed_invl_finish_unblock(0);
+}
+
+static void
+pmap_delayed_invl_callout_init(void *arg __unused)
+{
+
+       if (pmap_di_locked())
+               return;
+       callout_init(&pmap_invl_callout, 1);
+       pmap_invl_callout_inited = true;
+}
+SYSINIT(pmap_di_callout, SI_SUB_CPU + 1, SI_ORDER_ANY,
+    pmap_delayed_invl_callout_init, NULL);
+
 /*
  * Ensure that all currently executing DI blocks, that need to flush
  * TLB for the given page m, actually flushed the TLB at the time the
@@ -586,9 +965,8 @@ pmap_delayed_invl_genp(vm_page_t m)
  * processor.
  */
 static void
-pmap_delayed_invl_wait(vm_page_t m)
+pmap_delayed_invl_wait_l(vm_page_t m)
 {
-       struct turnstile *ts;
        u_long *m_gen;
 #ifdef PV_STATS
        bool accounted = false;
@@ -602,14 +980,92 @@ pmap_delayed_invl_wait(vm_page_t m)
                        accounted = true;
                }
 #endif
-               ts = turnstile_trywait(&invl_gen_ts);
-               if (*m_gen > pmap_invl_gen)
-                       turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
-               else
-                       turnstile_cancel(ts);
+               pmap_delayed_invl_wait_block(m_gen, &pmap_invl_gen);
        }
 }
 
+static void
+pmap_delayed_invl_wait_u(vm_page_t m)
+{
+       u_long *m_gen;
+       struct lock_delay_arg lda;
+       bool fast;
+
+       fast = true;
+       m_gen = pmap_delayed_invl_genp(m);
+       lock_delay_arg_init(&lda, &di_delay);
+       while (*m_gen > atomic_load_long(&pmap_invl_gen_head.gen)) {
+               if (fast || !pmap_invl_callout_inited) {
+                       PV_STAT(atomic_add_long(&invl_wait, 1));
+                       lock_delay(&lda);
+                       fast = false;
+               } else {
+                       /*
+                        * The page's invalidation generation number
+                        * is still below the current thread's number.
+                        * Prepare to block so that we do not waste
+                        * CPU cycles or worse, suffer livelock.
+                        *
+                        * Since it is impossible to block without
+                        * racing with pmap_delayed_invl_finish_u(),
+                        * prepare for the race by incrementing
+                        * pmap_invl_waiters and arming a 1-tick
+                        * callout which will unblock us if we lose
+                        * the race.
+                        */
+                       atomic_add_int(&pmap_invl_waiters, 1);
+
+                       /*
+                        * Re-check the current thread's invalidation
+                        * generation after incrementing
+                        * pmap_invl_waiters, so that there is no race
+                        * with pmap_delayed_invl_finish_u() setting
+                        * the page generation and checking
+                        * pmap_invl_waiters.  The only race allowed
+                        * is for a missed unblock, which is handled
+                        * by the callout.
+                        */
+                       if (*m_gen >
+                           atomic_load_long(&pmap_invl_gen_head.gen)) {
+                               callout_reset(&pmap_invl_callout, 1,
+                                   pmap_delayed_invl_callout_func, NULL);
+                               PV_STAT(atomic_add_long(&invl_wait_slow, 1));
+                               pmap_delayed_invl_wait_block(m_gen,
+                                   &pmap_invl_gen_head.gen);
+                       }
+                       atomic_add_int(&pmap_invl_waiters, -1);
+               }
+       }
+}
+
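
The comment block above describes the lost-wakeup guard: a waiter advertises itself in pmap_invl_waiters, re-checks the generation, and arms a one-tick callout so that a wakeup racing the block is recovered after at most one tick.  A hedged user-space analogue of that pattern, with pthread primitives standing in for turnstiles and the callout (link with -pthread; names are illustrative, not the pmap code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <time.h>

    static pthread_mutex_t gen_mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  gen_cv  = PTHREAD_COND_INITIALIZER;
    static atomic_ulong    done_gen;   /* role of pmap_invl_gen_head.gen */
    static atomic_int      waiters;    /* role of pmap_invl_waiters */

    /* Waiter: block until done_gen reaches target.  A wakeup lost to the
     * race between the re-check and the actual block is bounded by the
     * one-second timeout, which plays the role of the 1-tick callout. */
    static void
    wait_for_gen(unsigned long target)
    {
            struct timespec ts;

            while (atomic_load(&done_gen) < target) {
                    atomic_fetch_add(&waiters, 1);
                    pthread_mutex_lock(&gen_mtx);
                    if (atomic_load(&done_gen) < target) {
                            clock_gettime(CLOCK_REALTIME, &ts);
                            ts.tv_sec += 1;
                            pthread_cond_timedwait(&gen_cv, &gen_mtx, &ts);
                    }
                    pthread_mutex_unlock(&gen_mtx);
                    atomic_fetch_sub(&waiters, 1);
            }
    }

    /* Finisher: publish the new generation first, then wake waiters only
     * if any advertised themselves, mirroring pmap_delayed_invl_finish_u(). */
    static void
    finish_gen(unsigned long new_gen)
    {
            atomic_store(&done_gen, new_gen);
            if (atomic_load(&waiters) > 0)
                    pthread_cond_broadcast(&gen_cv);
    }
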
+DEFINE_IFUNC(, void, pmap_thread_init_invl_gen, (struct thread *), static)
+{
+
+       return (pmap_di_locked() ? pmap_thread_init_invl_gen_l :
+           pmap_thread_init_invl_gen_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_start, (void), static)
+{
+
+       return (pmap_di_locked() ? pmap_delayed_invl_start_l :
+           pmap_delayed_invl_start_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_finish, (void), static)
+{
+
+       return (pmap_di_locked() ? pmap_delayed_invl_finish_l :
+           pmap_delayed_invl_finish_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_wait, (vm_page_t), static)
+{
+
+       return (pmap_di_locked() ? pmap_delayed_invl_wait_l :
+           pmap_delayed_invl_wait_u);
+}
+
 /*
  * Mark the page m's PV list as participating in the current thread's
  * DI block.  Any threads concurrently using m's PV list to remove or
@@ -619,7 +1075,7 @@ pmap_delayed_invl_wait(vm_page_t m)
  * The function works by setting the DI generation number for m's PV
  * list to at least the DI generation number of the current thread.
  * This forces a caller of pmap_delayed_invl_wait() to block until
- * current thread calls pmap_delayed_invl_finished().
+ * current thread calls pmap_delayed_invl_finish().
  */
 static void
 pmap_delayed_invl_page(vm_page_t m)
@@ -2869,6 +3325,7 @@ void
 pmap_pinit0(pmap_t pmap)
 {
        struct proc *p;
+       struct thread *td;
        int i;
 
        PMAP_LOCK_INIT(pmap);
@@ -2887,12 +3344,14 @@ pmap_pinit0(pmap_t pmap)
                pmap->pm_pcids[i].pm_gen = 1;
        }
        pmap_activate_boot(pmap);
+       td = curthread;
        if (pti) {
-               p = curproc;
+               p = td->td_proc;
                PROC_LOCK(p);
                p->p_amd64_md_flags |= P_MD_KPTI;
                PROC_UNLOCK(p);
        }
+       pmap_thread_init_invl_gen(td);
 
        if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
                pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
@@ -3450,7 +3909,7 @@ reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked
        if (pmap != locked_pmap)
                PMAP_UNLOCK(pmap);
        if (start_di)
-               pmap_delayed_invl_finished();
+               pmap_delayed_invl_finish();
 }
 
 /*
@@ -3533,13 +3992,13 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
                                RELEASE_PV_LIST_LOCK(lockp);
                                PMAP_LOCK(pmap);
                                if (start_di)
-                                       pmap_delayed_invl_started();
+                                       pmap_delayed_invl_start();
                                mtx_lock(&pv_chunks_mutex);
                                continue;
                        } else if (pmap != locked_pmap) {
                                if (PMAP_TRYLOCK(pmap)) {
                                        if (start_di)
-                                               pmap_delayed_invl_started();
+                                               pmap_delayed_invl_start();
                                        mtx_lock(&pv_chunks_mutex);
                                        continue;
                                } else {
@@ -3552,7 +4011,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
                                        goto next_chunk;
                                }
                        } else if (start_di)
-                               pmap_delayed_invl_started();
+                               pmap_delayed_invl_start();
                        PG_G = pmap_global_bit(pmap);
                        PG_A = pmap_accessed_bit(pmap);
                        PG_M = pmap_modified_bit(pmap);
@@ -4531,7 +4990,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t 
        anyvalid = 0;
        SLIST_INIT(&free);
 
-       pmap_delayed_invl_started();
+       pmap_delayed_invl_start();
        PMAP_LOCK(pmap);
        pmap_pkru_on_remove(pmap, sva, eva);
 
@@ -4628,7 +5087,7 @@ out:
        if (anyvalid)
                pmap_invalidate_all(pmap);
        PMAP_UNLOCK(pmap);
-       pmap_delayed_invl_finished();
+       pmap_delayed_invl_finish();
        vm_page_free_pages_toq(&free, true);
 }
 
@@ -4812,8 +5271,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
        /*
         * Although this function delays and batches the invalidation
         * of stale TLB entries, it does not need to call
-        * pmap_delayed_invl_started() and
-        * pmap_delayed_invl_finished(), because it does not
+        * pmap_delayed_invl_start() and
+        * pmap_delayed_invl_finish(), because it does not
         * ordinarily destroy mappings.  Stale TLB entries from
         * protection-only changes need only be invalidated before the
         * pmap lock is released, because protection-only changes do
@@ -5440,11 +5899,11 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t
                        if ((oldpde & PG_G) == 0)
                                pmap_invalidate_pde_page(pmap, va, oldpde);
                } else {
-                       pmap_delayed_invl_started();
+                       pmap_delayed_invl_start();
                        if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
                            lockp))
                               pmap_invalidate_all(pmap);
-                       pmap_delayed_invl_finished();
+                       pmap_delayed_invl_finish();
                }
                vm_page_free_pages_toq(&free, true);
                if (va >= VM_MAXUSER_ADDRESS) {
@@ -5800,7 +6259,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_
  *     The wired attribute of the page table entry is not a hardware
  *     feature, so there is no need to invalidate any TLB entries.
  *     Since pmap_demote_pde() for the wired entry must never fail,
- *     pmap_delayed_invl_started()/finished() calls around the
+ *     pmap_delayed_invl_start()/finish() calls around the
  *     function are not needed.
  */
 void
@@ -6299,8 +6758,8 @@ pmap_page_is_mapped(vm_page_t m)
  *
  * Although this function destroys all of the pmap's managed,
  * non-wired mappings, it can delay and batch the invalidation of TLB
- * entries without calling pmap_delayed_invl_started() and
- * pmap_delayed_invl_finished().  Because the pmap is not active on
+ * entries without calling pmap_delayed_invl_start() and
+ * pmap_delayed_invl_finish().  Because the pmap is not active on
  * any other processor, none of these TLB entries will ever be used
  * before their eventual invalidation.  Consequently, there is no need
  * for either pmap_remove_all() or pmap_remove_write() to wait for
@@ -7005,7 +7464,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t 
        PG_V = pmap_valid_bit(pmap);
        PG_RW = pmap_rw_bit(pmap);
        anychanged = FALSE;
-       pmap_delayed_invl_started();
+       pmap_delayed_invl_start();
        PMAP_LOCK(pmap);
        for (; sva < eva; sva = va_next) {
                pml4e = pmap_pml4e(pmap, sva);
@@ -7102,7 +7561,7 @@ maybe_invlrng:
        if (anychanged)
                pmap_invalidate_all(pmap);
        PMAP_UNLOCK(pmap);
-       pmap_delayed_invl_finished();
+       pmap_delayed_invl_finish();
 }
 
 /*
@@ -9433,11 +9892,7 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offse
        return (error);
 }
 
-#include "opt_ddb.h"
 #ifdef DDB
-#include <sys/kdb.h>
-#include <ddb/ddb.h>
-
 DB_SHOW_COMMAND(pte, pmap_print_pte)
 {
        pmap_t pmap;

Modified: stable/12/sys/amd64/amd64/trap.c
==============================================================================
--- stable/12/sys/amd64/amd64/trap.c    Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/amd64/trap.c    Fri Jul 26 19:35:33 2019        (r350365)
@@ -1203,7 +1203,7 @@ amd64_syscall(struct thread *td, int traced)
        KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
            ("System call %s returning with mangled pcb_save",
             syscallname(td->td_proc, td->td_sa.code)));
-       KASSERT(td->td_md.md_invl_gen.gen == 0,
+       KASSERT(pmap_not_in_di(),
            ("System call %s returning with leaked invl_gen %lu",
            syscallname(td->td_proc, td->td_sa.code),
            td->td_md.md_invl_gen.gen));

Modified: stable/12/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- stable/12/sys/amd64/amd64/vm_machdep.c      Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/amd64/vm_machdep.c      Fri Jul 26 19:35:33 2019        (r350365)
@@ -229,7 +229,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t
        /* Setup to release spin count in fork_exit(). */
        td2->td_md.md_spinlock_count = 1;
        td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
-       td2->td_md.md_invl_gen.gen = 0;
+       pmap_thread_init_invl_gen(td2);
 
        /* As an i386, do not copy io permission bitmap. */
        pcb2->pcb_tssp = NULL;
@@ -545,6 +545,7 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
        /* Setup to release spin count in fork_exit(). */
        td->td_md.md_spinlock_count = 1;
        td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+       pmap_thread_init_invl_gen(td);
 }
 
 /*

Modified: stable/12/sys/amd64/include/pmap.h
==============================================================================
--- stable/12/sys/amd64/include/pmap.h  Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/include/pmap.h  Fri Jul 26 19:35:33 2019        (r350365)
@@ -442,6 +442,7 @@ void        *pmap_mapbios(vm_paddr_t, vm_size_t);
 void   *pmap_mapdev(vm_paddr_t, vm_size_t);
 void   *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
 void   *pmap_mapdev_pciecfg(vm_paddr_t pa, vm_size_t size);
+bool   pmap_not_in_di(void);
 boolean_t pmap_page_is_mapped(vm_page_t m);
 void   pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
 void   pmap_pinit_pml4(vm_page_t);
@@ -466,6 +467,7 @@ void        pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr
 int    pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
 int    pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
            u_int keyidx, int flags);
+void   pmap_thread_init_invl_gen(struct thread *td);
 int    pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
 #endif /* _KERNEL */
 

Modified: stable/12/sys/amd64/include/proc.h
==============================================================================
--- stable/12/sys/amd64/include/proc.h  Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/amd64/include/proc.h  Fri Jul 26 19:35:33 2019        (r350365)
@@ -50,10 +50,17 @@ struct proc_ldt {
        int     ldt_refcnt;
 };
 
+#define PMAP_INVL_GEN_NEXT_INVALID     0x1ULL
 struct pmap_invl_gen {
        u_long gen;                     /* (k) */
-       LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
-};
+       union {
+               LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
+               struct {
+                       struct pmap_invl_gen *next;
+                       u_char saved_pri;
+               };
+       };
+} __aligned(16);
 
 /*
  * Machine-dependent part of the proc structure for AMD64.

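PMAP_INVL_GEN_NEXT_INVALID relies on the fact that struct pmap_invl_gen is 16-byte aligned, so bit 0 of the next pointer is always zero and can serve as a transient "being updated" marker.  A minimal sketch of that low-bit tagging (helper names are illustrative, not the pmap code):

    #include <stdbool.h>
    #include <stdint.h>

    #define NEXT_INVALID    0x1UL

    /* Tag a 16-byte-aligned pointer as invalid while its entry is rewritten. */
    static inline uintptr_t
    mark_next_invalid(const void *next)
    {
            return ((uintptr_t)next | NEXT_INVALID);
    }

    /* Strip the tag to recover the real successor pointer. */
    static inline void *
    next_pointer(uintptr_t tagged)
    {
            return ((void *)(tagged & ~NEXT_INVALID));
    }

    /* Test whether an entry is currently marked invalid. */
    static inline bool
    next_is_invalid(uintptr_t tagged)
    {
            return ((tagged & NEXT_INVALID) != 0);
    }
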
Modified: stable/12/sys/sys/proc.h
==============================================================================
--- stable/12/sys/sys/proc.h    Fri Jul 26 19:16:02 2019        (r350364)
+++ stable/12/sys/sys/proc.h    Fri Jul 26 19:35:33 2019        (r350365)
@@ -343,7 +343,11 @@ struct thread {
        vm_offset_t     td_kstack;      /* (a) Kernel VA of kstack. */
        int             td_kstack_pages; /* (a) Size of the kstack. */
        volatile u_int  td_critnest;    /* (k*) Critical section nest level. */
+#ifdef __amd64__
+       uint32_t        td_md_pad0[16];
+#else
        struct mdthread td_md;          /* (k) Any machine-dependent fields. */
+#endif
        struct kaudit_record    *td_ar; /* (k) Active audit record, if any. */
        struct lpohead  td_lprof[2];    /* (a) lock profiling objects. */
        struct kdtrace_thread   *td_dtrace; /* (*) DTrace-specific data. */
@@ -361,6 +365,9 @@ struct thread {
        int             td_oncpu;       /* (t) Which cpu we are on. */
        void            *td_lkpi_task;  /* LinuxKPI task struct pointer */
        int             td_pmcpend;
+#ifdef __amd64__
+       struct mdthread td_md;          /* (k) Any machine-dependent fields. */
+#endif
 };
 
 struct thread0_storage {
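
The __aligned(16) on struct pmap_invl_gen is needed because the structure is the memory operand of lock cmpxchg16b, which requires a 16-byte-aligned operand.  A compile-time check of that kind of layout assumption might look like the sketch below; the fields mirror the patch, but the assert itself is illustrative and not part of it.

    #include <assert.h>     /* static_assert (C11) */
    #include <stdalign.h>   /* alignof (C11) */

    struct pmap_invl_gen_sketch {
            unsigned long gen;
            struct pmap_invl_gen_sketch *next;
            unsigned char saved_pri;
    } __attribute__((aligned(16)));

    /* lock cmpxchg16b faults on a misaligned 16-byte operand, so the
     * structure must start on a 16-byte boundary wherever it is embedded. */
    static_assert(alignof(struct pmap_invl_gen_sketch) == 16,
        "cmpxchg16b needs a 16-byte-aligned operand");
    static_assert(sizeof(struct pmap_invl_gen_sketch) % 16 == 0,
        "arrays and enclosing structures keep the member aligned");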