Author: mmacy
Date: Mon May 14 00:21:04 2018
New Revision: 333596
URL: https://svnweb.freebsd.org/changeset/base/333596

Log:
  hwpmc: fix load/unload race and vm map LOR
  
  - fix load/unload race by allocating the per-domain list structure at boot
  
  - fix long extant vm map LOR by replacing pmc_sx sx_slock with global_epoch
    to protect the liveness of elements of the pmc_ss_owners list
  
  Reported by:  pho
  Approved by:  sbruno

Modified:
  head/sys/dev/hwpmc/hwpmc_logging.c
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/kern_pmc.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h

Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c  Mon May 14 00:14:00 2018        
(r333595)
+++ head/sys/dev/hwpmc/hwpmc_logging.c  Mon May 14 00:21:04 2018        
(r333596)
@@ -63,20 +63,10 @@ __FBSDID("$FreeBSD$");
 
 #ifdef NUMA
 #define NDOMAINS vm_ndomains
-
-static int
-getdomain(int cpu)
-{
-       struct pcpu *pc;
-
-       pc = pcpu_find(cpu);
-       return (pc->pc_domain);
-}
 #else
 #define NDOMAINS 1
 #define malloc_domain(size, type, domain, flags) malloc((size), (type), 
(flags))
 #define free_domain(addr, type) free(addr, type)
-#define getdomain(cpu) 0
 #endif
 
 /*
@@ -225,16 +215,6 @@ struct pmclog_buffer {
        uint16_t         plb_domain;
 } __aligned(CACHE_LINE_SIZE);
 
-struct pmc_domain_buffer_header {
-       struct mtx pdbh_mtx;
-       TAILQ_HEAD(, pmclog_buffer) pdbh_head;
-       struct pmclog_buffer *pdbh_plbs;
-       int pdbh_ncpus;
-} __aligned(CACHE_LINE_SIZE);
-
-struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
-
-
 /*
  * Prototypes
  */
@@ -280,6 +260,7 @@ pmclog_get_buffer(struct pmc_owner *po)
            ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
 
        domain = PCPU_GET(domain);
+       MPASS(pmc_dom_hdrs[domain]);
        mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
        if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
                TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
@@ -1165,7 +1146,7 @@ pmclog_process_userlog(struct pmc_owner *po, struct pm
 void
 pmclog_initialize()
 {
-       int domain, cpu;
+       int domain;
        struct pmclog_buffer *plb;
 
        if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
@@ -1180,7 +1161,6 @@ pmclog_initialize()
                                          "than zero.\n", pmc_nlogbuffers_pcpu);
                pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
        }
-
        if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
                (void) printf("hwpmc: memory allocated pcpu must be less than 
32MB (is %dK).\n",
                                          
pmc_nlogbuffers_pcpu*pmclog_buffer_size);
@@ -1188,17 +1168,6 @@ pmclog_initialize()
                pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
        }
        for (domain = 0; domain < NDOMAINS; domain++) {
-               pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct 
pmc_domain_buffer_header), M_PMC, domain,
-                                                                               
M_WAITOK|M_ZERO);
-               mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", 
"pmc-leaf", MTX_SPIN);
-               TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
-       }
-       CPU_FOREACH(cpu) {
-               domain = getdomain(cpu);
-               KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for 
domain: %d", domain));
-               pmc_dom_hdrs[domain]->pdbh_ncpus++;
-       }
-       for (domain = 0; domain < NDOMAINS; domain++) {
                int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
                int total = ncpus*pmc_nlogbuffers_pcpu;
 
@@ -1231,12 +1200,10 @@ pmclog_shutdown()
        mtx_destroy(&pmc_kthread_mtx);
 
        for (domain = 0; domain < NDOMAINS; domain++) {
-               mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
                while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != 
NULL) {
                        TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, 
plb_next);
                        free(plb->plb_base, M_PMC);
                }
                free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
-               free(pmc_dom_hdrs[domain], M_PMC);
        }
 }

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c      Mon May 14 00:14:00 2018        
(r333595)
+++ head/sys/dev/hwpmc/hwpmc_mod.c      Mon May 14 00:21:04 2018        
(r333596)
@@ -1564,12 +1564,14 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
        const struct pmc_process *pp;
 
        freepath = fullpath = NULL;
+       epoch_exit(global_epoch);
        pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
 
        pid = td->td_proc->p_pid;
 
+       epoch_enter(global_epoch);
        /* Inform owners of all system-wide sampling PMCs. */
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
            if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                        pmclog_process_map_in(po, pid, pkm->pm_address, 
fullpath);
 
@@ -1606,10 +1608,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_m
 
        pid = td->td_proc->p_pid;
 
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       epoch_enter(global_epoch);
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
            if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                pmclog_process_map_out(po, pid, pkm->pm_address,
                    pkm->pm_address + pkm->pm_size);
+       epoch_exit(global_epoch);
 
        if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
                return;
@@ -1631,7 +1635,7 @@ pmc_log_kernel_mappings(struct pmc *pm)
        struct pmc_owner *po;
        struct pmckern_map_in *km, *kmbase;
 
-       sx_assert(&pmc_sx, SX_LOCKED);
+       MPASS(in_epoch() || sx_xlocked(&pmc_sx));
        KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
            ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
                __LINE__, (void *) pm));
@@ -1905,11 +1909,13 @@ pmc_hook_handler(struct thread *td, int function, void
 
                pk = (struct pmckern_procexec *) arg;
 
+               epoch_enter(global_epoch);
                /* Inform owners of SS mode PMCs of the exec event. */
-               LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+               CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
                    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                            pmclog_process_procexec(po, PMC_ID_INVALID,
                                p->p_pid, pk->pm_entryaddr, fullpath);
+               epoch_exit(global_epoch);
 
                PROC_LOCK(p);
                is_using_hwpmcs = p->p_flag & P_HWPMC;
@@ -2033,12 +2039,12 @@ pmc_hook_handler(struct thread *td, int function, void
                break;
 
        case PMC_FN_MMAP:
-               sx_assert(&pmc_sx, SX_LOCKED);
+               MPASS(in_epoch() || sx_xlocked(&pmc_sx));
                pmc_process_mmap(td, (struct pmckern_map_in *) arg);
                break;
 
        case PMC_FN_MUNMAP:
-               sx_assert(&pmc_sx, SX_LOCKED);
+               MPASS(in_epoch() || sx_xlocked(&pmc_sx));
                pmc_process_munmap(td, (struct pmckern_map_out *) arg);
                break;
 
@@ -2360,7 +2366,8 @@ pmc_release_pmc_descriptor(struct pmc *pm)
                        po->po_sscount--;
                        if (po->po_sscount == 0) {
                                atomic_subtract_rel_int(&pmc_ss_count, 1);
-                               LIST_REMOVE(po, po_ssnext);
+                               CK_LIST_REMOVE(po, po_ssnext);
+                               epoch_wait(global_epoch);
                        }
                }
 
@@ -2727,7 +2734,7 @@ pmc_start(struct pmc *pm)
 
        if (mode == PMC_MODE_SS) {
                if (po->po_sscount == 0) {
-                       LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
+                       CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
                        atomic_add_rel_int(&pmc_ss_count, 1);
                        PMCDBG1(PMC,OPS,1, "po=%p in global list", po);
                }
@@ -2854,7 +2861,8 @@ pmc_stop(struct pmc *pm)
                po->po_sscount--;
                if (po->po_sscount == 0) {
                        atomic_subtract_rel_int(&pmc_ss_count, 1);
-                       LIST_REMOVE(po, po_ssnext);
+                       CK_LIST_REMOVE(po, po_ssnext);
+                       epoch_wait(global_epoch);
                        PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po);
                }
        }
@@ -2900,6 +2908,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
        c = (struct pmc_syscall_args *)syscall_args;
        op = c->pmop_code;
        arg = c->pmop_data;
+       /* PMC isn't set up yet */
+       if (pmc_hook == NULL)
+               return (EINVAL);
        if (op == PMC_OP_CONFIGURELOG) {
                /*
                 * We cannot create the logging process inside
@@ -2916,7 +2927,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 
        PMC_GET_SX_XLOCK(ENOSYS);
        is_sx_downgraded = 0;
-
        PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
            pmc_op_to_name[op], arg);
 
@@ -4482,9 +4492,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
        /*
         * Log a sysexit event to all SS PMC owners.
         */
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       epoch_enter(global_epoch);
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
            if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                    pmclog_process_sysexit(po, p->p_pid);
+       epoch_exit(global_epoch);
 
        if (!is_using_hwpmcs)
                return;
@@ -4659,10 +4671,11 @@ pmc_process_fork(void *arg __unused, struct proc *p1, 
         * If there are system-wide sampling PMCs active, we need to
         * log all fork events to their owner's logs.
         */
-
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       epoch_enter(global_epoch);
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
            if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                    pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
+       epoch_exit(global_epoch);
 
        if (!is_using_hwpmcs)
                return;
@@ -4728,21 +4741,19 @@ pmc_kld_load(void *arg __unused, linker_file_t lf)
 {
        struct pmc_owner *po;
 
-       sx_slock(&pmc_sx);
-
        /*
         * Notify owners of system sampling PMCs about KLD operations.
         */
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       epoch_enter(global_epoch);
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
                if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                        pmclog_process_map_in(po, (pid_t) -1,
                            (uintfptr_t) lf->address, lf->filename);
+       epoch_exit(global_epoch);
 
        /*
         * TODO: Notify owners of (all) process-sampling PMCs too.
         */
-
-       sx_sunlock(&pmc_sx);
 }
 
 static void
@@ -4751,18 +4762,16 @@ pmc_kld_unload(void *arg __unused, const char *filenam
 {
        struct pmc_owner *po;
 
-       sx_slock(&pmc_sx);
-
-       LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+       epoch_enter(global_epoch);
+       CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
                if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                        pmclog_process_map_out(po, (pid_t) -1,
                            (uintfptr_t) address, (uintfptr_t) address + size);
+       epoch_exit(global_epoch);
 
        /*
         * TODO: Notify owners of process-sampling PMCs.
         */
-
-       sx_sunlock(&pmc_sx);
 }
 
 /*
@@ -5064,6 +5073,7 @@ pmc_initialize(void)
 
        /* set hook functions */
        pmc_intr = md->pmd_intr;
+       wmb();
        pmc_hook = pmc_hook_handler;
 
        if (error == 0) {
@@ -5282,6 +5292,3 @@ load (struct module *module __unused, int cmd, void *a
 
        return error;
 }
-
-/* memory pool */
-MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");

Modified: head/sys/kern/kern_pmc.c
==============================================================================
--- head/sys/kern/kern_pmc.c    Mon May 14 00:14:00 2018        (r333595)
+++ head/sys/kern/kern_pmc.c    Mon May 14 00:21:04 2018        (r333596)
@@ -48,6 +48,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
 #ifdef HWPMC_HOOKS
 FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
 #define        PMC_KERNEL_VERSION      PMC_VERSION
@@ -58,6 +62,9 @@ FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
 MALLOC_DECLARE(M_PMCHOOKS);
 MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");
 
+/* memory pool */
+MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
+
 const int pmc_kernel_version = PMC_KERNEL_VERSION;
 
 /* Hook variable. */
@@ -84,6 +91,7 @@ volatile int pmc_ss_count;
  * somewhat more expensive than a simple 'if' check and indirect call.
  */
 struct sx pmc_sx;
+SX_SYSINIT(pmcsx, &pmc_sx, "pmc-sx");
 
 /*
  * PMC Soft per cpu trapframe.
@@ -91,6 +99,11 @@ struct sx pmc_sx;
 struct trapframe pmc_tf[MAXCPU];
 
 /*
+ * Per domain list of buffer headers
+ */
+__read_mostly struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
+/*
  * PMC Soft use a global table to store registered events.
  */
 
@@ -100,20 +113,12 @@ static int pmc_softevents = 16;
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
     &pmc_softevents, 0, "maximum number of soft events");
 
-struct mtx pmc_softs_mtx;
 int pmc_softs_count;
 struct pmc_soft **pmc_softs;
 
+struct mtx pmc_softs_mtx;
 MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
 
-static void
-pmc_init_sx(void)
-{
-       sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
-}
-
-SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
-
 /*
  * Helper functions.
  */
@@ -325,12 +330,30 @@ pmc_soft_ev_release(struct pmc_soft *ps)
        mtx_unlock_spin(&pmc_softs_mtx);
 }
 
+#ifdef NUMA
+#define NDOMAINS vm_ndomains
+
+static int
+getdomain(int cpu)
+{
+       struct pcpu *pc;
+
+       pc = pcpu_find(cpu);
+       return (pc->pc_domain);
+}
+#else
+#define NDOMAINS 1
+#define malloc_domain(size, type, domain, flags) malloc((size), (type), 
(flags))
+#define getdomain(cpu) 0
+#endif
 /*
  *  Initialise hwpmc.
  */
 static void
 init_hwpmc(void *dummy __unused)
 {
+       int domain, cpu;
+
        if (pmc_softevents <= 0 ||
            pmc_softevents > PMC_EV_DYN_COUNT) {
                (void) printf("hwpmc: tunable \"softevents\"=%d out of "
@@ -339,6 +362,19 @@ init_hwpmc(void *dummy __unused)
        }
        pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), 
M_PMCHOOKS, M_NOWAIT|M_ZERO);
        KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
+
+       for (domain = 0; domain < NDOMAINS; domain++) {
+               pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct 
pmc_domain_buffer_header), M_PMC, domain,
+                                                                               
M_WAITOK|M_ZERO);
+               mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", 
"pmc-leaf", MTX_SPIN);
+               TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
+       }
+       CPU_FOREACH(cpu) {
+               domain = getdomain(cpu);
+               KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for 
domain: %d", domain));
+               pmc_dom_hdrs[domain]->pdbh_ncpus++;
+       }
+
 }
 
 SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h  Mon May 14 00:14:00 2018        (r333595)
+++ head/sys/sys/pmc.h  Mon May 14 00:21:04 2018        (r333596)
@@ -40,6 +40,8 @@
 #include <sys/counter.h>
 #include <machine/pmc_mdep.h>
 #include <machine/profile.h>
+#include <sys/epoch.h>
+#include <ck_queue.h>
 
 #define        PMC_MODULE_NAME         "hwpmc"
 #define        PMC_NAME_MAX            64 /* HW counter name size */
@@ -826,7 +828,7 @@ struct pmc_process {
 
 struct pmc_owner  {
        LIST_ENTRY(pmc_owner)   po_next;        /* hash chain */
-       LIST_ENTRY(pmc_owner)   po_ssnext;      /* list of SS PMC owners */
+       CK_LIST_ENTRY(pmc_owner)        po_ssnext;      /* list of SS PMC 
owners */
        LIST_HEAD(, pmc)        po_pmcs;        /* owned PMC list */
        TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */
        struct mtx              po_mtx;         /* spin lock for (o) */

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h      Mon May 14 00:14:00 2018        (r333595)
+++ head/sys/sys/pmckern.h      Mon May 14 00:21:04 2018        (r333596)
@@ -157,6 +157,15 @@ struct pmc_soft {
        struct pmc_dyn_event_descr      ps_ev;
 };
 
+struct pmclog_buffer;
+
+struct pmc_domain_buffer_header {
+       struct mtx pdbh_mtx;
+       TAILQ_HEAD(, pmclog_buffer) pdbh_head;
+       struct pmclog_buffer *pdbh_plbs;
+       int pdbh_ncpus;
+} __aligned(CACHE_LINE_SIZE);
+
 /* hook */
 extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
 extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
@@ -176,16 +185,19 @@ extern const int pmc_kernel_version;
 /* PMC soft per cpu trapframe */
 extern struct trapframe pmc_tf[MAXCPU];
 
+/* per domain buffer header list */
+extern struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
 /* Quick check if preparatory work is necessary */
 #define        PMC_HOOK_INSTALLED(cmd) __predict_false(pmc_hook != NULL)
 
 /* Hook invocation; for use within the kernel */
 #define        PMC_CALL_HOOK(t, cmd, arg)              \
 do {                                           \
-       sx_slock(&pmc_sx);                      \
+       epoch_enter(global_epoch);              \
        if (pmc_hook != NULL)                   \
                (pmc_hook)((t), (cmd), (arg));  \
-       sx_sunlock(&pmc_sx);                    \
+       epoch_exit(global_epoch);                       \
 } while (0)
 
 /* Hook invocation that needs an exclusive lock */
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to