Author: mmacy Date: Mon May 14 00:21:04 2018 New Revision: 333596 URL: https://svnweb.freebsd.org/changeset/base/333596
Log: hwpmc: fix load/unload race and vm map LOR - fix load/unload race by allocating the per-domain list structure at boot - fix long extant vm map LOR by replacing pmc_sx sx_slock with global_epoch to protect the liveness of elements of the pmc_ss_owners list Reported by: pho Approved by: sbruno Modified: head/sys/dev/hwpmc/hwpmc_logging.c head/sys/dev/hwpmc/hwpmc_mod.c head/sys/kern/kern_pmc.c head/sys/sys/pmc.h head/sys/sys/pmckern.h Modified: head/sys/dev/hwpmc/hwpmc_logging.c ============================================================================== --- head/sys/dev/hwpmc/hwpmc_logging.c Mon May 14 00:14:00 2018 (r333595) +++ head/sys/dev/hwpmc/hwpmc_logging.c Mon May 14 00:21:04 2018 (r333596) @@ -63,20 +63,10 @@ __FBSDID("$FreeBSD$"); #ifdef NUMA #define NDOMAINS vm_ndomains - -static int -getdomain(int cpu) -{ - struct pcpu *pc; - - pc = pcpu_find(cpu); - return (pc->pc_domain); -} #else #define NDOMAINS 1 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) #define free_domain(addr, type) free(addr, type) -#define getdomain(cpu) 0 #endif /* @@ -225,16 +215,6 @@ struct pmclog_buffer { uint16_t plb_domain; } __aligned(CACHE_LINE_SIZE); -struct pmc_domain_buffer_header { - struct mtx pdbh_mtx; - TAILQ_HEAD(, pmclog_buffer) pdbh_head; - struct pmclog_buffer *pdbh_plbs; - int pdbh_ncpus; -} __aligned(CACHE_LINE_SIZE); - -struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM]; - - /* * Prototypes */ @@ -280,6 +260,7 @@ pmclog_get_buffer(struct pmc_owner *po) ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po)); domain = PCPU_GET(domain); + MPASS(pmc_dom_hdrs[domain]); mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); @@ -1165,7 +1146,7 @@ pmclog_process_userlog(struct pmc_owner *po, struct pm void pmclog_initialize() { - int domain, cpu; + int domain; struct pmclog_buffer *plb; if 
(pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) { @@ -1180,7 +1161,6 @@ pmclog_initialize() "than zero.\n", pmc_nlogbuffers_pcpu); pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; } - if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) { (void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n", pmc_nlogbuffers_pcpu*pmclog_buffer_size); @@ -1188,17 +1168,6 @@ pmclog_initialize() pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } for (domain = 0; domain < NDOMAINS; domain++) { - pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain, - M_WAITOK|M_ZERO); - mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN); - TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head); - } - CPU_FOREACH(cpu) { - domain = getdomain(cpu); - KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain)); - pmc_dom_hdrs[domain]->pdbh_ncpus++; - } - for (domain = 0; domain < NDOMAINS; domain++) { int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus; int total = ncpus*pmc_nlogbuffers_pcpu; @@ -1231,12 +1200,10 @@ pmclog_shutdown() mtx_destroy(&pmc_kthread_mtx); for (domain = 0; domain < NDOMAINS; domain++) { - mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx); while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) { TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); free(plb->plb_base, M_PMC); } free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC); - free(pmc_dom_hdrs[domain], M_PMC); } } Modified: head/sys/dev/hwpmc/hwpmc_mod.c ============================================================================== --- head/sys/dev/hwpmc/hwpmc_mod.c Mon May 14 00:14:00 2018 (r333595) +++ head/sys/dev/hwpmc/hwpmc_mod.c Mon May 14 00:21:04 2018 (r333596) @@ -1564,12 +1564,14 @@ pmc_process_mmap(struct thread *td, struct pmckern_map const struct pmc_process *pp; freepath = fullpath = NULL; + epoch_exit(global_epoch); pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = 
td->td_proc->p_pid; + epoch_enter(global_epoch); /* Inform owners of all system-wide sampling PMCs. */ - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); @@ -1606,10 +1608,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_m pid = td->td_proc->p_pid; - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + epoch_enter(global_epoch); + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); + epoch_exit(global_epoch); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; @@ -1631,7 +1635,7 @@ pmc_log_kernel_mappings(struct pmc *pm) struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; - sx_assert(&pmc_sx, SX_LOCKED); + MPASS(in_epoch() || sx_xlocked(&pmc_sx)); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); @@ -1905,11 +1909,13 @@ pmc_hook_handler(struct thread *td, int function, void pk = (struct pmckern_procexec *) arg; + epoch_enter(global_epoch); /* Inform owners of SS mode PMCs of the exec event. 
*/ - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); + epoch_exit(global_epoch); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; @@ -2033,12 +2039,12 @@ pmc_hook_handler(struct thread *td, int function, void break; case PMC_FN_MMAP: - sx_assert(&pmc_sx, SX_LOCKED); + MPASS(in_epoch() || sx_xlocked(&pmc_sx)); pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: - sx_assert(&pmc_sx, SX_LOCKED); + MPASS(in_epoch() || sx_xlocked(&pmc_sx)); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; @@ -2360,7 +2366,8 @@ pmc_release_pmc_descriptor(struct pmc *pm) po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); - LIST_REMOVE(po, po_ssnext); + CK_LIST_REMOVE(po, po_ssnext); + epoch_wait(global_epoch); } } @@ -2727,7 +2734,7 @@ pmc_start(struct pmc *pm) if (mode == PMC_MODE_SS) { if (po->po_sscount == 0) { - LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); + CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); atomic_add_rel_int(&pmc_ss_count, 1); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } @@ -2854,7 +2861,8 @@ pmc_stop(struct pmc *pm) po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); - LIST_REMOVE(po, po_ssnext); + CK_LIST_REMOVE(po, po_ssnext); + epoch_wait(global_epoch); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } @@ -2900,6 +2908,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_a c = (struct pmc_syscall_args *)syscall_args; op = c->pmop_code; arg = c->pmop_data; + /* PMC isn't set up yet */ + if (pmc_hook == NULL) + return (EINVAL); if (op == PMC_OP_CONFIGURELOG) { /* * We cannot create the logging process inside @@ -2916,7 +2927,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_a PMC_GET_SX_XLOCK(ENOSYS); is_sx_downgraded = 0; - PMCDBG3(MOD,PMS,1, 
"syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); @@ -4482,9 +4492,11 @@ pmc_process_exit(void *arg __unused, struct proc *p) /* * Log a sysexit event to all SS PMC owners. */ - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + epoch_enter(global_epoch); + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); + epoch_exit(global_epoch); if (!is_using_hwpmcs) return; @@ -4659,10 +4671,11 @@ pmc_process_fork(void *arg __unused, struct proc *p1, * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. */ - - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + epoch_enter(global_epoch); + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); + epoch_exit(global_epoch); if (!is_using_hwpmcs) return; @@ -4728,21 +4741,19 @@ pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; - sx_slock(&pmc_sx); - /* * Notify owners of system sampling PMCs about KLD operations. */ - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + epoch_enter(global_epoch); + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); + epoch_exit(global_epoch); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ - - sx_sunlock(&pmc_sx); } static void @@ -4751,18 +4762,16 @@ pmc_kld_unload(void *arg __unused, const char *filenam { struct pmc_owner *po; - sx_slock(&pmc_sx); - - LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) + epoch_enter(global_epoch); + CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); + epoch_exit(global_epoch); /* * TODO: Notify owners of process-sampling PMCs. 
*/ - - sx_sunlock(&pmc_sx); } /* @@ -5064,6 +5073,7 @@ pmc_initialize(void) /* set hook functions */ pmc_intr = md->pmd_intr; + wmb(); pmc_hook = pmc_hook_handler; if (error == 0) { @@ -5282,6 +5292,3 @@ load (struct module *module __unused, int cmd, void *a return error; } - -/* memory pool */ -MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); Modified: head/sys/kern/kern_pmc.c ============================================================================== --- head/sys/kern/kern_pmc.c Mon May 14 00:14:00 2018 (r333595) +++ head/sys/kern/kern_pmc.c Mon May 14 00:21:04 2018 (r333596) @@ -48,6 +48,10 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/systm.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> + #ifdef HWPMC_HOOKS FEATURE(hwpmc_hooks, "Kernel support for HW PMC"); #define PMC_KERNEL_VERSION PMC_VERSION @@ -58,6 +62,9 @@ FEATURE(hwpmc_hooks, "Kernel support for HW PMC"); MALLOC_DECLARE(M_PMCHOOKS); MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks"); +/* memory pool */ +MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); + const int pmc_kernel_version = PMC_KERNEL_VERSION; /* Hook variable. */ @@ -84,6 +91,7 @@ volatile int pmc_ss_count; * somewhat more expensive than a simple 'if' check and indirect call. */ struct sx pmc_sx; +SX_SYSINIT(pmcsx, &pmc_sx, "pmc-sx"); /* * PMC Soft per cpu trapframe. @@ -91,6 +99,11 @@ struct sx pmc_sx; struct trapframe pmc_tf[MAXCPU]; /* + * Per domain list of buffer headers + */ +__read_mostly struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM]; + +/* * PMC Soft use a global table to store registered events. 
*/ @@ -100,20 +113,12 @@ static int pmc_softevents = 16; SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN, &pmc_softevents, 0, "maximum number of soft events"); -struct mtx pmc_softs_mtx; int pmc_softs_count; struct pmc_soft **pmc_softs; +struct mtx pmc_softs_mtx; MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN); -static void -pmc_init_sx(void) -{ - sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS); -} - -SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL); - /* * Helper functions. */ @@ -325,12 +330,30 @@ pmc_soft_ev_release(struct pmc_soft *ps) mtx_unlock_spin(&pmc_softs_mtx); } +#ifdef NUMA +#define NDOMAINS vm_ndomains + +static int +getdomain(int cpu) +{ + struct pcpu *pc; + + pc = pcpu_find(cpu); + return (pc->pc_domain); +} +#else +#define NDOMAINS 1 +#define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) +#define getdomain(cpu) 0 +#endif /* * Initialise hwpmc. */ static void init_hwpmc(void *dummy __unused) { + int domain, cpu; + if (pmc_softevents <= 0 || pmc_softevents > PMC_EV_DYN_COUNT) { (void) printf("hwpmc: tunable \"softevents\"=%d out of " @@ -339,6 +362,19 @@ init_hwpmc(void *dummy __unused) } pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), M_PMCHOOKS, M_NOWAIT|M_ZERO); KASSERT(pmc_softs != NULL, ("cannot allocate soft events table")); + + for (domain = 0; domain < NDOMAINS; domain++) { + pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain, + M_WAITOK|M_ZERO); + mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN); + TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head); + } + CPU_FOREACH(cpu) { + domain = getdomain(cpu); + KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain)); + pmc_dom_hdrs[domain]->pdbh_ncpus++; + } + } SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL); Modified: head/sys/sys/pmc.h 
============================================================================== --- head/sys/sys/pmc.h Mon May 14 00:14:00 2018 (r333595) +++ head/sys/sys/pmc.h Mon May 14 00:21:04 2018 (r333596) @@ -40,6 +40,8 @@ #include <sys/counter.h> #include <machine/pmc_mdep.h> #include <machine/profile.h> +#include <sys/epoch.h> +#include <ck_queue.h> #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ @@ -826,7 +828,7 @@ struct pmc_process { struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ - LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */ + CK_LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ Modified: head/sys/sys/pmckern.h ============================================================================== --- head/sys/sys/pmckern.h Mon May 14 00:14:00 2018 (r333595) +++ head/sys/sys/pmckern.h Mon May 14 00:21:04 2018 (r333596) @@ -157,6 +157,15 @@ struct pmc_soft { struct pmc_dyn_event_descr ps_ev; }; +struct pmclog_buffer; + +struct pmc_domain_buffer_header { + struct mtx pdbh_mtx; + TAILQ_HEAD(, pmclog_buffer) pdbh_head; + struct pmclog_buffer *pdbh_plbs; + int pdbh_ncpus; +} __aligned(CACHE_LINE_SIZE); + /* hook */ extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg); extern int (*pmc_intr)(int _cpu, struct trapframe *_frame); @@ -176,16 +185,19 @@ extern const int pmc_kernel_version; /* PMC soft per cpu trapframe */ extern struct trapframe pmc_tf[MAXCPU]; +/* per domain buffer header list */ +extern struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM]; + /* Quick check if preparatory work is necessary */ #define PMC_HOOK_INSTALLED(cmd) __predict_false(pmc_hook != NULL) /* Hook invocation; for use within the kernel */ #define PMC_CALL_HOOK(t, cmd, arg) \ do { \ - sx_slock(&pmc_sx); \ + epoch_enter(global_epoch); \ if (pmc_hook 
!= NULL) \ (pmc_hook)((t), (cmd), (arg)); \ - sx_sunlock(&pmc_sx); \ + epoch_exit(global_epoch); \ } while (0) /* Hook invocation that needs an exclusive lock */ _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"