IMC (In-Memory Collection counters) does performance monitoring in two different modes: accumulation mode (core-imc and thread-imc events) and trace mode (trace-imc events). A CPU thread can be in only one of these modes at a time, and the mode is selected via the LDBAR register on the POWER architecture. The current design does not address the races between thread-imc and trace-imc events.
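The fix below serializes the three domains behind a single refcounted owner id. As a minimal, self-contained illustration (a userspace pthreads sketch, not the kernel code): imc_global_refc and the IMC_DOMAIN_* ids mirror identifiers used in the patch, while imc_global_take()/imc_global_release() are hypothetical helper names standing in for the open-coded logic in the event_init and event->destroy paths.

#include <errno.h>
#include <pthread.h>

enum { IMC_DOMAIN_CORE = 1, IMC_DOMAIN_THREAD = 2, IMC_DOMAIN_TRACE = 3 };

/* One global owner: which IMC domain currently runs, and how many events. */
static struct {
	pthread_mutex_t lock;
	int id;		/* 0 = free, else the owning IMC_DOMAIN_* */
	int refc;	/* live events in the owning domain */
} imc_global_refc = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

/* event_init path: claim the counters for one domain. */
static int imc_global_take(int domain)
{
	int ret = 0;

	pthread_mutex_lock(&imc_global_refc.lock);
	if (imc_global_refc.id == 0 || imc_global_refc.id == domain) {
		imc_global_refc.id = domain;
		imc_global_refc.refc++;
	} else {
		ret = -EBUSY;	/* another domain owns the counters */
	}
	pthread_mutex_unlock(&imc_global_refc.lock);
	return ret;
}

/* event->destroy path: drop a reference; free the id when it hits zero. */
static void imc_global_release(int domain)
{
	pthread_mutex_lock(&imc_global_refc.lock);
	if (imc_global_refc.id == domain && --imc_global_refc.refc <= 0) {
		imc_global_refc.refc = 0;
		imc_global_refc.id = 0;	/* any domain may claim it again */
	}
	pthread_mutex_unlock(&imc_global_refc.lock);
}

The user-visible effect is that while any core-imc event is active, opening a thread-imc or trace-imc event fails in event_init with -EBUSY, and vice versa.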
Implement a global id and lock to avoid races between core, trace and
thread imc events. With this global id-lock implementation, the system
can run events from only one IMC domain (core, thread or trace) at a
time; i.e. to run any core-imc events, thread/trace imc events must not
be enabled or monitored.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 177 +++++++++++++++++++++++++++++++-----
 1 file changed, 153 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index cb50a9e1fd2d..2e220f199530 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -44,6 +44,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem);
 static struct imc_pmu_ref *trace_imc_refc;
 static int trace_imc_mem_size;
 
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc
+ */
+static struct imc_pmu_ref imc_global_refc = {
+	.lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
+	.id = 0,
+	.refc = 0,
+};
+
 static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct imc_pmu, pmu);
@@ -759,6 +769,20 @@ static void core_imc_counters_release(struct perf_event *event)
 		ref->refc = 0;
 	}
 	mutex_unlock(&ref->lock);
+
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_CORE) {
+		imc_global_refc.refc--;
+		/*
+		 * If no other thread is running any core-imc
+		 * event, set the global id to zero.
+		 */
+		if (imc_global_refc.refc <= 0) {
+			imc_global_refc.refc = 0;
+			imc_global_refc.id = 0;
+		}
+	}
+	mutex_unlock(&imc_global_refc.lock);
 }
 
 static int core_imc_event_init(struct perf_event *event)
@@ -779,6 +803,22 @@ static int core_imc_event_init(struct perf_event *event)
 	if (event->cpu < 0)
 		return -EINVAL;
 
+	/*
+	 * Take the global lock, and make sure
+	 * no other thread is running any trace OR thread imc event
+	 */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0) {
+		imc_global_refc.id = IMC_DOMAIN_CORE;
+		imc_global_refc.refc++;
+	} else if (imc_global_refc.id == IMC_DOMAIN_CORE) {
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
 	event->hw.idx = -1;
 	pmu = imc_event_to_pmu(event);
 
@@ -877,7 +917,16 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu)
 
 static int ppc_thread_imc_cpu_offline(unsigned int cpu)
 {
-	mtspr(SPRN_LDBAR, 0);
+	/*
+	 * Toggle bit 0 of LDBAR.
+	 *
+	 * If bit 0 of LDBAR is unset, it will stop posting
+	 * the counter data to memory.
+	 * For thread-imc, bit 0 of LDBAR will be set to 1 in the
+	 * event_add function. So toggle this bit here, to stop the updates
+	 * to memory in the cpu_offline path.
+	 */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63)));
 	return 0;
 }
 
@@ -889,6 +938,24 @@ static int thread_imc_cpu_init(void)
 			  ppc_thread_imc_cpu_offline);
 }
 
+static void thread_imc_counters_release(struct perf_event *event)
+{
+
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_THREAD) {
+		imc_global_refc.refc--;
+		/*
+		 * If no other thread is running any thread-imc
+		 * event, set the global id to zero.
+		 */
+		if (imc_global_refc.refc <= 0) {
+			imc_global_refc.refc = 0;
+			imc_global_refc.id = 0;
+		}
+	}
+	mutex_unlock(&imc_global_refc.lock);
+}
+
 static int thread_imc_event_init(struct perf_event *event)
 {
 	u32 config = event->attr.config;
@@ -905,6 +972,27 @@ static int thread_imc_event_init(struct perf_event *event)
 	if (event->hw.sample_period)
 		return -EINVAL;
 
+	mutex_lock(&imc_global_refc.lock);
+	/*
+	 * Check if any other thread is running a
+	 * core-imc engine; if not, set the global id to
+	 * thread-imc.
+	 */
+	if (imc_global_refc.id == 0) {
+		imc_global_refc.id = IMC_DOMAIN_THREAD;
+		imc_global_refc.refc++;
+	} else if (imc_global_refc.id == IMC_DOMAIN_THREAD) {
+		/*
+		 * Increase the ref count if the global id is
+		 * set to thread-imc.
+		 */
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
 	event->hw.idx = -1;
 	pmu = imc_event_to_pmu(event);
 
@@ -917,6 +1005,7 @@ static int thread_imc_event_init(struct perf_event *event)
 		return -EINVAL;
 
 	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = thread_imc_counters_release;
 	return 0;
 }
 
@@ -1063,10 +1152,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
 	int core_id;
 	struct imc_pmu_ref *ref;
 
-	mtspr(SPRN_LDBAR, 0);
-
 	core_id = smp_processor_id() / threads_per_core;
 	ref = &core_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
 
 	mutex_lock(&ref->lock);
 	ref->refc--;
@@ -1082,6 +1173,10 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
 		ref->refc = 0;
 	}
 	mutex_unlock(&ref->lock);
+
+	/* Toggle bit 0 of LDBAR */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63)));
+
 	/*
 	 * Take a snapshot and calculate the delta and update
 	 * the event counter values.
@@ -1133,7 +1228,8 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu)
 
 static int ppc_trace_imc_cpu_offline(unsigned int cpu)
 {
-	mtspr(SPRN_LDBAR, 0);
+	/* Toggle bit 0 of LDBAR. */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63)));
 	return 0;
 }
 
@@ -1226,15 +1322,14 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
 	local_mem = get_trace_imc_event_base_addr();
 	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
 
-	if (core_imc_refc)
-		ref = &core_imc_refc[core_id];
+	/* trace-imc reference count */
+	if (trace_imc_refc)
+		ref = &trace_imc_refc[core_id];
 	if (!ref) {
-		/* If core-imc is not enabled, use trace-imc reference count */
-		if (trace_imc_refc)
-			ref = &trace_imc_refc[core_id];
-		if (!ref)
-			return -EINVAL;
+		pr_debug("imc: Failed to get the event reference count\n");
+		return -EINVAL;
 	}
+
 	mtspr(SPRN_LDBAR, ldbar_value);
 	mutex_lock(&ref->lock);
 	if (ref->refc == 0) {
@@ -1242,13 +1337,11 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
 				get_hard_smp_processor_id(smp_processor_id()))) {
 			mutex_unlock(&ref->lock);
 			pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
-			mtspr(SPRN_LDBAR, 0);
 			return -EINVAL;
 		}
 	}
 	++ref->refc;
 	mutex_unlock(&ref->lock);
-
 	return 0;
 }
 
@@ -1274,16 +1367,13 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
 	int core_id = smp_processor_id() / threads_per_core;
 	struct imc_pmu_ref *ref = NULL;
 
-	if (core_imc_refc)
-		ref = &core_imc_refc[core_id];
+	if (trace_imc_refc)
+		ref = &trace_imc_refc[core_id];
 	if (!ref) {
-		/* If core-imc is not enabled, use trace-imc reference count */
-		if (trace_imc_refc)
-			ref = &trace_imc_refc[core_id];
-		if (!ref)
-			return;
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
 	}
-	mtspr(SPRN_LDBAR, 0);
+
 	mutex_lock(&ref->lock);
 	ref->refc--;
 	if (ref->refc == 0) {
@@ -1297,9 +1387,30 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
 		ref->refc = 0;
 	}
 	mutex_unlock(&ref->lock);
+
+	/* Toggle bit 0 of LDBAR */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63)));
+
 	trace_imc_event_stop(event, flags);
 }
 
+static void trace_imc_counters_release(struct perf_event *event)
+{
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_TRACE) {
+		imc_global_refc.refc--;
+		/*
+		 * If no other thread is running any trace-imc
+		 * event, set the global id to zero.
+		 */
+		if (imc_global_refc.refc <= 0) {
+			imc_global_refc.refc = 0;
+			imc_global_refc.id = 0;
+		}
+	}
+	mutex_unlock(&imc_global_refc.lock);
+}
+
 static int trace_imc_event_init(struct perf_event *event)
 {
 	struct task_struct *target;
@@ -1314,10 +1425,28 @@ static int trace_imc_event_init(struct perf_event *event)
 	if (event->attr.sample_period == 0)
 		return -ENOENT;
 
+	/*
+	 * Take the global lock, and make sure
+	 * no other thread is running any core/thread imc
+	 * event
+	 */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0) {
+		imc_global_refc.id = IMC_DOMAIN_TRACE;
+		imc_global_refc.refc++;
+	} else if (imc_global_refc.id == IMC_DOMAIN_TRACE) {
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
 	event->hw.idx = -1;
 	target = event->hw.target;
 
 	event->pmu->task_ctx_nr = perf_hw_context;
+	event->destroy = trace_imc_counters_release;
 	return 0;
 }
 
@@ -1429,10 +1558,10 @@ static void cleanup_all_core_imc_memory(void)
 static void thread_imc_ldbar_disable(void *dummy)
 {
 	/*
-	 * By Zeroing LDBAR, we disable thread-imc
-	 * updates.
+	 * By toggling bit 0 of LDBAR, we disable thread-imc
+	 * updates to memory.
 	 */
-	mtspr(SPRN_LDBAR, 0);
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63)));
 }
 
 void thread_imc_disable(void)
-- 
2.20.1
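
A note on the recurring mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) ^ (1UL << 63))) pattern above: POWER documentation uses IBM bit numbering, so "bit 0" of LDBAR is the most significant bit, hence the (1UL << 63) mask. A toy demonstration of the arithmetic (plain integers only; mfspr/mtspr are privileged and kernel-only, and the LDBAR image below is a made-up value):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical LDBAR image with IBM bit 0 (the MSB, the enable bit) set. */
	uint64_t ldbar = (1ULL << 63) | 0x0000123456780000ULL;

	/*
	 * XOR flips the bit rather than unconditionally clearing it; the
	 * paths in the patch rely on the bit being set at that point
	 * (event_add sets it), so the toggle always lands on "disabled".
	 */
	ldbar ^= (1ULL << 63);
	printf("enable bit now: %u\n", (unsigned)(ldbar >> 63));
	return 0;
}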