From: Andi Kleen <a...@linux.intel.com> Restructure event reading to use affinity to minimize the number of IPIs needed.
Before on a large test case with 94 CPUs: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 3.16 0.106079 4 22082 read After: 3.43 0.081295 3 22082 read Signed-off-by: Andi Kleen <a...@linux.intel.com> --- tools/perf/builtin-stat.c | 97 ++++++++++++++++++++++----------------- tools/perf/util/evsel.h | 1 + 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 651ee9ddf0a3..b52121bb28a3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -264,15 +264,10 @@ static int read_single_counter(struct evsel *counter, int cpu, * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct evsel *counter, struct timespec *rs) +static int read_counter(struct evsel *counter, struct timespec *rs, int cpu) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); - int ncpus, cpu, thread; - - if (target__has_cpu(&target) && !target__has_per_thread(&target)) - ncpus = perf_evsel__nr_cpus(counter); - else - ncpus = 1; + int thread; if (!counter->supported) return -ENOENT; @@ -281,40 +276,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - struct perf_counts_values *count; - - count = perf_counts(counter->counts, cpu, thread); - - /* - * The leader's group read loads data into its group members - * (via perf_evsel__read_counter) and sets threir count->loaded. - */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { - counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; - return -1; - } + struct perf_counts_values *count; - perf_counts__set_loaded(counter->counts, cpu, thread, false); + count = perf_counts(counter->counts, cpu, thread); - if (STAT_RECORD) { - if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { - pr_err("failed to write stat event\n"); - return -1; - } - } + /* + * The leader's group read loads data into its group members + * (via perf_evsel__read_counter) and sets threir count->loaded. + */ + if (!perf_counts__is_loaded(counter->counts, cpu, thread) && + read_single_counter(counter, cpu, thread, rs)) { + counter->counts->scaled = -1; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; + return -1; + } + + perf_counts__set_loaded(counter->counts, cpu, thread, false); - if (verbose > 1) { - fprintf(stat_config.output, - "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), - cpu, - count->val, count->ena, count->run); + if (STAT_RECORD) { + if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + pr_err("failed to write stat event\n"); + return -1; } } + + if (verbose > 1) { + fprintf(stat_config.output, + "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), + cpu, + count->val, count->ena, count->run); + } } return 0; @@ -323,15 +316,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs) static void read_counters(struct timespec *rs) { struct evsel *counter; - int ret; + struct affinity affinity; + int i, ncpus; + struct perf_cpu_map *cpus; + + if (affinity__setup(&affinity) < 0) + return; + + cpus = evlist__cpu_iter_start(evsel_list); + + ncpus = cpus->nr; + if (!(target__has_cpu(&target) && !target__has_per_thread(&target))) + ncpus = 1; + for (i = 0; i < ncpus; i++) { + int cpu = cpus->map[i]; + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evsel_list, counter) { + if (evlist__cpu_iter_skip(counter, cpu)) + continue; + counter->err = read_counter(counter, rs, counter->cpu_index); + evlist__cpu_iter_next(counter); + } + } + affinity__cleanup(&affinity); evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter, rs); - if (ret) + if (counter->err) pr_debug("failed to read counter %s\n", counter->name); - - if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) + if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); + counter->err = 0; } } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 28692abe1d42..ef87b76c0565 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -163,6 +163,7 @@ struct evsel { struct list_head config_terms; struct bpf_object *bpf_obj; int bpf_fd; + int err; bool auto_merge_stats; bool merged_stat; const char * metric_expr; -- 2.21.0