With this patch, we can use the coresum event qualifier in perf-stat.

root@skl:/tmp# perf stat -e cpu/event=0,umask=0x3,coresum=1/,cpu/event=0,umask=0x3/ -a -A -I1000
 #           time CPU                  counts      unit events
     1.000816660 S0-C0             80,676,154      cpu/event=0,umask=0x3,coresum=1/                                     (50.04%)
     1.000816660 S0-C1            115,314,959      cpu/event=0,umask=0x3,coresum=1/                                     (50.04%)
     1.000816660 S0-C2            126,541,249      cpu/event=0,umask=0x3,coresum=1/                                     (50.04%)
     1.000816660 S0-C3            119,950,015      cpu/event=0,umask=0x3,coresum=1/                                     (50.04%)
     1.000816660 CPU0              52,439,489      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU1              53,431,155      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU2              91,192,070      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU3              90,852,159      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU4              29,715,956      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU5              63,289,507      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU6              29,036,120      cpu/event=0,umask=0x3/                                         (49.96%)
     1.000816660 CPU7              28,996,591      cpu/event=0,umask=0x3/                                         (49.97%)

In this example, we count the event 'ref-cycles' per-core and per-CPU in
one perf stat command-line. From the output, we can see:

  S0-C0 = CPU0 + CPU4
  S0-C1 = CPU1 + CPU5
  S0-C2 = CPU2 + CPU6
  S0-C3 = CPU3 + CPU7

So the result is as expected (tiny differences are ignored).

Signed-off-by: Jin Yao <yao....@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |   4 ++
 tools/perf/builtin-stat.c              |  21 +++++++
 tools/perf/util/stat-display.c         | 108 ++++++++++++++++++++++++---------
 tools/perf/util/stat.c                 |   8 ++-
 4 files changed, 108 insertions(+), 33 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4bc2085..cd5fbd9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -43,6 +43,10 @@ report::
          param1 and param2 are defined as formats for the PMU in
          /sys/bus/event_source/devices/<pmu>/format/*
 
+         'coresum' is an event qualifier that sums up the event counts for both
+         hardware threads in a core. For example:
+         perf stat -A -a -e cpu/event,coresum=1/,otherevent ...
+
        - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
          where M, N, K are numbers (in decimal, hex, octal format).
          Acceptable values for each of 'config', 'config1' and 'config2'
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7b8f09b..8ce0168 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -832,6 +832,18 @@ static int perf_stat__get_core_cached(struct perf_stat_config *config,
        return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
 }
 
+static bool term_coresum_set(void)
+{
+       struct perf_evsel *counter;
+
+       evlist__for_each_entry(evsel_list, counter) {
+               if (counter->coresum)
+                       return true;
+       }
+
+       return false;
+}
+
 static int perf_stat_init_aggr_mode(void)
 {
        int nr;
@@ -852,6 +864,15 @@ static int perf_stat_init_aggr_mode(void)
                stat_config.aggr_get_id = perf_stat__get_core_cached;
                break;
        case AGGR_NONE:
+               if (term_coresum_set()) {
+                       if (cpu_map__build_core_map(evsel_list->cpus,
+                                                   &stat_config.aggr_map)) {
+                               perror("cannot build core map");
+                               return -1;
+                       }
+                       stat_config.aggr_get_id = perf_stat__get_core_cached;
+               }
+               break;
        case AGGR_GLOBAL:
        case AGGR_THREAD:
        case AGGR_UNSET:
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 6d043c7..23c9368 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -93,9 +93,17 @@ static void aggr_printout(struct perf_stat_config *config,
                        config->csv_sep);
                        break;
        case AGGR_NONE:
-               fprintf(config->output, "CPU%*d%s",
-                       config->csv_output ? 0 : -4,
-                       perf_evsel__cpus(evsel)->map[id], config->csv_sep);
+               if (evsel->coresum) {
+                       fprintf(config->output, "S%d-C%*d%s",
+                               cpu_map__id_to_socket(id),
+                               config->csv_output ? 0 : -5,
+                               cpu_map__id_to_cpu(id), config->csv_sep);
+               } else {
+                       fprintf(config->output, "CPU%*d%s ",
+                               config->csv_output ? 0 : -5,
+                               perf_evsel__cpus(evsel)->map[id],
+                               config->csv_sep);
+               }
                break;
        case AGGR_THREAD:
                fprintf(config->output, "%*s-%*d%s",
@@ -599,6 +607,41 @@ static void aggr_cb(struct perf_stat_config *config,
        }
 }
 
+static void print_counter_aggrdata(struct perf_stat_config *config,
+                                  struct perf_evsel *counter, int s,
+                                  char *prefix, bool metric_only,
+                                  bool *first)
+{
+       struct aggr_data ad;
+       FILE *output = config->output;
+       u64 ena, run, val;
+       int id, nr;
+       double uval;
+
+       ad.id = id = config->aggr_map->map[s];
+       ad.val = ad.ena = ad.run = 0;
+       ad.nr = 0;
+       if (!collect_data(config, counter, aggr_cb, &ad))
+               return;
+
+       nr = ad.nr;
+       ena = ad.ena;
+       run = ad.run;
+       val = ad.val;
+       if (*first && metric_only) {
+               *first = false;
+               aggr_printout(config, counter, id, nr);
+       }
+       if (prefix && !metric_only)
+               fprintf(output, "%s", prefix);
+
+       uval = val * counter->scale;
+       printout(config, id, nr, counter, uval, prefix,
+                run, ena, 1.0, &rt_stat);
+       if (!metric_only)
+               fputc('\n', output);
+}
+
 static void print_aggr(struct perf_stat_config *config,
                       struct perf_evlist *evlist,
                       char *prefix)
@@ -606,9 +649,7 @@ static void print_aggr(struct perf_stat_config *config,
        bool metric_only = config->metric_only;
        FILE *output = config->output;
        struct perf_evsel *counter;
-       int s, id, nr;
-       double uval;
-       u64 ena, run, val;
+       int s;
        bool first;
 
        if (!(config->aggr_map || config->aggr_get_id))
@@ -621,36 +662,16 @@ static void print_aggr(struct perf_stat_config *config,
         * Without each counter has its own line.
         */
        for (s = 0; s < config->aggr_map->nr; s++) {
-               struct aggr_data ad;
                if (prefix && metric_only)
                        fprintf(output, "%s", prefix);
 
-               ad.id = id = config->aggr_map->map[s];
                first = true;
                evlist__for_each_entry(evlist, counter) {
                        if (is_duration_time(counter))
                                continue;
-
-                       ad.val = ad.ena = ad.run = 0;
-                       ad.nr = 0;
-                       if (!collect_data(config, counter, aggr_cb, &ad))
-                               continue;
-                       nr = ad.nr;
-                       ena = ad.ena;
-                       run = ad.run;
-                       val = ad.val;
-                       if (first && metric_only) {
-                               first = false;
-                               aggr_printout(config, counter, id, nr);
-                       }
-                       if (prefix && !metric_only)
-                               fprintf(output, "%s", prefix);
-
-                       uval = val * counter->scale;
-                       printout(config, id, nr, counter, uval, prefix,
-                                run, ena, 1.0, &rt_stat);
-                       if (!metric_only)
-                               fputc('\n', output);
+                       print_counter_aggrdata(config, counter, s,
+                                              prefix, metric_only,
+                                              &first);
                }
                if (metric_only)
                        fputc('\n', output);
@@ -1101,6 +1122,30 @@ static void print_footer(struct perf_stat_config *config)
                        "the same PMU. Try reorganizing the group.\n");
 }
 
+static void print_coresum(struct perf_stat_config *config,
+                         struct perf_evsel *counter, char *prefix)
+{
+       bool metric_only = config->metric_only;
+       FILE *output = config->output;
+       int s;
+       bool first = true;
+
+       if (!(config->aggr_map || config->aggr_get_id))
+               return;
+
+       for (s = 0; s < config->aggr_map->nr; s++) {
+               if (prefix && metric_only)
+                       fprintf(output, "%s", prefix);
+
+               print_counter_aggrdata(config, counter, s,
+                                      prefix, metric_only,
+                                      &first);
+       }
+
+       if (metric_only)
+               fputc('\n', output);
+}
+
 void
 perf_evlist__print_counters(struct perf_evlist *evlist,
                            struct perf_stat_config *config,
@@ -1157,7 +1202,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
                        evlist__for_each_entry(evlist, counter) {
                                if (is_duration_time(counter))
                                        continue;
-                               print_counter(config, counter, prefix);
+                               if (counter->coresum)
+                                       print_coresum(config, counter, prefix);
+                               else
+                                       print_counter(config, counter, prefix);
                        }
                }
                break;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 4d40515..33a6e3c 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -277,9 +277,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
                if (!evsel->snapshot)
                        perf_evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
-               if (config->aggr_mode == AGGR_NONE)
-                       perf_stat__update_shadow_stats(evsel, count->val, cpu,
-                                                      &rt_stat);
+               if ((config->aggr_mode == AGGR_NONE) && (!evsel->coresum)) {
+                       perf_stat__update_shadow_stats(evsel, count->val,
+                                                      cpu, &rt_stat);
+               }
+
                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
-- 
2.7.4

Reply via email to