I found that order of events in a group impacts performance during the
open.  If a group has a software event as a leader and has other
hardware events, the lead needs to be moved to a hardware context.
This includes RCU synchronization which takes about 20 msec on my
system.  And this is just for a single group, so total time increases
in proportion to the number of event groups and the number of cpus.

On my 36 cpu system, opening 3 groups system-wide takes more than 2
seconds.  You can see and compare it easily with the following:

  $ time ./perf stat -a -e '{cs,cycles},{cs,cycles},{cs,cycles}' sleep 1
  ...
       1.006333430 seconds time elapsed

  real  0m3.969s
  user  0m0.089s
  sys   0m0.074s

  $ time ./perf stat -a -e '{cycles,cs},{cycles,cs},{cycles,cs}' sleep 1
  ...
       1.006755292 seconds time elapsed

  real  0m1.144s
  user  0m0.067s
  sys   0m0.083s

This patch just added a warning before running it.  I'd really want to
fix the kernel if possible but don't have a good idea.  Thoughts?

Signed-off-by: Namhyung Kim <namhy...@kernel.org>
---
 tools/perf/builtin-record.c |  2 +
 tools/perf/builtin-stat.c   |  2 +
 tools/perf/builtin-top.c    |  2 +
 tools/perf/util/evlist.c    | 78 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/evlist.h    |  1 +
 5 files changed, 85 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index adf311d15d3d..c0b08cacbae0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -912,6 +912,8 @@ static int record__open(struct record *rec)
 
        perf_evlist__config(evlist, opts, &callchain_param);
 
+       evlist__warn_mixed_group(evlist);
+
        evlist__for_each_entry(evlist, pos) {
 try_again:
                if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b01af171d94f..d5d4e02bda69 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -738,6 +738,8 @@ static int __run_perf_stat(int argc, const char **argv, int 
run_idx)
        if (affinity__setup(&affinity) < 0)
                return -1;
 
+       evlist__warn_mixed_group(evsel_list);
+
        evlist__for_each_cpu (evsel_list, i, cpu) {
                affinity__set(&affinity, cpu);
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7c64134472c7..9ad319cea948 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1027,6 +1027,8 @@ static int perf_top__start_counters(struct perf_top *top)
 
        perf_evlist__config(evlist, opts, &callchain_param);
 
+       evlist__warn_mixed_group(evlist);
+
        evlist__for_each_entry(evlist, counter) {
 try_again:
                if (evsel__open(counter, top->evlist->core.cpus,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8bdf3d2c907c..02cff39e509e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -28,6 +28,7 @@
 #include <unistd.h>
 #include <sched.h>
 #include <stdlib.h>
+#include <dirent.h>
 
 #include "parse-events.h"
 #include <subcmd/parse-options.h>
@@ -1980,3 +1981,80 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, 
int idx)
        }
        return NULL;
 }
+
+static int *sw_types;
+static int nr_sw_types;
+
+static void collect_software_pmu_types(void)
+{
+       const char *known_sw_pmu[] = {
+               "software", "tracepoint", "breakpoint", "kprobe", "uprobe", 
"msr"
+       };
+       DIR *dir;
+       struct dirent *d;
+       char path[PATH_MAX];
+       int i;
+
+       if (sw_types != NULL)
+               return;
+
+       nr_sw_types = ARRAY_SIZE(known_sw_pmu);
+       sw_types = calloc(nr_sw_types, sizeof(int));
+       if (sw_types == NULL) {
+               pr_err("Memory allocation failed!\n");
+               return;
+       }
+
+       dir = opendir("/sys/bus/event_source/devices");
+       while ((d = readdir(dir)) != NULL) {
+               for (i = 0; i < nr_sw_types; i++) {
+                       if (strcmp(d->d_name, known_sw_pmu[i]))
+                               continue;
+
+                       snprintf(path, sizeof(path), "%s/%s/type",
+                                "bus/event_source/devices", d->d_name);
+                       sysfs__read_int(path, &sw_types[i]);
+               }
+       }
+       closedir(dir);
+}
+
+static bool is_software_event(struct evsel *evsel)
+{
+       int i;
+
+       for (i = 0; i < nr_sw_types; i++) {
+               if (evsel->core.attr.type == (unsigned)sw_types[i])
+                       return true;
+       }
+       return false;
+}
+
+void evlist__warn_mixed_group(struct evlist *evlist)
+{
+       struct evsel *leader, *evsel;
+       bool warn = true;
+
+       collect_software_pmu_types();
+
+       /* Warn if an event group has a sw leader and hw siblings */
+       evlist__for_each_entry(evlist, leader) {
+               if (!evsel__is_group_event(leader))
+                       continue;
+
+               if (!is_software_event(leader))
+                       continue;
+
+               for_each_group_member(evsel, leader) {
+                       if (is_software_event(evsel))
+                               continue;
+                       if (!warn)
+                               continue;
+
+                       pr_warning("WARNING: Event group has mixed hw/sw 
events.\n"
+                                  "This will slow down the perf_event_open 
syscall.\n"
+                                  "Consider putting a hw event as a 
leader.\n\n");
+                       warn = false;
+               }
+       }
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e1a450322bc5..a5b0a1d03a00 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -387,4 +387,5 @@ int evlist__ctlfd_ack(struct evlist *evlist);
 #define EVLIST_DISABLED_MSG "Events disabled\n"
 
 struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
+void evlist__warn_mixed_group(struct evlist *evlist);
 #endif /* __PERF_EVLIST_H */
-- 
2.29.0.rc1.297.gfa9743e501-goog

Reply via email to