From: Andi Kleen <a...@linux.intel.com>

Restructure event enabling/disabling to use affinity, which
minimizes the number of IPIs needed.

Before, on a large test case with 94 CPUs:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 54.65    1.899986          22     84812       660 ioctl

After:

 39.21    0.930451          10     84796       644 ioctl

Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/lib/evsel.c              | 43 ++++++++++++++++++-------
 tools/perf/lib/include/perf/evsel.h |  2 ++
 tools/perf/util/evlist.c            | 50 ++++++++++++++++++++++++++---
 tools/perf/util/evsel.c             | 13 ++++++++
 tools/perf/util/evsel.h             |  2 ++
 5 files changed, 93 insertions(+), 17 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index 5d23bf09e486..417d5c94bc01 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -198,38 +198,57 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
 }
 
 static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
-                                int ioc,  void *arg)
+                                int ioc,  void *arg,
+                                int cpu)
 {
-       int cpu, thread;
+       int thread;
 
-       for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
-               for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
-                       int fd = FD(evsel, cpu, thread),
-                           err = ioctl(fd, ioc, arg);
+       for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+               int fd = FD(evsel, cpu, thread),
+                   err = ioctl(fd, ioc, arg);
 
-                       if (err)
-                               return err;
-               }
+               if (err)
+                       return err;
        }
 
        return 0;
 }
 
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+{
+       return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, cpu);
+}
+
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-       return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+       int i;
+       int err = 0;
+
+       for (i = 0; i < evsel->cpus->nr && !err; i++)
+               err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, i);
+       return err;
+}
+
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+{
+       return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, cpu);
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-       return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+       int i;
+       int err = 0;
+
+       for (i = 0; i < evsel->cpus->nr && !err; i++)
+               err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, i);
+       return err;
 }
 
 int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
 {
        return perf_evsel__run_ioctl(evsel,
                                     PERF_EVENT_IOC_SET_FILTER,
-                                    (void *)filter);
+                                    (void *)filter, -1);
 }
 
 struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index ed10a914cd3f..db31e512a120 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -32,7 +32,9 @@ LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
 LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                                 struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
 LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
 LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
 LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
 LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d9da9fe13933..66b34250c5fc 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -361,26 +361,66 @@ void evlist__cpu_iter_next(struct evsel *ev)
 void evlist__disable(struct evlist *evlist)
 {
        struct evsel *pos;
+       struct affinity affinity;
+       struct perf_cpu_map *cpus;
+       int i;
 
+       if (affinity__setup(&affinity) < 0)
+               return;
+
+       cpus = evlist__cpu_iter_start(evlist);
+       for (i = 0; i < cpus->nr; i++) {
+               int cpu = cpus->map[i];
+               affinity__set(&affinity, cpu);
+
+               evlist__for_each_entry(evlist, pos) {
+                       if (evlist__cpu_iter_skip(pos, cpu))
+                               continue;
+                       if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+                               continue;
+                       evsel__disable_cpu(pos, pos->cpu_index);
+                       evlist__cpu_iter_next(pos);
+               }
+       }
+       affinity__cleanup(&affinity);
        evlist__for_each_entry(evlist, pos) {
-               if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+               if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
                        continue;
-               evsel__disable(pos);
+               pos->disabled = true;
        }
-
        evlist->enabled = false;
 }
 
 void evlist__enable(struct evlist *evlist)
 {
        struct evsel *pos;
+       struct affinity affinity;
+       struct perf_cpu_map *cpus;
+       int i;
+
+       if (affinity__setup(&affinity) < 0)
+               return;
+
+       cpus = evlist__cpu_iter_start(evlist);
+       for (i = 0; i < cpus->nr; i++) {
+               int cpu = cpus->map[i];
+               affinity__set(&affinity, cpu);
 
+               evlist__for_each_entry(evlist, pos) {
+                       if (evlist__cpu_iter_skip(pos, cpu))
+                               continue;
+                       if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+                               continue;
+                       evsel__enable_cpu(pos, pos->cpu_index);
+                       evlist__cpu_iter_next(pos);
+               }
+       }
+       affinity__cleanup(&affinity);
        evlist__for_each_entry(evlist, pos) {
                if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
                        continue;
-               evsel__enable(pos);
+               pos->disabled = false;
        }
-
        evlist->enabled = true;
 }
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 394fceb4bf31..37387aa808ae 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1201,13 +1201,26 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
        return perf_evsel__append_filter(evsel, "%s,%s", filter);
 }
 
+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu)
+{
+       int err = perf_evsel__enable_cpu(&evsel->core, cpu);
+       return err;
+}
+
 int evsel__enable(struct evsel *evsel)
 {
        int err = perf_evsel__enable(&evsel->core);
 
        if (!err)
                evsel->disabled = false;
+       return err;
+}
 
+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu)
+{
+       int err = perf_evsel__disable_cpu(&evsel->core, cpu);
        return err;
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ef87b76c0565..e3eef1a62f4b 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -306,8 +306,10 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
 int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
 int perf_evsel__append_addr_filter(struct evsel *evsel,
                                   const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu);
 int evsel__enable(struct evsel *evsel);
 int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu);
 
 int perf_evsel__open_per_cpu(struct evsel *evsel,
                             struct perf_cpu_map *cpus,
-- 
2.21.0

Reply via email to