From: Andi Kleen <a...@linux.intel.com>

Closing a perf fd can also trigger an IPI to the target CPU.
Apply the same affinity technique we already use for reading/enabling
events to closing them as well, to optimize the CPU transitions.

Before on a large test case with 94 CPUs:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 32.56    3.085463          50     61483           close

After:

 10.54    0.735704          11     61485           close

Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/lib/evsel.c              | 27 +++++++++++++++++++------
 tools/perf/lib/include/perf/evsel.h |  1 +
 tools/perf/util/evlist.c            | 31 +++++++++++++++++++++++++++--
 tools/perf/util/evsel.h             |  1 +
 4 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index 5a89857b0381..ea775dacbd2d 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
        return err;
 }
 
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) /* close all thread fds stored at index @cpu */
+{
+       int thread;
+
+       for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { /* every thread slot of this cpu index */
+               if (FD(evsel, cpu, thread) >= 0) /* a negative value means no open fd in this slot */
+                       close(FD(evsel, cpu, thread));
+               FD(evsel, cpu, thread) = -1; /* mark the slot closed either way */
+       }
+}
+
 void perf_evsel__close_fd(struct perf_evsel *evsel)
 {
-       int cpu, thread;
+       int cpu;
 
        for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
-               for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
-                       if (FD(evsel, cpu, thread) >= 0)
-                               close(FD(evsel, cpu, thread));
-                       FD(evsel, cpu, thread) = -1;
-               }
+               perf_evsel__close_fd_cpu(evsel, cpu); /* closes all thread fds at this cpu index and sets them to -1 */
 }
 
 void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
        perf_evsel__free_fd(evsel);
 }
 
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) /* close the fds of one cpu index; evsel->fd itself is not freed */
+{
+       if (evsel->fd == NULL) /* no fd array: nothing to close */
+               return;
+
+       perf_evsel__close_fd_cpu(evsel, cpu); /* @cpu is used as an index into evsel->fd and is not range-checked here */
+}
+
 int perf_evsel__read_size(struct perf_evsel *evsel)
 {
        u64 read_format = evsel->attr.read_format;
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index 4388667f265c..ed10a914cd3f 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -28,6 +28,7 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
 LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
                                 struct perf_thread_map *threads);
 LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
 LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                                 struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 27b4b958eddd..b1b29d473a9f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
 #include "debug.h"
 #include "units.h"
 #include <internal/lib.h> // page_size
+#include "affinity.h"
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
@@ -1174,9 +1175,55 @@ void perf_evlist__set_selected(struct evlist *evlist,
 void evlist__close(struct evlist *evlist)
 {
        struct evsel *evsel;
+       struct affinity affinity;
+       struct perf_cpu_map *cpus;
+       int i;
+
+       /*
+        * So far record doesn't set this up: without a CPU map there is
+        * nothing to iterate per CPU, so close each evsel the plain way.
+        */
+       if (!evlist->core.cpus) {
+               evlist__for_each_entry_reverse(evlist, evsel)
+                       evsel__close(evsel);
+               return;
+       }
 
-       evlist__for_each_entry_reverse(evlist, evsel)
-               evsel__close(evsel);
+       /*
+        * If the affinity state cannot be set up, still close every event
+        * the plain way instead of returning with all fds left open.
+        */
+       if (affinity__setup(&affinity) < 0) {
+               evlist__for_each_entry_reverse(evlist, evsel)
+                       evsel__close(evsel);
+               return;
+       }
+
+       /*
+        * Walk the CPUs in the outer loop so that the affinity is switched
+        * once per CPU and all of that CPU's fds are closed in one go.
+        */
+       cpus = evlist__cpu_iter_start(evlist);
+       for (i = 0; i < cpus->nr; i++) {
+               int cpu = cpus->map[i];
+
+               affinity__set(&affinity, cpu);
+
+               evlist__for_each_entry_reverse(evlist, evsel) {
+                       if (evlist__cpu_iter_skip(evsel, cpu))
+                               continue;
+                       perf_evsel__close_cpu(&evsel->core, evsel->cpu_index);
+                       evlist__cpu_iter_next(evsel);
+               }
+       }
+       /* Pairs with the successful affinity__setup() above. */
+       affinity__cleanup(&affinity);
+
+       /* Release the per-evsel fd and id storage after the per-CPU close pass. */
+       evlist__for_each_entry_reverse(evlist, evsel) {
+               perf_evsel__free_fd(&evsel->core);
+               perf_evsel__free_id(&evsel->core);
+       }
 }
 
 static int perf_evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index cf90019ae744..2e3b011ed09e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -391,4 +391,5 @@ static inline bool evsel__has_callchain(const struct evsel *evsel)
 struct perf_env *perf_evsel__env(struct evsel *evsel);
 
 int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
 #endif /* __PERF_EVSEL_H */
-- 
2.21.0

Reply via email to