From: Andi Kleen <a...@linux.intel.com>

pmu.c does a lot of redundant /sys accesses while parsing aliases and probing for PMUs. On large systems with a lot of PMUs this can get expensive (>2s):
% time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 27.25 1.227847 8 160888 16976 openat 26.42 1.190481 7 164224 164077 stat Add a cache to remember if specific file names exist or don't exist, which eliminates most of this overhead. Also optimize some stat() calls to be slightly cheaper access() Resulting in: 0.18 0.004166 2 1851 305 open 0.08 0.001970 2 829 622 access Signed-off-by: Andi Kleen <a...@linux.intel.com> --- tools/perf/util/Build | 1 + tools/perf/util/fncache.c | 52 ++++++++++++++++++++++++++++++++++++ tools/perf/util/fncache.h | 8 ++++++ tools/perf/util/pmu.c | 55 ++++++++++++++++++++++++--------------- tools/perf/util/srccode.c | 9 +------ 5 files changed, 96 insertions(+), 29 deletions(-) create mode 100644 tools/perf/util/fncache.c create mode 100644 tools/perf/util/fncache.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 0b4d8e0d474c..5477f6afe735 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -46,6 +46,7 @@ perf-y += header.o perf-y += callchain.o perf-y += values.o perf-y += debug.o +perf-y += fncache.o perf-y += machine.o perf-y += map.o perf-y += pstack.o diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c new file mode 100644 index 000000000000..0e6e2370b3af --- /dev/null +++ b/tools/perf/util/fncache.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Manage a cache of file names' existence */ +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <linux/list.h> +#include "fncache.h" + +struct fncache { + struct hlist_node nd; + bool res; + char name[]; +}; + +#define FNHSIZE 61 + +static struct hlist_head fncache_hash[FNHSIZE]; + +unsigned shash(const unsigned char *s) +{ + unsigned h = 0; + while (*s) + h = 65599 * h + *s++; + return h ^ (h >> 16); +} + +bool lookup_fncache(const char *name, bool *res) +{ + int h = shash((const unsigned char *)name) % FNHSIZE; + struct fncache *n; + + 
hlist_for_each_entry (n, &fncache_hash[h], nd) { + if (!strcmp(n->name, name)) { + *res = n->res; + return true; + } + } + return false; +} + +/* No LRU, only use when bounded in some other way. */ +void update_fncache(const char *name, bool res) +{ + struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); + int h = shash((const unsigned char *)name) % FNHSIZE; + + if (!n) + return; + strcpy(n->name, name); + n->res = res; + hlist_add_head(&n->nd, &fncache_hash[h]); +} diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h new file mode 100644 index 000000000000..93ca473f5357 --- /dev/null +++ b/tools/perf/util/fncache.h @@ -0,0 +1,8 @@ +#ifndef _FCACHE_H +#define _FCACHE_H 1 + +unsigned shash(const unsigned char *s); +void update_fncache(const char *name, bool res); +bool lookup_fncache(const char *name, bool *res); + +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index fb597fa94234..382cf335b19b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -25,6 +25,7 @@ #include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" +#include "fncache.h" struct perf_pmu_format { char *name; @@ -83,9 +84,9 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) */ static int pmu_format(const char *name, struct list_head *format) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); + bool res = false; if (!sysfs) return -1; @@ -93,8 +94,12 @@ static int pmu_format(const char *name, struct list_head *format) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (stat(path, &st) < 0) + if (lookup_fncache(path, &res) && !res) + return 0; + + if (!res && access(path, R_OK) < 0) return 0; /* no error if format does not exist */ + update_fncache(path, true); if (perf_pmu__format_parse(path, format)) return -1; @@ -243,7 +248,7 @@ static void perf_pmu_assign_str(char *name, const char *field, char **old_str, goto set_new; if (*new_str) { 
/* Have new string, check with old */ - if (strcasecmp(*old_str, *new_str)) + if (strcasecmp(*old_str, *new_str) && 0) pr_debug("alias %s differs in field '%s'\n", name, field); zfree(old_str); @@ -471,9 +476,9 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) */ static int pmu_aliases(const char *name, struct list_head *head) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); + bool res = false; if (!sysfs) return -1; @@ -481,8 +486,11 @@ static int pmu_aliases(const char *name, struct list_head *head) snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events", sysfs, name); - if (stat(path, &st) < 0) - return 0; /* no error if 'events' does not exist */ + if (lookup_fncache(path, &res) && !res) + return 0; + if (!res && access(path, R_OK) < 0) + return 0; + update_fncache(path, true); if (pmu_aliases_parse(path, head)) return -1; @@ -521,7 +529,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, */ static int pmu_type(const char *name, __u32 *type) { - struct stat st; char path[PATH_MAX]; FILE *file; int ret = 0; @@ -533,7 +540,7 @@ static int pmu_type(const char *name, __u32 *type) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - if (stat(path, &st) < 0) + if (access(path, R_OK) < 0) return -1; file = fopen(path, "r"); @@ -624,14 +631,16 @@ static struct perf_cpu_map *pmu_cpumask(const char *name) static bool pmu_is_uncore(const char *name) { char path[PATH_MAX]; - struct perf_cpu_map *cpus; - const char *sysfs = sysfs__mountpoint(); + const char *sysfs; + bool res; + sysfs = sysfs__mountpoint(); snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); - cpus = __pmu_cpumask(path); - perf_cpu_map__put(cpus); - - return !!cpus; + if (lookup_fncache(path, &res)) + return res; + res = access(path, R_OK) == 0; + update_fncache(path, res); + return res; } /* @@ -641,9 +650,9 @@ static bool pmu_is_uncore(const char *name) */ static int is_arm_pmu_core(const char *name) 
{ - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); + bool res; if (!sysfs) return 0; @@ -651,10 +660,11 @@ static int is_arm_pmu_core(const char *name) /* Look for cpu sysfs (specific to arm) */ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", sysfs, name); - if (stat(path, &st) == 0) - return 1; - - return 0; + if (lookup_fncache(path, &res)) + return res; + res = access(path, R_OK) == 0; + update_fncache(path, res); + return res; } static char *perf_pmu__getcpuid(struct perf_pmu *pmu) @@ -1520,9 +1530,9 @@ bool pmu_have_event(const char *pname, const char *name) static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { - struct stat st; char path[PATH_MAX]; const char *sysfs; + bool res = false; sysfs = sysfs__mountpoint(); if (!sysfs) @@ -1531,8 +1541,11 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - if (stat(path, &st) < 0) + if (lookup_fncache(path, &res) && !res) + return NULL; + if (!res && access(path, R_OK) < 0) return NULL; + update_fncache(path, true); return fopen(path, "r"); } diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index adfcf1ff464c..7451b38c326e 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,6 +16,7 @@ #include "srccode.h" #include "debug.h" #include "util.h" +#include "fncache.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list); static long map_total_sz; static int num_srcfiles; -static unsigned shash(unsigned char *s) -{ - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); -} - static int countlines(char *map, int maplen) { int numl; -- 2.21.0