On Mon, Jan 18, 2021 at 12:05:21PM +0800, Jin Yao wrote: SNIP
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c > index 8ce1479c98f0..5aba8fa92386 100644 > --- a/tools/perf/util/stat.c > +++ b/tools/perf/util/stat.c > @@ -13,6 +13,7 @@ > #include "evlist.h" > #include "evsel.h" > #include "thread_map.h" > +#include "hashmap.h" > #include <linux/zalloc.h> > > void update_stats(struct stats *stats, u64 val) > @@ -276,15 +277,27 @@ void evlist__save_aggr_prev_raw_counts(struct evlist > *evlist) > static void zero_per_pkg(struct evsel *counter) > { > if (counter->per_pkg_mask) > - memset(counter->per_pkg_mask, 0, cpu__max_cpu()); > + hashmap__clear(counter->per_pkg_mask); > +} > + > +static size_t pkg_id_hash(const void *key, void *ctx __maybe_unused) > +{ > + return (size_t)key & 0xffff; > +} > + > +static bool pkg_id_equal(const void *key1, const void *key2, > + void *ctx __maybe_unused) > +{ > + return (size_t)key1 == (size_t)key2; > } > > static int check_per_pkg(struct evsel *counter, > struct perf_counts_values *vals, int cpu, bool *skip) > { > - unsigned long *mask = counter->per_pkg_mask; > + struct hashmap *mask = counter->per_pkg_mask; > struct perf_cpu_map *cpus = evsel__cpus(counter); > - int s; > + int s, d, ret = 0; > + size_t key; please use uint32_t to make it obvious > > *skip = false; > > @@ -295,7 +308,7 @@ static int check_per_pkg(struct evsel *counter, > return 0; > > if (!mask) { > - mask = zalloc(cpu__max_cpu()); > + mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); > if (!mask) > return -ENOMEM; > > @@ -317,8 +330,21 @@ static int check_per_pkg(struct evsel *counter, > if (s < 0) > return -1; > > - *skip = test_and_set_bit(s, mask) == 1; > - return 0; > + /* > + * On multi-die system, die_id > 0. On no-die system, die_id = 0. > + * We use hashmap(socket, die) to check the used socket+die pair. 
> + */ > + d = cpu_map__get_die(cpus, cpu, NULL).die; > + if (d < 0) > + return -1; > + > + key = (size_t)d << 16 | s; I'm not sure about the socket number bounds, but I guess we should at least check that it's not over the limit. How hard would it be to allocate key values and keep the uint64_t? thanks, jirka