When decoding the perf_regs mask in regs_dump__printf(), we loop through the mask using the find_first_bit() and find_next_bit() functions. "mask" is of type "u64", but it is passed as an "unsigned long *" to the lib functions along with sizeof().
While the existing code works fine in most cases, the logic is broken when using a 32-bit perf on a 64-bit kernel (Big Endian). When reading a u64 using (u32 *)(&val)[0], perf (lib/find_*_bit()) assumes it gets the lower 32 bits of the u64, which is wrong. The proposed fix is to swap the words of the u64 to handle this case. This is _not_ an endianness swap. Suggested-by: Yury Norov <yno...@caviumnetworks.com> Reviewed-by: Yury Norov <yno...@caviumnetworks.com> Acked-by: Jiri Olsa <jo...@kernel.org> Cc: Yury Norov <yno...@caviumnetworks.com> Cc: Peter Zijlstra <pet...@infradead.org> Cc: Ingo Molnar <mi...@redhat.com> Cc: Arnaldo Carvalho de Melo <a...@kernel.org> Cc: Alexander Shishkin <alexander.shish...@linux.intel.com> Cc: Jiri Olsa <jo...@kernel.org> Cc: Adrian Hunter <adrian.hun...@intel.com> Cc: Kan Liang <kan.li...@intel.com> Cc: Wang Nan <wangn...@huawei.com> Cc: Michael Ellerman <m...@ellerman.id.au> Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com> --- Fix already posted, but yet to be pulled in. This is needed for the subsequent patches. 
https://patchwork.kernel.org/patch/9285421/ tools/include/linux/bitmap.h | 2 ++ tools/lib/bitmap.c | 18 ++++++++++++++++++ tools/perf/builtin-script.c | 4 +++- tools/perf/util/session.c | 4 +++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 43c1c5021e4b..998ac95a8ddd 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -4,10 +4,12 @@ #include <string.h> #include <linux/bitops.h> #include <stdlib.h> +#include <limits.h> #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] +void bitmap_from_u64(unsigned long *dst, u64 mask); int __bitmap_weight(const unsigned long *bitmap, int bits); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 38748b0e342f..21e17730c35f 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -73,3 +73,21 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, BITMAP_LAST_WORD_MASK(bits)); return result != 0; } + +/* + * bitmap_from_u64 - Check and swap words within u64. + * @mask: source bitmap + * @dst: destination bitmap + * + * In 32 bit big endian userspace on a 64bit kernel, 'unsigned long' is 32 bits. + * When reading u64 using (u32 *)(&val)[0] and (u32 *)(&val)[1], + * we will get wrong value for the mask. That is "(u32 *)(&val)[0]" + * gets upper 32 bits of u64, but perf may expect lower 32bits of u64. 
+ */ +void bitmap_from_u64(unsigned long *dst, u64 mask) +{ + dst[0] = mask & ULONG_MAX; + + if (sizeof(mask) > sizeof(unsigned long)) + dst[1] = mask >> 32; +} diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6b3c8b0d3276..db270b4f892a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -421,11 +421,13 @@ static void print_sample_iregs(struct perf_sample *sample, struct regs_dump *regs = &sample->intr_regs; uint64_t mask = attr->sample_regs_intr; unsigned i = 0, r; + DECLARE_BITMAP(_mask, 64); if (!regs) return; - for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + bitmap_from_u64(_mask, mask); + for_each_set_bit(r, _mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5d61242a6e64..440a9fb2a6fb 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -944,8 +944,10 @@ static void branch_stack__printf(struct perf_sample *sample) static void regs_dump__printf(u64 mask, u64 *regs) { unsigned rid, i = 0; + DECLARE_BITMAP(_mask, 64); - for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { + bitmap_from_u64(_mask, mask); + for_each_set_bit(rid, _mask, sizeof(mask) * 8) { u64 val = regs[i++]; printf(".... %-5s 0x%" PRIx64 "\n", -- 2.7.4