Michael Neuling [mi...@neuling.org] wrote: | > I am working on implementing the 'perf mem' command for Power | > systems. This would, for instance, let us know where in the memory | > hierarchy (L1, L2, Local RAM etc) the data for a load/store | > instruction was found (hit). | > | > On Power7, if the mmcra[DCACHE_MISS] is clear _and_ the | > instruction is a load/store, then it implies an L1-hit. | > | > Unlike on Power8, the Power7 event vector has no indication | > if the instruction was load/store. | > | > In the context of a PMU interrupt, is there any way to determine | > if an instruction is a load/store? | | You could read the instruction from memory and work it out. | | We do something similar to this in power_pmu_bhrb_to() where we read the | instruction and work out where the branch is going to. | | If you do this, please use and/or extend the functions in | arch/powerpc/lib/code-patching.c
Here is a draft of what I could come up with. With this patch, the number of L1 hits on Power7 matches that on Power8 for one application. But I am wondering if there is a more efficient way to do this - there are over 50 flavors of load and store! (btw, I will resend my whole patchset after some time-off). --- From db90cd382f4c1c0d84a0cfb07c9ffdb05d529456 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> Date: Fri, 23 Aug 2013 18:35:02 -0700 Subject: [PATCH 1/1] Try to detect load/store instruction on Power7 Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/lib/code-patching.c | 97 ++++++++++++++++++++++++++++++ arch/powerpc/perf/power7-pmu.c | 21 +++++++ 3 files changed, 119 insertions(+) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index a6f8c7a..3e47fe0 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -34,6 +34,7 @@ int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); unsigned long branch_target(const unsigned int *instr); unsigned int translate_branch(const unsigned int *dest, const unsigned int *src); +int instr_is_load_store(const unsigned int *instr); static inline unsigned long ppc_function_entry(void *func) { diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 17e5b23..10e7839 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -159,6 +159,103 @@ unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) return 0; } +/* + * TODO: this is same as branch_opcode(). Rename that function + * and re-use it ? 
+ */ +static unsigned int load_store_opcode(const unsigned int instr) +{ + return (instr >> 26) & 0X3F; +} + +static unsigned int load_store_xval(const unsigned int instr) +{ + return (instr >> 1) & 0x3FF; /* bits 21..30 */ +} + +/* + * Values of bits 21:30 of Fixed-point load and store instructions + * Reference: PowerISA_V2.06B_Public.pdf, Sections 3.3.2 through 3.3.6 + * 4.6.2 through 4.6.4. + */ +#define x_lbzx 87 +#define x_lbzux 119 +#define x_lhzx 279 +#define x_lhzux 311 +#define x_lhax 343 +#define x_lhaux 375 +#define x_lwzx 23 +#define x_lwzux 55 +#define x_lwax 341 +#define x_lwaux 373 +#define x_ldx 21 +#define x_ldux 53 +#define x_stbx 215 +#define x_stbux 247 +#define x_sthx 407 +#define x_sthux 439 +#define x_stwx 151 +#define x_stwux 183 +#define x_stdx 149 +#define x_stdux 181 +#define x_lhbrx 790 +#define x_lwbrx 534 +#define x_sthbrx 918 +#define x_stwbrx 662 +#define x_ldbrx 532 +#define x_stdbrx 660 +#define x_lswi 597 +#define x_lswx 533 +#define x_stswi 725 +#define x_stswx 661 +#define x_lfsx 535 +#define x_lfsux 567 +#define x_lfdx 599 +#define x_lfdux 631 +#define x_lfiwax 855 +#define x_lfiwzx 887 +#define x_stfsx 663 +#define x_stfsux 695 +#define x_stfdx 727 +#define x_stfdux 759 +#define x_stfiwax 983 +#define x_lfdpx 791 +#define x_stfdpx 919 + +static unsigned int x_form_load_store[] = { + x_lbzx, x_lbzux, x_lhzx, x_lhzux, x_lhax, + x_lhaux, x_lwzx, x_lwzux, x_lwax, x_lwaux, + x_ldx, x_ldux, x_stbx, x_stbux, x_sthx, + x_sthux, x_stwx, x_stwux, x_stdx, x_stdux, + x_lhbrx, x_lwbrx, x_sthbrx, x_stwbrx, x_ldbrx, + x_stdbrx, x_lswi, x_lswx, x_stswi, x_stswx, + x_lfsx, x_lfsux, x_lfdx, x_lfdux, x_lfiwax, + x_lfiwzx, x_stfsx, x_stfsux, x_stfdx, x_stfdux, + x_stfiwax, x_lfdpx, x_stfdpx +}; + +int instr_is_load_store(const unsigned int *instr) +{ + unsigned int op; + int i, n; + + op = load_store_opcode(*instr); + + if ((op >= 32 && op <= 58) || (op == 61 || op == 62)) + return 1; + + if (op == 31) { + n = sizeof(x_form_load_store) / 
sizeof(int); + + for (i = 0; i < n; i++) { + if (x_form_load_store[i] == load_store_xval(*instr)) + return 1; + } + } + + return 0; +} + #ifdef CONFIG_CODE_PATCHING_SELFTEST diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index f8143d6..6e1ca90 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -11,8 +11,10 @@ #include <linux/kernel.h> #include <linux/perf_event.h> #include <linux/string.h> +#include <linux/uaccess.h> #include <asm/reg.h> #include <asm/cputable.h> +#include <asm/code-patching.h> /* * Bits in event code for POWER7 @@ -383,13 +385,32 @@ static void power7_get_mem_data_src(union perf_mem_data_src *dsrc, { u64 idx; u64 mmcra = regs->dsisr; + u64 addr; + int ret; + unsigned int instr; if (mmcra & POWER7_MMCRA_DCACHE_MISS) { idx = mmcra & POWER7_MMCRA_DCACHE_SRC_MASK; idx >>= POWER7_MMCRA_DCACHE_SRC_SHIFT; dsrc->val |= dcache_src_map[idx]; + return; } + + instr = 0; + addr = perf_instruction_pointer(regs); + + if (is_kernel_addr(addr)) + instr = *(unsigned int *)addr; + else { + pagefault_disable(); + ret = __get_user_inatomic(instr, (unsigned int __user *)addr); + pagefault_enable(); + if (ret) + instr = 0; + } + if (instr && instr_is_load_store(&instr)) + dsrc->val |= PLH(LVL, L1); } -- 1.7.9.5 _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev