Em Mon, Dec 06, 2021 at 02:47:49PM +0530, Kajol Jain escreveu: > The code represents memory/cache level data based on the PERF_MEM_LVL_* > namespace, which is in the process of deprecation in favour of the > newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. > Add data source encodings to represent cache/memory data based on the > newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
Thanks, applied. - Arnaldo > Add data source encodings to represent data coming from local > memory/Remote memory/distant memory and remote/distant cache hits. > > In order to represent data coming from OpenCAPI cache/memory, we use the > LVLNUM "PMEM" field, which is used to represent persistent memory accesses. > > Result on a power10 system with the patch changes: > > localhost:# ./perf mem report --sort="mem,sym,dso" --stdio > # Overhead Samples Memory access Symbol > Shared Object > # ........ ............ ........................ > .......................... ................ > # > 29.46% 2331 L1 or L1 hit [.] __random > libc-2.28.so > 23.11% 2121 L1 or L1 hit [.] > producer_populate_cache producer_consumer > 18.56% 1758 L1 or L1 hit [.] __random_r > libc-2.28.so > 15.64% 1559 L2 or L2 hit [.] __random > libc-2.28.so > ..... > 0.09% 5 Remote socket, same board Any cache hit > [.] __random libc-2.28.so > 0.07% 4 Remote socket, same board Any cache hit > [.] __random libc-2.28.so > ..... > > Reviewed-by: Madhavan Srinivasan <ma...@linux.ibm.com> > Signed-off-by: Kajol Jain <kj...@linux.ibm.com> > --- > arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 12 deletions(-) > > diff --git a/arch/powerpc/perf/isa207-common.c > b/arch/powerpc/perf/isa207-common.c > index 6c6bc8b7d887..4037ea652522 100644 > --- a/arch/powerpc/perf/isa207-common.c > +++ b/arch/powerpc/perf/isa207-common.c > @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 > sub_idx) > ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); > break; > case 4: > - if (sub_idx <= 1) > - ret = PH(LVL, LOC_RAM); > - else if (sub_idx > 1 && sub_idx <= 2) > - ret = PH(LVL, REM_RAM1); > - else > - ret = PH(LVL, REM_RAM2); > - ret |= P(SNOOP, HIT); > + if (cpu_has_feature(CPU_FTR_ARCH_31)) { > + ret = P(SNOOP, HIT); > + > + if (sub_idx == 1) > + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); > + else if (sub_idx == 2 || sub_idx == 3) > + ret |= P(LVL, HIT) | 
LEVEL(PMEM); > + else if (sub_idx == 4) > + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | > P(HOPS, 2); > + else if (sub_idx == 5 || sub_idx == 7) > + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; > + else if (sub_idx == 6) > + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | > P(HOPS, 3); > + } else { > + if (sub_idx <= 1) > + ret = PH(LVL, LOC_RAM); > + else if (sub_idx > 1 && sub_idx <= 2) > + ret = PH(LVL, REM_RAM1); > + else > + ret = PH(LVL, REM_RAM2); > + ret |= P(SNOOP, HIT); > + } > break; > case 5: > if (cpu_has_feature(CPU_FTR_ARCH_31)) { > @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 > sub_idx) > } > break; > case 6: > - ret = PH(LVL, REM_CCE2); > - if ((sub_idx == 0) || (sub_idx == 2)) > - ret |= P(SNOOP, HIT); > - else if ((sub_idx == 1) || (sub_idx == 3)) > - ret |= P(SNOOP, HITM); > + if (cpu_has_feature(CPU_FTR_ARCH_31)) { > + if (sub_idx == 0) > + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | > REM | > + P(SNOOP, HIT) | P(HOPS, 2); > + else if (sub_idx == 1) > + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | > REM | > + P(SNOOP, HITM) | P(HOPS, 2); > + else if (sub_idx == 2) > + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | > REM | > + P(SNOOP, HIT) | P(HOPS, 3); > + else if (sub_idx == 3) > + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | > REM | > + P(SNOOP, HITM) | P(HOPS, 3); > + } else { > + ret = PH(LVL, REM_CCE2); > + if (sub_idx == 0 || sub_idx == 2) > + ret |= P(SNOOP, HIT); > + else if (sub_idx == 1 || sub_idx == 3) > + ret |= P(SNOOP, HITM); > + } > break; > case 7: > ret = PM(LVL, L1); > -- > 2.27.0 -- - Arnaldo