https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092
>From ccfee2d0c5399b03b53ef79a4645ab0d10efeafd Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Sun, 10 Dec 2023 18:30:42 -0800 Subject: [PATCH 1/2] [PseudoProbe] Mix and reorder block and call probe ID in lexical order --- clang/test/CodeGen/pseudo-probe-emit.c | 8 +++---- .../llvm/Transforms/IPO/SampleProfileProbe.h | 3 +-- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++----------- .../Inputs/pseudo-probe-profile.prof | 8 +++---- .../Inputs/pseudo-probe-update.prof | 8 +++---- .../SampleProfile/pseudo-probe-dangle.ll | 12 +++++----- .../pseudo-probe-discriminator.ll | 6 ++--- .../pseudo-probe-profile-metadata-2.ll | 15 ++++++------- .../SampleProfile/pseudo-probe-profile.ll | 22 +++++++++---------- .../SampleProfile/pseudo-probe-update.ll | 11 +++++----- .../SampleProfile/pseudo-probe-verify.ll | 16 +++++++------- 11 files changed, 59 insertions(+), 69 deletions(-) diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c index c7a3f7e6d5b02b..360f831e842945 100644 --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -10,9 +10,9 @@ void foo(int x) { // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1) - bar(); + bar(); // probe id : 3 else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) - go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + go(); // probe id : 5 + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1) } diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 0f2729a9462de2..69b87adf105fd1 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -82,8 +82,7 @@ class SampleProfileProber { uint32_t getBlockId(const BasicBlock *BB) const; uint32_t getCallsiteId(const Instruction *Call) const; void computeCFGHash(); - void computeProbeIdForBlocks(); - void computeProbeIdForCallsites(); + void computeProbeId(); Function *F; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 8f0b12d0cfedfc..6c6bb18bcb3c9d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func, BlockProbeIds.clear(); CallProbeIds.clear(); LastProbeId = (uint32_t)PseudoProbeReservedId::Last; - computeProbeIdForBlocks(); - computeProbeIdForCallsites(); + computeProbeId(); computeCFGHash(); } @@ -209,7 +208,10 @@ void SampleProfileProber::computeCFGHash() { << ", Hash = " << FunctionHash << "\n"); } -void SampleProfileProber::computeProbeIdForBlocks() { +void SampleProfileProber::computeProbeId() { + LLVMContext &Ctx = F->getContext(); + Module *M = F->getParent(); + DenseSet<BasicBlock *> KnownColdBlocks; computeEHOnlyBlocks(*F, KnownColdBlocks); // Insert pseudo probe to non-cold blocks only. This will reduce IR size as @@ -218,18 +220,9 @@ void SampleProfileProber::computeProbeIdForBlocks() { ++LastProbeId; if (!KnownColdBlocks.contains(&BB)) BlockProbeIds[&BB] = LastProbeId; - } -} -void SampleProfileProber::computeProbeIdForCallsites() { - LLVMContext &Ctx = F->getContext(); - Module *M = F->getParent(); - - for (auto &BB : *F) { for (auto &I : BB) { - if (!isa<CallBase>(I)) - continue; - if (isa<IntrinsicInst>(&I)) + if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I)) continue; // The current implementation uses the lower 16 bits of the discriminator diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof index ba4c6117dc96ab..d3847946b94033 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 _Z3barv:2 _Z3foov:5 - 6: 6 _Z3barv:4 _Z3foov:2 + 4: 6 + 6: 13 + 3: 7 _Z3barv:2 _Z3foov:5 + 5: 6 _Z3barv:4 _Z3foov:2 !CFGChecksum: 563022570642068 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof index 62f9bd5992e735..213bf0b6f81cc4 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 - 6: 6 + 4: 6 + 6: 13 + 7: 7 + 9: 6 !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll index 4647a34fc2f620..f0b6fdf62d9696 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll @@ -23,21 +23,21 @@ Merge: ; JT-LABEL-NO: T ; JT-LABEL-NO: F ; JT-LABEL: Merge +; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4 ; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3 -; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2 -; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; ASM-NOT: .pseudoprobe 6699318081062747564 4 ; ASM-NOT: .pseudoprobe 6699318081062747564 3 -; ASM-NOT: .pseudoprobe 6699318081062747564 2 -; ASM: .pseudoprobe 6699318081062747564 4 0 0 +; ASM: .pseudoprobe 6699318081062747564 5 0 0 ret i32 %call } ;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too. ; MIR-tail: bb.0 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 -; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2 ; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3 -; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0 +; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4 +; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0 define i32 @test(i32 %a, i32 %b, i32 %c) { diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll index 62f0737875aec3..97b0ed600ad106 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll @@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) - + ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]]) -; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575) +; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559) ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]]) -; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583) +; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567) ; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll index 148f3ede5ab48a..379dcfcab338d9 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll @@ -29,7 +29,7 @@ if.else: br label %return return: - call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"} !9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) !10 = !{!"function_entry_count", i64 14} !11 = !{!"branch_weights", i32 100, i32 0} -;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 1.0. -!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575) +;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 1.0. +!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559) !13 = distinct !DILocation(line: 10, column: 11, scope: !12) -;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 0. -!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775) +;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 0. +!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759) !15 = distinct !DILocation(line: 10, column: 11, scope: !14) !16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll index 474b6668b0a7a7..867a49dbaed2ee 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -22,12 +22,12 @@ if.then: if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) store i32 2, ptr %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"} ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) +;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe +;; with an index of 3. +; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695) +; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} ;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) -; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe -;; with an index of 6. ; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - ProbeId: '3' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - ProbeId: '5' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - ProbeId: '4' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - ProbeId: '6' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll index 992afedd14f75f..217b61970933dc 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -14,15 +14,15 @@ T1: %v1 = call i32 @f1() ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) ;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080) %cond3 = icmp eq i32 %v1, 412 br label %Merge F1: ; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] %v2 = call i32 @f2() -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) ;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968) br label %Merge Merge: @@ -30,11 +30,11 @@ Merge: %B = phi i32 [%v1, %T1], [%v2, %F1] br i1 %A, label %T2, label %F2 T2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1) call void @f3() ret i32 %B F2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1) ret i32 %B } @@ -42,4 +42,3 @@ F2: ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} attributes #0 = {"use-sample-profile"} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll index f70e5189ab1293..b622cfbd6634ef 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -4,7 +4,7 @@ ; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** ; VERIFY: Function foo: -; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 5 previous factor 1.00 current factor 5.00 ; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00 declare void @foo2() nounwind @@ -27,15 +27,15 @@ bb7.preheader: bb10: ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] @@ -50,14 +50,14 @@ bb10: br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 bb24: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) ret void } ;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe ;; with an index of 6 and a scale of -1%. ; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) -; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} >From 6ff88479ab47186372ce59a2eb13d420a53e3d80 Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Wed, 10 Jan 2024 10:10:34 -0800 Subject: [PATCH 2/2] Detect the order and add a summary flag to the profile --- llvm/include/llvm/ProfileData/SampleProf.h | 10 +++- .../llvm/ProfileData/SampleProfReader.h | 3 + llvm/lib/ProfileData/SampleProf.cpp | 1 + llvm/lib/ProfileData/SampleProfReader.cpp | 5 ++ llvm/lib/ProfileData/SampleProfWriter.cpp | 4 ++ llvm/lib/Transforms/IPO/SampleProfile.cpp | 10 +++- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 58 +++++++++++++++++++ 7 files changed, 88 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 66aaf602d0e1d9..4e5191ef076a52 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t { /// SecFlagIsPreInlined means this profile contains ShouldBeInlined /// contexts thus this is CS preinliner computed. SecFlagIsPreInlined = (1 << 4), + /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the + /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of + /// the order that callsite probe IDs are always after the BB probe IDs. + SecFlagIsMixedProbeOrder = (1 << 5), }; enum class SecFuncMetadataFlags : uint32_t { @@ -466,7 +470,7 @@ struct SampleContextFrame { LineLocation Location; SampleContextFrame() : Location(0, 0) {} - + SampleContextFrame(FunctionId Func, LineLocation Location) : Func(Func), Location(Location) {} @@ -527,7 +531,7 @@ class SampleContext { : Func(Name), State(UnknownContext), Attributes(ContextNone) { assert(!Name.empty() && "Name is empty"); } - + SampleContext(FunctionId Func) : Func(Func), State(UnknownContext), Attributes(ContextNone) {} @@ -1178,6 +1182,8 @@ class FunctionSamples { static bool ProfileIsProbeBased; + static bool ProfileIsMixedProbeOrder; + static bool ProfileIsCS; static bool ProfileIsPreInlined; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 9e8f543909cdbd..6551f9374d4aa7 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -525,6 +525,9 @@ class SampleProfileReader { /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; + /// Whether profiles are in mixed BB and callsite probe order. + bool ProfileIsMixedProbeOrder = false; + /// Whether function profiles are context-sensitive flat profiles. bool ProfileIsCS = false; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 59fa71899ed47b..830b83a25a3e42 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles( namespace llvm { namespace sampleprof { bool FunctionSamples::ProfileIsProbeBased = false; +bool FunctionSamples::ProfileIsMixedProbeOrder = false; bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::ProfileIsPreInlined = false; bool FunctionSamples::UseMD5 = false; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 98d0aa794529c5..6476f6cb7ca141 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) FunctionSamples::ProfileIsFS = ProfileIsFS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) + FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder = + true; break; case SecNameTable: { bool FixedLengthMD5 = @@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { Flags.append("preInlined,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) Flags.append("fs-discriminator,"); + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) + Flags.append("mixed-probe-order,"); break; case SecFuncOffsetTable: if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 625e523f13cec0..4a708c94dac518 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); + if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased && + FunctionSamples::ProfileIsMixedProbeOrder) + addSectionFlag(SecProfSummary, + SecProfSummaryFlags::SecFlagIsMixedProbeOrder); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 2fd8668d15e200..2baf7f47aaea7d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1129,7 +1129,7 @@ void SampleProfileLoader::findExternalInlineCandidate( CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; - + Function *Func = SymbolMap.lookup(CalleeSample->getFunction()); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) @@ -2112,6 +2112,14 @@ bool SampleProfileLoader::doInitialization(Module &M, DS_Warning)); return false; } + + if (!FunctionSamples::ProfileIsMixedProbeOrder) { + const char *Msg = + "Pseudo-probe-based profile is on an old version ID order which " + "could cause profile mismatch(performance regression)"; + Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg, + DS_Warning)); + } } if (ReportProfileStaleness || PersistProfileStaleness || diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index c4028e6b132871..dbc1ea79691c87 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -559,6 +559,54 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( } } +// Use a heuristic to determine probe order by checking callsite insertion +// position relative to the BB probes. Sort all the BB probes is in a list, for +// each calliste, compute "ratio = insert position / length of the list". For +// the old order, the probe ids for BB should be all before(smaller than) the +// probe ids for callsite, this ratio should be equal to or close to 1. +inline bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) { + // Use flattned profile to maximize the callsites in the profile. + SampleProfileMap flattenProfile; + ProfileConverter::flattenProfile(Profiles, flattenProfile); + + uint32_t PossibleOldOrderNum = 0; + uint32_t PossibleNewOrderNum = 0; + + for (const auto &I : flattenProfile) { + const FunctionSamples &FS = I.second; + // Skip small functions whose profile order are likely random. + if (FS.getBodySamples().size() < 10) + continue; + + std::set<uint32_t> PossibleBBProbeIDs; + std::set<uint32_t> CallsiteIDs; + for (const auto &I : FS.getBodySamples()) { + if (I.second.getCallTargets().empty()) + PossibleBBProbeIDs.insert(I.first.LineOffset); + else + CallsiteIDs.insert(I.first.LineOffset); + } + + if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty()) + continue; + + uint32_t DistanceToBeginSum = 0; + for (const auto &ID : CallsiteIDs) + DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(), + PossibleBBProbeIDs.upper_bound(ID)); + uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size(); + + // Note that PossibleBBProbeIDs could contains some callsite probe id, the + // ratio is not exactly 1 for the old order, hence use a smaller threshold + // to determine. + if ((float)DistanceToBeginSum / LengthSum > 0.8) + PossibleOldOrderNum++; + else + PossibleNewOrderNum++; + } + return PossibleNewOrderNum >= PossibleOldOrderNum; +} + void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { @@ -1024,6 +1072,16 @@ void CSProfileGenerator::postProcessProfiles() { CSConverter.convertCSProfiles(); FunctionSamples::ProfileIsCS = false; } + + if (FunctionSamples::ProfileIsProbeBased) { + FunctionSamples::ProfileIsMixedProbeOrder = true; + if (!checkProbeIDIsInMixedOrder(ProfileMap)) { + WithColor::warning() + << "Pseudo-probe-based profile is on an old version ID order which " + "could cause profile mismatch(performance regression)\n"; + FunctionSamples::ProfileIsMixedProbeOrder = false; + } + } } void ProfileGeneratorBase::computeSummaryAndThreshold( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits