https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092
>From e6f735a336f87659ee3236fc795ad4b7107dff4d Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Sun, 10 Dec 2023 18:30:42 -0800 Subject: [PATCH 1/4] [PseudoProbe] Mix and reorder block and call probe ID in lexical order --- clang/test/CodeGen/pseudo-probe-emit.c | 8 +++---- .../llvm/Transforms/IPO/SampleProfileProbe.h | 3 +-- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++----------- .../Inputs/pseudo-probe-profile.prof | 8 +++---- .../Inputs/pseudo-probe-update.prof | 8 +++---- .../SampleProfile/pseudo-probe-dangle.ll | 12 +++++----- .../pseudo-probe-discriminator.ll | 6 ++--- .../pseudo-probe-profile-metadata-2.ll | 15 ++++++------- .../SampleProfile/pseudo-probe-profile.ll | 22 +++++++++---------- .../SampleProfile/pseudo-probe-update.ll | 11 +++++----- .../SampleProfile/pseudo-probe-verify.ll | 16 +++++++------- 11 files changed, 59 insertions(+), 69 deletions(-) diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c index c7a3f7e6d5b02b..360f831e842945 100644 --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -10,9 +10,9 @@ void foo(int x) { // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1) - bar(); + bar(); // probe id : 3 else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) - go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + go(); // probe id : 5 + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1) } diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 0f2729a9462de2..69b87adf105fd1 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -82,8 +82,7 @@ class SampleProfileProber { uint32_t getBlockId(const BasicBlock *BB) const; uint32_t getCallsiteId(const Instruction *Call) const; void computeCFGHash(); - void computeProbeIdForBlocks(); - void computeProbeIdForCallsites(); + void computeProbeId(); Function *F; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 090e5560483edb..975c1bc2347aa4 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func, BlockProbeIds.clear(); CallProbeIds.clear(); LastProbeId = (uint32_t)PseudoProbeReservedId::Last; - computeProbeIdForBlocks(); - computeProbeIdForCallsites(); + computeProbeId(); computeCFGHash(); } @@ -207,7 +206,10 @@ void SampleProfileProber::computeCFGHash() { << ", Hash = " << FunctionHash << "\n"); } -void SampleProfileProber::computeProbeIdForBlocks() { +void SampleProfileProber::computeProbeId() { + LLVMContext &Ctx = F->getContext(); + Module *M = F->getParent(); + DenseSet<BasicBlock *> KnownColdBlocks; computeEHOnlyBlocks(*F, KnownColdBlocks); // Insert pseudo probe to non-cold blocks only. This will reduce IR size as @@ -216,18 +218,9 @@ void SampleProfileProber::computeProbeIdForBlocks() { ++LastProbeId; if (!KnownColdBlocks.contains(&BB)) BlockProbeIds[&BB] = LastProbeId; - } -} -void SampleProfileProber::computeProbeIdForCallsites() { - LLVMContext &Ctx = F->getContext(); - Module *M = F->getParent(); - - for (auto &BB : *F) { for (auto &I : BB) { - if (!isa<CallBase>(I)) - continue; - if (isa<IntrinsicInst>(&I)) + if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I)) continue; // The current implementation uses the lower 16 bits of the discriminator diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof index ba4c6117dc96ab..d3847946b94033 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 _Z3barv:2 _Z3foov:5 - 6: 6 _Z3barv:4 _Z3foov:2 + 4: 6 + 6: 13 + 3: 7 _Z3barv:2 _Z3foov:5 + 5: 6 _Z3barv:4 _Z3foov:2 !CFGChecksum: 563022570642068 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof index 62f9bd5992e735..213bf0b6f81cc4 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 - 6: 6 + 4: 6 + 6: 13 + 7: 7 + 9: 6 !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll index 4647a34fc2f620..f0b6fdf62d9696 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll @@ -23,21 +23,21 @@ Merge: ; JT-LABEL-NO: T ; JT-LABEL-NO: F ; JT-LABEL: Merge +; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4 ; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3 -; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2 -; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; ASM-NOT: .pseudoprobe 6699318081062747564 4 ; ASM-NOT: .pseudoprobe 6699318081062747564 3 -; ASM-NOT: .pseudoprobe 6699318081062747564 2 -; ASM: .pseudoprobe 6699318081062747564 4 0 0 +; ASM: .pseudoprobe 6699318081062747564 5 0 0 ret i32 %call } ;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too. ; MIR-tail: bb.0 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 -; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2 ; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3 -; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0 +; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4 +; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0 define i32 @test(i32 %a, i32 %b, i32 %c) { diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll index 62f0737875aec3..97b0ed600ad106 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll @@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) - + ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]]) -; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575) +; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559) ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]]) -; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583) +; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567) ; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll index 148f3ede5ab48a..379dcfcab338d9 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll @@ -29,7 +29,7 @@ if.else: br label %return return: - call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"} !9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) !10 = !{!"function_entry_count", i64 14} !11 = !{!"branch_weights", i32 100, i32 0} -;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 1.0. -!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575) +;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 1.0. +!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559) !13 = distinct !DILocation(line: 10, column: 11, scope: !12) -;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 0. -!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775) +;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 0. +!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759) !15 = distinct !DILocation(line: 10, column: 11, scope: !14) !16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll index 474b6668b0a7a7..867a49dbaed2ee 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -22,12 +22,12 @@ if.then: if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) store i32 2, ptr %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"} ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) +;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe +;; with an index of 3. +; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695) +; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} ;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) -; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe -;; with an index of 6. ; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - ProbeId: '3' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - ProbeId: '5' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - ProbeId: '4' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - ProbeId: '6' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll index 992afedd14f75f..217b61970933dc 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -14,15 +14,15 @@ T1: %v1 = call i32 @f1() ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) ;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080) %cond3 = icmp eq i32 %v1, 412 br label %Merge F1: ; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] %v2 = call i32 @f2() -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) ;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968) br label %Merge Merge: @@ -30,11 +30,11 @@ Merge: %B = phi i32 [%v1, %T1], [%v2, %F1] br i1 %A, label %T2, label %F2 T2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1) call void @f3() ret i32 %B F2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1) ret i32 %B } @@ -42,4 +42,3 @@ F2: ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} attributes #0 = {"use-sample-profile"} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll index f70e5189ab1293..b622cfbd6634ef 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -4,7 +4,7 @@ ; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** ; VERIFY: Function foo: -; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 5 previous factor 1.00 current factor 5.00 ; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00 declare void @foo2() nounwind @@ -27,15 +27,15 @@ bb7.preheader: bb10: ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] @@ -50,14 +50,14 @@ bb10: br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 bb24: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) ret void } ;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe ;; with an index of 6 and a scale of -1%. ; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) -; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} >From 5e6b16f041101d4db38748bc9bfc3ec60536f73c Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Wed, 10 Jan 2024 10:10:34 -0800 Subject: [PATCH 2/4] Detect the order and add a summary flag to the profile --- llvm/include/llvm/ProfileData/SampleProf.h | 10 ++- .../llvm/ProfileData/SampleProfReader.h | 3 + llvm/lib/ProfileData/SampleProf.cpp | 1 + llvm/lib/ProfileData/SampleProfReader.cpp | 5 ++ llvm/lib/ProfileData/SampleProfWriter.cpp | 4 ++ llvm/lib/Transforms/IPO/SampleProfile.cpp | 8 +++ llvm/tools/llvm-profgen/ProfileGenerator.cpp | 62 +++++++++++++++++++ 7 files changed, 91 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 8ac84d4b933f20..a5ac05c9cdff75 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t { /// SecFlagIsPreInlined means this profile contains ShouldBeInlined /// contexts thus this is CS preinliner computed. SecFlagIsPreInlined = (1 << 4), + /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the + /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of + /// the order that callsite probe IDs are always after the BB probe IDs. + SecFlagIsMixedProbeOrder = (1 << 5), }; enum class SecFuncMetadataFlags : uint32_t { @@ -466,7 +470,7 @@ struct SampleContextFrame { LineLocation Location; SampleContextFrame() : Location(0, 0) {} - + SampleContextFrame(FunctionId Func, LineLocation Location) : Func(Func), Location(Location) {} @@ -527,7 +531,7 @@ class SampleContext { : Func(Name), State(UnknownContext), Attributes(ContextNone) { assert(!Name.empty() && "Name is empty"); } - + SampleContext(FunctionId Func) : Func(Func), State(UnknownContext), Attributes(ContextNone) {} @@ -1178,6 +1182,8 @@ class FunctionSamples { static bool ProfileIsProbeBased; + static bool ProfileIsMixedProbeOrder; + static bool ProfileIsCS; static bool ProfileIsPreInlined; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 9e8f543909cdbd..6551f9374d4aa7 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -525,6 +525,9 @@ class SampleProfileReader { /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; + /// Whether profiles are in mixed BB and callsite probe order. + bool ProfileIsMixedProbeOrder = false; + /// Whether function profiles are context-sensitive flat profiles. bool ProfileIsCS = false; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 59fa71899ed47b..830b83a25a3e42 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles( namespace llvm { namespace sampleprof { bool FunctionSamples::ProfileIsProbeBased = false; +bool FunctionSamples::ProfileIsMixedProbeOrder = false; bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::ProfileIsPreInlined = false; bool FunctionSamples::UseMD5 = false; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 98d0aa794529c5..6476f6cb7ca141 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) FunctionSamples::ProfileIsFS = ProfileIsFS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) + FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder = + true; break; case SecNameTable: { bool FixedLengthMD5 = @@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { Flags.append("preInlined,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) Flags.append("fs-discriminator,"); + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) + Flags.append("mixed-probe-order,"); break; case SecFuncOffsetTable: if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 625e523f13cec0..4a708c94dac518 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); + if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased && + FunctionSamples::ProfileIsMixedProbeOrder) + addSectionFlag(SecProfSummary, + SecProfSummaryFlags::SecFlagIsMixedProbeOrder); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index ffc26f1a0a972d..a8a2697aa6a7ed 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -2173,6 +2173,14 @@ bool SampleProfileLoader::doInitialization(Module &M, DS_Warning)); return false; } + + if (!FunctionSamples::ProfileIsMixedProbeOrder) { + const char *Msg = + "Pseudo-probe-based profile is on an old version ID order which " + "could cause profile mismatch(performance regression)"; + Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg, + DS_Warning)); + } } if (ReportProfileStaleness || PersistProfileStaleness || diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5aa44108f96605..4771dda88d89c0 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -513,6 +513,66 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) { return ProfileMap.Create(Context); } +// Use a heuristic to determine probe order by checking callsite insertion +// position relative to the BB probes. Sort all the BB probes is in a list, for +// each calliste, compute "ratio = insert position / length of the list". For +// the old order, the probe ids for BB should be all before(smaller than) the +// probe ids for callsite, this ratio should be equal to or close to 1. +bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) { + // Use flattned profile to maximize the callsites in the profile. + SampleProfileMap flattenProfile; + ProfileConverter::flattenProfile(Profiles, flattenProfile); + + uint32_t PossibleOldOrderNum = 0; + uint32_t PossibleNewOrderNum = 0; + + for (const auto &I : flattenProfile) { + const FunctionSamples &FS = I.second; + // Skip small functions whose profile order are likely random. + if (FS.getBodySamples().size() < 10) + continue; + + std::set<uint32_t> PossibleBBProbeIDs; + std::set<uint32_t> CallsiteIDs; + for (const auto &I : FS.getBodySamples()) { + if (I.second.getCallTargets().empty()) + PossibleBBProbeIDs.insert(I.first.LineOffset); + else + CallsiteIDs.insert(I.first.LineOffset); + } + + if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty()) + continue; + + uint32_t DistanceToBeginSum = 0; + for (const auto &ID : CallsiteIDs) + DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(), + PossibleBBProbeIDs.upper_bound(ID)); + uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size(); + + // Note that PossibleBBProbeIDs could contains some callsite probe id, the + // ratio is not exactly 1 for the old order, hence use a smaller threshold + // to determine. + if ((float)DistanceToBeginSum / LengthSum > 0.8) + PossibleOldOrderNum++; + else + PossibleNewOrderNum++; + } + return PossibleNewOrderNum >= PossibleOldOrderNum; +} + +void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) { + if (FunctionSamples::ProfileIsProbeBased) { + FunctionSamples::ProfileIsMixedProbeOrder = true; + if (!checkProbeIDIsInMixedOrder(Profiles)) { + WithColor::warning() + << "Pseudo-probe-based profile is on an old version ID order which " + "could cause profile mismatch(performance regression)\n"; + FunctionSamples::ProfileIsMixedProbeOrder = false; + } + } +} + void ProfileGenerator::generateProfile() { collectProfiledFunctions(); @@ -535,6 +595,7 @@ void ProfileGenerator::postProcessProfiles() { trimColdProfiles(ProfileMap, ColdCountThreshold); filterAmbiguousProfile(ProfileMap); calculateAndShowDensity(ProfileMap); + warnOrWriteProbeOrderFlag(ProfileMap); } void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, @@ -1068,6 +1129,7 @@ void CSProfileGenerator::postProcessProfiles() { FunctionSamples::ProfileIsCS = false; } filterAmbiguousProfile(ProfileMap); + warnOrWriteProbeOrderFlag(ProfileMap); } void ProfileGeneratorBase::computeSummaryAndThreshold( >From 6fe180efa591e8282d7d9e72c6a7645ce34b2b74 Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Wed, 28 Feb 2024 21:26:13 -0800 Subject: [PATCH 3/4] Revert "Detect the order and add a summary flag to the profile" This reverts commit 28b0374b93f2072d03804ff1d8a8b2fd81c5a218. --- llvm/include/llvm/ProfileData/SampleProf.h | 10 +-- .../llvm/ProfileData/SampleProfReader.h | 3 - llvm/lib/ProfileData/SampleProf.cpp | 1 - llvm/lib/ProfileData/SampleProfReader.cpp | 5 -- llvm/lib/ProfileData/SampleProfWriter.cpp | 4 -- llvm/lib/Transforms/IPO/SampleProfile.cpp | 10 +-- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 62 ------------------- 7 files changed, 3 insertions(+), 92 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index a5ac05c9cdff75..8ac84d4b933f20 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -201,10 +201,6 @@ enum class SecProfSummaryFlags : uint32_t { /// SecFlagIsPreInlined means this profile contains ShouldBeInlined /// contexts thus this is CS preinliner computed. SecFlagIsPreInlined = (1 << 4), - /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the - /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of - /// the order that callsite probe IDs are always after the BB probe IDs. - SecFlagIsMixedProbeOrder = (1 << 5), }; enum class SecFuncMetadataFlags : uint32_t { @@ -470,7 +466,7 @@ struct SampleContextFrame { LineLocation Location; SampleContextFrame() : Location(0, 0) {} - + SampleContextFrame(FunctionId Func, LineLocation Location) : Func(Func), Location(Location) {} @@ -531,7 +527,7 @@ class SampleContext { : Func(Name), State(UnknownContext), Attributes(ContextNone) { assert(!Name.empty() && "Name is empty"); } - + SampleContext(FunctionId Func) : Func(Func), State(UnknownContext), Attributes(ContextNone) {} @@ -1182,8 +1178,6 @@ class FunctionSamples { static bool ProfileIsProbeBased; - static bool ProfileIsMixedProbeOrder; - static bool ProfileIsCS; static bool ProfileIsPreInlined; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 6551f9374d4aa7..9e8f543909cdbd 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -525,9 +525,6 @@ class SampleProfileReader { /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; - /// Whether profiles are in mixed BB and callsite probe order. - bool ProfileIsMixedProbeOrder = false; - /// Whether function profiles are context-sensitive flat profiles. bool ProfileIsCS = false; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 830b83a25a3e42..59fa71899ed47b 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -41,7 +41,6 @@ static cl::opt<bool> GenerateMergedBaseProfiles( namespace llvm { namespace sampleprof { bool FunctionSamples::ProfileIsProbeBased = false; -bool FunctionSamples::ProfileIsMixedProbeOrder = false; bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::ProfileIsPreInlined = false; bool FunctionSamples::UseMD5 = false; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 6476f6cb7ca141..98d0aa794529c5 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -704,9 +704,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) FunctionSamples::ProfileIsFS = ProfileIsFS = true; - if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) - FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder = - true; break; case SecNameTable: { bool FixedLengthMD5 = @@ -1372,8 +1369,6 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { Flags.append("preInlined,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) Flags.append("fs-discriminator,"); - if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder)) - Flags.append("mixed-probe-order,"); break; case SecFuncOffsetTable: if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 4a708c94dac518..625e523f13cec0 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -437,10 +437,6 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); - if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased && - FunctionSamples::ProfileIsMixedProbeOrder) - addSectionFlag(SecProfSummary, - SecProfSummaryFlags::SecFlagIsMixedProbeOrder); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a8a2697aa6a7ed..402920d3a9a2d8 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1190,7 +1190,7 @@ void SampleProfileLoader::findExternalInlineCandidate( CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; - + Function *Func = SymbolMap.lookup(CalleeSample->getFunction()); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) @@ -2173,14 +2173,6 @@ bool SampleProfileLoader::doInitialization(Module &M, DS_Warning)); return false; } - - if (!FunctionSamples::ProfileIsMixedProbeOrder) { - const char *Msg = - "Pseudo-probe-based profile is on an old version ID order which " - "could cause profile mismatch(performance regression)"; - Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg, - DS_Warning)); - } } if (ReportProfileStaleness || PersistProfileStaleness || diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 4771dda88d89c0..5aa44108f96605 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -513,66 +513,6 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) { return ProfileMap.Create(Context); } -// Use a heuristic to determine probe order by checking callsite insertion -// position relative to the BB probes. Sort all the BB probes is in a list, for -// each calliste, compute "ratio = insert position / length of the list". For -// the old order, the probe ids for BB should be all before(smaller than) the -// probe ids for callsite, this ratio should be equal to or close to 1. -bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) { - // Use flattned profile to maximize the callsites in the profile. - SampleProfileMap flattenProfile; - ProfileConverter::flattenProfile(Profiles, flattenProfile); - - uint32_t PossibleOldOrderNum = 0; - uint32_t PossibleNewOrderNum = 0; - - for (const auto &I : flattenProfile) { - const FunctionSamples &FS = I.second; - // Skip small functions whose profile order are likely random. - if (FS.getBodySamples().size() < 10) - continue; - - std::set<uint32_t> PossibleBBProbeIDs; - std::set<uint32_t> CallsiteIDs; - for (const auto &I : FS.getBodySamples()) { - if (I.second.getCallTargets().empty()) - PossibleBBProbeIDs.insert(I.first.LineOffset); - else - CallsiteIDs.insert(I.first.LineOffset); - } - - if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty()) - continue; - - uint32_t DistanceToBeginSum = 0; - for (const auto &ID : CallsiteIDs) - DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(), - PossibleBBProbeIDs.upper_bound(ID)); - uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size(); - - // Note that PossibleBBProbeIDs could contains some callsite probe id, the - // ratio is not exactly 1 for the old order, hence use a smaller threshold - // to determine. - if ((float)DistanceToBeginSum / LengthSum > 0.8) - PossibleOldOrderNum++; - else - PossibleNewOrderNum++; - } - return PossibleNewOrderNum >= PossibleOldOrderNum; -} - -void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) { - if (FunctionSamples::ProfileIsProbeBased) { - FunctionSamples::ProfileIsMixedProbeOrder = true; - if (!checkProbeIDIsInMixedOrder(Profiles)) { - WithColor::warning() - << "Pseudo-probe-based profile is on an old version ID order which " - "could cause profile mismatch(performance regression)\n"; - FunctionSamples::ProfileIsMixedProbeOrder = false; - } - } -} - void ProfileGenerator::generateProfile() { collectProfiledFunctions(); @@ -595,7 +535,6 @@ void ProfileGenerator::postProcessProfiles() { trimColdProfiles(ProfileMap, ColdCountThreshold); filterAmbiguousProfile(ProfileMap); calculateAndShowDensity(ProfileMap); - warnOrWriteProbeOrderFlag(ProfileMap); } void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, @@ -1129,7 +1068,6 @@ void CSProfileGenerator::postProcessProfiles() { FunctionSamples::ProfileIsCS = false; } filterAmbiguousProfile(ProfileMap); - warnOrWriteProbeOrderFlag(ProfileMap); } void ProfileGeneratorBase::computeSummaryAndThreshold( >From f25b13f071aab68bcc9ad3d98fdde871670a739c Mon Sep 17 00:00:00 2001 From: wlei <w...@fb.com> Date: Thu, 29 Feb 2024 18:04:37 -0800 Subject: [PATCH 4/4] Error out if the checksum mismatch is high --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 84 ++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 402920d3a9a2d8..b599645cbc7df3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -234,6 +234,25 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip( cl::desc( "Skip relative hotness check for ICP up to given number of targets.")); +static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip( + "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100), + cl::desc("For checksum-mismatch error check, skip checking the function " + "that the num of its hot(on average) blocks is smaller than the " + "given number.")); + +static cl::opt<unsigned> ChecksumMismatchNumFuncSkip( + "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(100), + cl::desc( + "For checksum-mismatch error check, skip the check if the number of " + "total selected function is smaller than the given number.")); + +static cl::opt<unsigned> ChecksumMismatchErrorThreshold( + "checksum-mismatch-error-threshold", cl::Hidden, cl::init(90), + cl::desc( + "For checksum-mismatch error check, error out if the percentage of " + "function mismatched-checksum is higher than the given percentage " + "threshold")); + static cl::opt<bool> CallsitePrioritizedInline( "sample-profile-prioritized-inline", cl::Hidden, @@ -630,6 +649,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> { std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG); std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M); void generateMDProfMetadata(Function &F); + bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI, + const SampleProfileMap &Profiles); /// Map from function name to Function *. Used to find the function from /// the function name. If the function name contains suffix, additional @@ -1190,7 +1211,7 @@ void SampleProfileLoader::findExternalInlineCandidate( CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; - + Function *Func = SymbolMap.lookup(CalleeSample->getFunction()); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) @@ -2184,6 +2205,61 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } +// Note that this is a module-level check. Even if one module is errored out, +// the entire build will be errored out. However, the user could make big +// changes to functions in single module but those changes might not be +// performance significant to the whole binary. Therefore, we use a conservative +// approach to make sure we only error out if it globally impacts the binary +// performance. To achieve this, we use heuristics to select a reasonable +// big set of functions that are supposed to be globally performance +// significant, only compute and check the mismatch within those functions. The +// function selection is based on two criteria: 1) The function is "hot" enough, +// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2) +// The num of function is large enough which is tuned by the +// ChecksumMismatchNumFuncSkip flag. +bool SampleProfileLoader::errorIfHighChecksumMismatch( + Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) { + assert(FunctionSamples::ProfileIsProbeBased && + "Only support for probe-based profile"); + uint64_t TotalSelectedFunc = 0; + uint64_t NumMismatchedFunc = 0; + for (const auto &I : Profiles) { + const auto &FS = I.second; + const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); + if (!FuncDesc) + continue; + + // We want to select a set of functions that are globally performance + // significant, in other words, if those functions profiles are + // checksum-mismatched and dropped, the whole binary will likely be + // impacted, so here we use a hotness-based threshold to control the + // selection. + if (FS.getTotalSamples() >= + ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold()) + continue; + + TotalSelectedFunc++; + if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) + NumMismatchedFunc++; + } + // Make sure that the num of selected function is not too small to distinguish + // from the user's benign changes. + if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip) + return false; + + // Finally check the mismatch percentage against the threshold. + if (NumMismatchedFunc * 100 >= + TotalSelectedFunc * ChecksumMismatchErrorThreshold) { + auto &Ctx = M.getContext(); + const char *Msg = + "The FDO profile is too old and will cause big performance regression, " + "please drop the profile and re-collect a new one."; + Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg)); + return true; + } + return false; +} + void SampleProfileMatcher::findIRAnchors( const Function &F, std::map<LineLocation, StringRef> &IRAnchors) { // For inlined code, recover the original callsite and callee by finding the @@ -2715,6 +2791,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummary::PSK_Sample); PSI->refresh(); } + + // Error out if the profile checksum mismatch is too high. + if (FunctionSamples::ProfileIsProbeBased && + errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles())) + return false; + // Compute the total number of samples collected in this profile. for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits