https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/145578
>From e2f673682594ceafc73e5b7765934caabffd4907 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Tue, 24 Jun 2025 09:50:40 -0700 Subject: [PATCH] [pgo] add means to specify "unknown" MD_prof --- llvm/include/llvm/IR/ProfDataUtils.h | 12 +++ llvm/lib/IR/ProfDataUtils.cpp | 22 +++++ llvm/lib/IR/Verifier.cpp | 46 +++++++--- llvm/test/Verifier/branch-weight.ll | 130 +++++++++++++++++++++++++-- 4 files changed, 187 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 8e8d069b836f1..89fa7f735f5d4 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -133,6 +133,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, bool IsExpected); +/// Specify that the branch weights for this terminator cannot be known at +/// compile time. This should only be called by passes, and never as a default +/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes +/// do not accidentally drop profile info, and this API is called in cases where +/// the pass explicitly cannot provide that info. Defaulting it in would hide +/// bugs where the pass forgets to transfer over or otherwise specify profile +/// info. +LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); + +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD); +LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); + /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 21524eb840539..1585771c0d0ae 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -44,6 +44,8 @@ constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles constexpr unsigned MinVPOps = 5; +const char *UnknownBranchWeightsMarker = "unknown"; + // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { @@ -232,6 +234,26 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setExplicitlyUnknownBranchWeights(Instruction &I) { + MDBuilder MDB(I.getContext()); + I.setMetadata(LLVMContext::MD_prof, + MDNode::get(I.getContext(), + MDB.createString(UnknownBranchWeightsMarker))); +} + +bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { + if (MD.getNumOperands() != 1) + return false; + return MD.getOperand(0).equalsStr(UnknownBranchWeightsMarker); +} + +bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { + auto *MD = I.getMetadata(LLVMContext::MD_prof); + if (!MD) + return false; + return isExplicitlyUnknownBranchWeightsMetadata(*MD); +} + void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, bool IsExpected) { MDBuilder MDB(I.getContext()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae95e3e2bff8d..98fc31f75a031 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2508,6 +2508,12 @@ void Verifier::verifyFunctionMetadata( for (const auto &Pair : MDs) { if (Pair.first == LLVMContext::MD_prof) { MDNode *MD = Pair.second; + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) { + CheckFailed("'unknown' !prof metadata should appear only on " + "instructions supporting the 'branch_weights' metadata", + MD); + continue; + } Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); @@ -4964,6 +4970,30 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + auto GetBranchingTerminatorNumOperands = [&]() { + unsigned ExpectedNumOperands = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); + else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); + else if (isa<CallInst>(&I)) + ExpectedNumOperands = 1; + else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); + else if (isa<SelectInst>(&I)) + ExpectedNumOperands = 2; + else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I)) + ExpectedNumOperands = CI->getNumSuccessors(); + return ExpectedNumOperands; + }; + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) { + Check(GetBranchingTerminatorNumOperands() != 0 || isa<InvokeInst>(I), + "'unknown' !prof should only appear on instructions on which " + "'branch_weigths' would", + MD); + return; + } + Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); @@ -4981,20 +5011,8 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { Check(NumBranchWeights == 1 || NumBranchWeights == 2, "Wrong number of InvokeInst branch_weights operands", MD); } else { - unsigned ExpectedNumOperands = 0; - if (BranchInst *BI = dyn_cast<BranchInst>(&I)) - ExpectedNumOperands = BI->getNumSuccessors(); - else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) - ExpectedNumOperands = SI->getNumSuccessors(); - else if (isa<CallInst>(&I)) - ExpectedNumOperands = 1; - else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I)) - ExpectedNumOperands = IBI->getNumDestinations(); - else if (isa<SelectInst>(&I)) - ExpectedNumOperands = 2; - else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I)) - ExpectedNumOperands = CI->getNumSuccessors(); - else + const unsigned ExpectedNumOperands = GetBranchingTerminatorNumOperands(); + if (ExpectedNumOperands == 0) CheckFailed("!prof branch_weights are not allowed for this instruction", MD); diff --git a/llvm/test/Verifier/branch-weight.ll b/llvm/test/Verifier/branch-weight.ll index 18e64b01a3d42..5be33098f4938 100644 --- a/llvm/test/Verifier/branch-weight.ll +++ b/llvm/test/Verifier/branch-weight.ll @@ -1,21 +1,65 @@ ; Test MD_prof validation ; RUN: split-file %s %t + ; RUN: opt -passes=verify %t/valid.ll --disable-output -; RUN: not opt -passes=verify %t/invalid1.ll --disable-output 2>&1 | FileCheck %s -; RUN: not opt -passes=verify %t/invalid2.ll --disable-output 2>&1 | FileCheck %s + +; RUN: not opt -passes=verify %t/wrong-count.ll --disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-COUNT +; RUN: not opt -passes=verify %t/invalid-name1.ll --disable-output 2>&1 | FileCheck %s +; RUN: not opt -passes=verify %t/invalid-name2.ll --disable-output 2>&1 | FileCheck %s + +; RUN: opt -passes=verify %t/unknown-correct.ll --disable-output + +; RUN: not opt -passes=verify %t/unknown-invalid.ll --disable-output 2>&1 | FileCheck %s +; RUN: not opt -passes=verify %t/unknown-on-function1.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION1 +; RUN: not opt -passes=verify %t/unknown-on-function2.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION2 +; RUN: not opt -passes=verify %t/invalid-unknown-placement.ll --disable-output 2>&1 | FileCheck %s --check-prefix=INVALID-UNKNOWN-PLACEMENT ;--- valid.ll -define void @test(i1 %0) { - br i1 %0, label %2, label %3, !prof !0 -2: +declare void @to_invoke() +declare i32 @__gxx_personality_v0(...) + +define void @invoker() personality ptr @__gxx_personality_v0 { + invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0 +lpad: + %ll = landingpad {ptr, i32} + cleanup ret void -3: +exit: ret void } + +define i32 @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %exit, !prof !0 +yes: + switch i32 %a, label %exit [ i32 1, label %case_b + i32 2, label %case_c], !prof !1 +case_b: + br label %exit +case_c: + br label %exit +exit: + %r = select i1 %c, i32 1, i32 2, !prof !0 + ret i32 %r +} !0 = !{!"branch_weights", i32 1, i32 2} +!1 = !{!"branch_weights", i32 1, i32 2, i32 3} + +;--- wrong-count.ll +define void @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} +!0 = !{!"branch_weights", i32 1, i32 2, i32 3} -;--- invalid1.ll +; WRONG-COUNT: Wrong number of operands + +;--- invalid-name1.ll define void @test(i1 %0) { br i1 %0, label %2, label %3, !prof !0 2: @@ -25,7 +69,7 @@ define void @test(i1 %0) { } !0 = !{!"invalid", i32 1, i32 2} -;--- invalid2.ll +;--- invalid-name2.ll define void @test(i1 %0) { br i1 %0, label %2, label %3, !prof !0 2: @@ -36,4 +80,72 @@ define void @test(i1 %0) { !0 = !{!"function_entry_count", i32 1} -; CHECK: expected either branch_weights or VP profile name \ No newline at end of file +; CHECK: expected either branch_weights or VP profile name + +;--- unknown-correct.ll +declare void @to_invoke() +declare i32 @__gxx_personality_v0(...) + +define void @invoker() personality ptr @__gxx_personality_v0 { + invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0 +lpad: + %ll = landingpad {ptr, i32} + cleanup + ret void +exit: + ret void +} + +define i32 @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %exit, !prof !0 +yes: + switch i32 %a, label %exit [ i32 1, label %case_b + i32 2, label %case_c], !prof !0 +case_b: + br label %exit +case_c: + br label %exit +exit: + %r = select i1 %c, i32 1, i32 2, !prof !0 + ret i32 %r +} + +!0 = !{!"unknown"} + +;--- unknown-invalid.ll +define void @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown", i32 12, i32 67} + +;--- unknown-on-function1.ll +define void @test() !prof !0 { + ret void +} + +!0 = !{!"unknown"} +; ON-FUNCTION1: 'unknown' !prof metadata should appear only on instructions supporting the 'branch_weights' metadata + +;--- unknown-on-function2.ll +define void @test() !prof !0 { + ret void +} + +!0 = !{!"unknown", i64 123} +; ON-FUNCTION2: first operand should be 'function_entry_count' or 'synthetic_function_entry_count' + +;--- invalid-unknown-placement.ll +define i32 @test() { + %r = add i32 1, 2, !prof !0 + ret i32 %r +} +!0 = !{!"unknown"} + +; INVALID-UNKNOWN-PLACEMENT: 'unknown' !prof should only appear on instructions on which 'branch_weigths' would \ No newline at end of file _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits