spatel created this revision. spatel added reviewers: hfinkel, davidxl, bkramer. spatel added a subscriber: cfe-commits. Herald added subscribers: mcrosier, aemerson.
__builtin_expect() is a GCC-derived builtin that's used as a hint for branch prediction: https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html The Clang/LLVM implementation of this feature introduced an LLVM intrinsic to convey the hint to the optimizer: https://marc.info/?l=llvm-commits&m=130997676129580&w=4 There are problems with this (and several were noted in the above thread, but it didn't change the outcome): 1. We created an intrinsic to improve perf, but the intrinsic can harm optimization by interfering with other passes. 2. To solve that, create a pass to always transform the intrinsic into metadata at a very early stage. But now every program is paying a compile-time tax for a feature that is rarely used. 3. The IR lowering uses profile weight metadata as the means for conveying the hint. But the hint is meant to be a programmer override for profile data. That is, "I don't care what the profile says; I want my code to use this source-level hint." So it should use a different kind of metadata, not profile weight. We added the inverse programmer hint as metadata - __builtin_unpredictable(): http://llvm.org/docs/LangRef.html#unpredictable-metadata http://reviews.llvm.org/D12341 so I think we can enhance that to solve this problem. This patch is an intermediate step. It doesn't try to solve #3 above, but it handles #1 and clears the way to deprecate the llvm.expect intrinsic and delete the LowerExpectIntrinsic pass (problem #2). This is part of solving: https://llvm.org/bugs/show_bug.cgi?id=27344 But to complete that, we need to make changes in SimplifyCFG and possibly other places, so that we're propagating and using the expect/unpredictable metadata as intended. Ref: D18133, D18220, rL264527, rL266442 http://reviews.llvm.org/D19299 Files: lib/CodeGen/CGBuiltin.cpp lib/CodeGen/CGStmt.cpp lib/CodeGen/CodeGenFunction.cpp test/CodeGen/builtin-expect.c
Index: test/CodeGen/builtin-expect.c =================================================================== --- test/CodeGen/builtin-expect.c +++ test/CodeGen/builtin-expect.c @@ -1,13 +1,15 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -O1 -disable-llvm-optzns | FileCheck %s --check-prefix=ALL --check-prefix=O1 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -O0 | FileCheck %s --check-prefix=ALL --check-prefix=O0 -// In all tests, make sure that no expect is generated if optimizations are off. -// If optimizations are on, generate the correct expect and preserve other necessary operations. +// In all tests, make sure that the builtin is gone. +// If optimizations are on, generate the correct expect metadata and preserve other necessary operations. +// If optimizations are off, no expect metadata is generated but other operations should be preserved. int expect_taken(int x) { // ALL-LABEL: define i32 @expect_taken -// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 1) -// O0-NOT: @llvm.expect +// ALL-NOT: builtin_expect +// O1: !prof [[BR_TRUE_METADATA:.+]] +// O0-NOT: !prof if (__builtin_expect (x, 1)) return 0; @@ -17,8 +19,9 @@ int expect_not_taken(int x) { // ALL-LABEL: define i32 @expect_not_taken -// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 0) -// O0-NOT: @llvm.expect +// ALL-NOT: builtin_expect +// O1: !prof [[BR_FALSE_METADATA:.+]] +// O0-NOT: !prof if (__builtin_expect (x, 0)) return 0; @@ -33,9 +36,10 @@ void expect_value_side_effects() { // ALL-LABEL: define void @expect_value_side_effects() // ALL: [[CALL:%.*]] = call i32 @y +// ALL-NOT: builtin_expect // O1: [[SEXT:%.*]] = sext i32 [[CALL]] to i64 -// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 [[SEXT]]) -// O0-NOT: @llvm.expect +// O1: !prof [[BR_TRUE_METADATA:.+]] +// O0-NOT: !prof if (__builtin_expect (x, y())) foo (); @@ -52,17 +56,18 @@ // ALL-LABEL: define i32 @main() // ALL: call void @isigprocmask() // ALL: [[CALL:%.*]] = call i64 (...) @bar() -// O1: call i64 @llvm.expect.i64(i64 0, i64 [[CALL]]) -// O0-NOT: @llvm.expect +// ALL-NOT: builtin_expect +// ALL-NOT: !prof (void) __builtin_expect((isigprocmask(), 0), bar()); } int switch_cond(int x) { // ALL-LABEL: define i32 @switch_cond -// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 5) -// O0-NOT: @llvm.expect +// ALL-NOT: builtin_expect +// O1: !prof [[SWITCH_METADATA:.+]] +// O0-NOT: !prof switch(__builtin_expect(x, 5)) { default: @@ -78,3 +83,7 @@ return 0; } +// O1: [[BR_TRUE_METADATA]] = !{!"branch_weights", i32 64, i32 4} +// O1: [[BR_FALSE_METADATA]] = !{!"branch_weights", i32 4, i32 64} +// O1: [[SWITCH_METADATA]] = !{!"branch_weights", i32 4, i32 4, i32 4, i32 4, i32 64} + Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -1313,20 +1313,45 @@ // create metadata that specifies that the branch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. llvm::MDNode *Unpredictable = nullptr; + llvm::MDNode *Weights = nullptr; auto *Call = dyn_cast<CallExpr>(Cond); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { - auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl())) { llvm::MDBuilder MDHelper(getLLVMContext()); - Unpredictable = MDHelper.createUnpredictable(); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + Unpredictable = MDHelper.createUnpredictable(); + } else if (FD->getBuiltinID() == Builtin::BI__builtin_expect) { + + // FIXME: builtin_expect should use the same metadata type as + // builtin_unpredictable and be handled above. For now, we're mimicking + // the LLVM behavior of the 'LowerExpectIntrinsic' pass. + + // HACK: Hardcode the taken/not-taken weights based on the existing LLVM + // default values. This code is expected to be very temporary. Once we + // have prepared LLVM to handle builtin_expect using 'unpredictable' + // metadata, this gets deleted. + + const int LikelyWeight = 64; + const int UnlikelyWeight = 4; + + llvm::Value *ExpectedVal = EmitScalarExpr(Call->getArg(1)); + auto *ExpectedConst = dyn_cast<llvm::ConstantInt>(ExpectedVal); + // If expecting the false case, set that side to the heavy weight. + if (ExpectedConst && ExpectedConst->isNullValue()) + Weights = MDHelper.createBranchWeights(UnlikelyWeight, LikelyWeight); + else + Weights = MDHelper.createBranchWeights(LikelyWeight, UnlikelyWeight); + + } } } // Create branch weights based on the number of times we get here and the // number of times the condition should be true. - uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount); - llvm::MDNode *Weights = - createProfileWeights(TrueCount, CurrentCount - TrueCount); + if (!Weights) { + uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount); + Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount); + } // Emit the code with the fully general case. llvm::Value *CondV; Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -1552,15 +1552,44 @@ // Don't bother if not optimizing because that metadata would not be used. auto *Call = dyn_cast<CallExpr>(S.getCond()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { - auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl())) { llvm::MDBuilder MDHelper(getLLVMContext()); - SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, - MDHelper.createUnpredictable()); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, + MDHelper.createUnpredictable()); + } else if (FD->getBuiltinID() == Builtin::BI__builtin_expect) { + + // FIXME: builtin_expect should use the same metadata type as + // builtin_unpredictable and be handled above. For now, we're mimicking + // the LLVM behavior of the 'LowerExpectIntrinsic' pass. + + // HACK: Hardcode the taken/not-taken weights based on the existing LLVM + // default values. This code is expected to be very temporary. Once we + // have prepared LLVM to handle builtin_expect using 'unpredictable' + // metadata, this gets deleted. + + const int LikelyWeight = 64; + const int UnlikelyWeight = 4; + + llvm::Value *ExpectedVal = EmitScalarExpr(Call->getArg(1)); + if (auto *ExpectConst = dyn_cast<llvm::ConstantInt>(ExpectedVal)) { + // The +1 is for the default case. + SmallVector<uint32_t, 16> Weights(SwitchInsn->getNumCases() + 1, + UnlikelyWeight); + auto ExpectedCase = SwitchInsn->findCaseValue(ExpectConst); + if (ExpectedCase == SwitchInsn->case_default()) + Weights[0] = LikelyWeight; + else + Weights[ExpectedCase.getCaseIndex() + 1] = LikelyWeight; + + SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof, + MDHelper.createBranchWeights(Weights)); + } + } } } - if (SwitchWeights) { + if (!SwitchInsn->getMetadata(llvm::LLVMContext::MD_prof) && SwitchWeights) { assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() && "switch weights do not match switch cases"); // If there's only one jump destination there's no sense weighting it. Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -631,28 +631,22 @@ "cast"); return RValue::get(Result); } - case Builtin::BI__builtin_unpredictable: { - // Always return the argument of __builtin_unpredictable. LLVM does not - // handle this builtin. Metadata for this builtin should be added directly - // to instructions such as branches or switches that use it. - return RValue::get(EmitScalarExpr(E->getArg(0))); - } + + case Builtin::BI__builtin_unpredictable: case Builtin::BI__builtin_expect: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = ArgValue->getType(); + // Always return the first argument. LLVM does not handle these builtins. + // Metadata for these builtins should be added directly to instructions such + // as branches or switches that use the builtin. + Value *Arg0 = EmitScalarExpr(E->getArg(0)); - Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); - // Don't generate llvm.expect on -O0 as the backend won't use it for - // anything. - // Note, we still IRGen ExpectedValue because it could have side-effects. - if (CGM.getCodeGenOpts().OptimizationLevel == 0) - return RValue::get(ArgValue); + // We must IRGen the expected value of builtin_expect because it could have + // side-effects. + if (BuiltinID == Builtin::BI__builtin_expect) + EmitScalarExpr(E->getArg(1)); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); - Value *Result = - Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); - return RValue::get(Result); + return RValue::get(Arg0); } + case Builtin::BI__builtin_assume_aligned: { Value *PtrValue = EmitScalarExpr(E->getArg(0)); Value *OffsetValue =
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits