gulfem updated this revision to Diff 324144. gulfem marked an inline comment as not done. gulfem added a comment. Herald added a subscriber: mgorny.
Implement it as a separate pass and apply it to user-defined lookup tables as well. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D94355/new/ https://reviews.llvm.org/D94355 Files: clang/test/CodeGen/switch-to-lookup-table.c llvm/docs/Passes.rst llvm/include/llvm/InitializePasses.h llvm/include/llvm/Transforms/Scalar.h llvm/include/llvm/Transforms/Utils/RelLookupTableGenerator.h llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassRegistry.def llvm/lib/Transforms/IPO/PassManagerBuilder.cpp llvm/lib/Transforms/Utils/CMakeLists.txt llvm/lib/Transforms/Utils/RelLookupTableGenerator.cpp llvm/lib/Transforms/Utils/Utils.cpp llvm/test/Other/pass-pipelines.ll llvm/test/Transforms/SimplifyCFG/X86/relative_lookup_table.ll llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
Index: llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -60,6 +60,7 @@ "NameAnonGlobals.cpp", "PredicateInfo.cpp", "PromoteMemoryToRegister.cpp", + "RelLookupTableGenerator.cpp" "SSAUpdater.cpp", "SSAUpdaterBulk.cpp", "SampleProfileLoaderBaseUtil.cpp", Index: llvm/test/Transforms/SimplifyCFG/X86/relative_lookup_table.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/X86/relative_lookup_table.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -rel-lookup-table-generator -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: opt < %s -passes=rel-lookup-table-generator -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"zero\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"one\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"two\00", align 1 +@.str.3 = private unnamed_addr constant [8 x i8] c"default\00", align 1 +@.str.4 = private unnamed_addr constant [6 x i8] c"three\00", align 1 +@.str.5 = private unnamed_addr constant [5 x i8] c"str1\00", align 1 +@.str.6 = private unnamed_addr constant [5 x i8] c"str2\00", align 1 +@.str.7 = private unnamed_addr constant [12 x i8] c"singlevalue\00", align 1 + +@switch.table.string_table = private unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), + i8* getelementptr 
inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) + ], align 8 + +@switch.table.string_table_holes = private unnamed_addr constant [4 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), + i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.4, i64 0, i64 0) + ], align 8 + +@switch.table.no_dso_local = private unnamed_addr constant [3 x i32*] [i32* @a, i32* @b, i32* @c], align 8 + +@switch.table.single_value = private unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) + ], align 8 + +; Integer pointer table lookup +; CHECK: @switch.table.no_dso_local = private unnamed_addr constant [3 x i32*] [i32* @a, i32* @b, i32* @c], align + +; Relative string table lookup +; CHECK: @reltable.string_table = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Relative string table lookup where the holes are filled with the relative offset to the default value +; CHECK: @reltable.string_table_holes = private unnamed_addr constant [4 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([8 x i8]* 
@.str.3 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([6 x i8]* @.str.4 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Single value check +; CHECK: @reltable.single_value = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Switch used to return a string. +; Relative lookup table should be generated. 
+define i8* @string_table(i32 %cond) { + ; CHECK-LABEL: @string_table( + ; CHECK-NEXT: entry: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 + ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] + ; CHECK: switch.lookup: + ; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 %cond, 2 + ; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.string_table to i8*), i32 [[RELTABLE_SHIFT]]) + ; CHECK-NEXT: ret i8* [[RELTABLE_INTRINSIC]] + ; CHECK: return: + ; CHECK-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) + ; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.string_table, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + ret i8* %switch.load + +return: ; preds = %entry + ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +} + +; Fill the holes with offset of the default value in the relative lookup table. 
+define i8* @string_table_holes(i32 %cond) { +; CHECK-LABEL: @string_table_holes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND:%.*]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([4 x i32]* @reltable.string_table_holes to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: ret i8* [[RELTABLE_INTRINSIC]] +; CHECK: return: +; CHECK-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +; +entry: + %0 = icmp ult i32 %cond, 4 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.string_table_holes, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + ret i8* %switch.load + +return: ; preds = %entry + ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +} + +@a = external global i32, align 4 +@b = external global i32, align 4 +@c = external global i32, align 4 +@d = external global i32, align 4 + +define i32* @no_dso_local(i32 %cond) { +; CHECK-LABEL: @no_dso_local( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.no_dso_local, i32 0, i32 [[COND:%.*]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]], align 8 +; CHECK-NEXT: ret i32* [[SWITCH_LOAD]] +; CHECK: return: +; CHECK-NEXT: ret i32* @d +; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.no_dso_local, i32 0, i32 %cond 
+ %switch.load = load i32*, i32** %switch.gep, align 8 + ret i32* %switch.load + +return: ; preds = %entry + ret i32* @d +} + +; Single value check +; If every case yields the same value for a result (str1 below), the constant is +; used directly; only the table of distinct values (str2) becomes a relative lookup +define void @single_value(i32 %cond) { +; CHECK-LABEL: @single_value( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND:%.*]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.single_value to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK: sw.epilog: +; CHECK-NEXT: [[STR1:%.*]] = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i64 0, i64 0), %entry ], [ getelementptr inbounds ([12 x i8], [12 x i8]* @.str.7, i64 0, i64 0), %switch.lookup ] +; CHECK-NEXT: [[STR2:%.*]] = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i64 0, i64 0), %entry ], [ [[RELTABLE_INTRINSIC]], [[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret void + +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %sw.epilog + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.single_value, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + br label %sw.epilog + +sw.epilog: ; preds = %switch.lookup, %entry + %str1.0 = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i64 0, i64 0), %entry ], [ getelementptr inbounds ([12 x i8], [12 x i8]* @.str.7, i64 0, i64 0), %switch.lookup ] + %str2.0 = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i64 0, i64 0), %entry ], [ %switch.load, %switch.lookup ] + ret void +} + +!llvm.module.flags = !{!0, !1} +!0 = !{i32 7, !"PIC Level", i32 2} +!1 = !{i32 1, !"Code Model", i32 1} +!4 = !{!"any pointer", !5, i64 0} +!5 = 
!{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} Index: llvm/test/Other/pass-pipelines.ll =================================================================== --- llvm/test/Other/pass-pipelines.ll +++ llvm/test/Other/pass-pipelines.ll @@ -81,7 +81,6 @@ ; Reduce the size of the IR ASAP after the inliner. ; CHECK-O2-NEXT: Global Variable Optimizer ; CHECK-O2: Dead Global Elimination -; Next is the late function pass pipeline. ; CHECK-O2: FunctionPass Manager ; CHECK-O2-NOT: Manager ; We rotate loops prior to vectorization. @@ -90,6 +89,9 @@ ; CHECK-O2-NOT: Manager ; CHECK-O2: Loop Vectorization ; CHECK-O2-NOT: Manager +; CHECK-O2: Relative Lookup Table Generator +; Next is the late function pass pipeline. +; CHECK-O2: FunctionPass Manager ; CHECK-O2: SLP Vectorizer ; CHECK-O2-NOT: Manager ; After vectorization we do partial unrolling. Index: llvm/lib/Transforms/Utils/Utils.cpp =================================================================== --- llvm/lib/Transforms/Utils/Utils.cpp +++ llvm/lib/Transforms/Utils/Utils.cpp @@ -37,6 +37,7 @@ initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); + initializeRelLookupTableConverterPassPass(Registry); initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesLegacyPassPass(Registry); initializeMetaRenamerPass(Registry); Index: llvm/lib/Transforms/Utils/RelLookupTableGenerator.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/RelLookupTableGenerator.cpp @@ -0,0 +1,233 @@ +//===- RelLookupTableGeneratorPass - Rel Lookup Table Gen Pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements relative lookup table generator that converts +// lookup tables to relative lookup tables to make them PIC-friendly. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/RelLookupTableGenerator.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace llvm { + +bool shouldGenerateRelLookupTables(Module &M) { + // If not in x86 or aarch64 mode, do not generate a relative lookup table. + Triple TargetTriple(M.getTargetTriple()); + if (!(TargetTriple.getArch() == Triple::x86_64 || + TargetTriple.getArch() == Triple::aarch64)) + return false; + + // If not tiny or small code model, do not generate a relative lookup table. + Optional<CodeModel::Model> CodeModel = M.getCodeModel(); + if (!(CodeModel == CodeModel::Tiny || CodeModel == CodeModel::Small)) + return false; + + // If not in PIC mode, do not generate a relative lookup table. + if (M.getPICLevel() == PICLevel::NotPIC) + return false; + + return true; +} + +bool shouldGenerateRelLookupTableForGlobal(GlobalVariable &GlobalVar) { + if (!GlobalVar.hasInitializer() || + !isa<ConstantArray>(GlobalVar.getInitializer())) + return false; + + ConstantArray *Array = dyn_cast<ConstantArray>(GlobalVar.getInitializer()); + // If values are not pointers, do not generate a relative lookup table. 
+ if (!Array->getType()->getElementType()->isPointerTy()) + return false; + + for (Use &Operand : Array->operands()) { + GlobalVariable *GlobalVarOp = dyn_cast<GlobalVariable>(Operand); + /// If any of the pointer values in the lookup table is not a global value + /// or dso_local, do not generate a relative lookup table. + if (GlobalVarOp && + !(GlobalVarOp->isDSOLocal() || GlobalVarOp->hasLocalLinkage())) + return false; + + ConstantExpr *CE = dyn_cast<ConstantExpr>(Operand); + if (!CE || CE->getOpcode() != Instruction::GetElementPtr) + return false; + + GlobalValue *Pointer = dyn_cast<GlobalValue>(CE->getOperand(0)); + if (!Pointer) + return false; + + if (!(Pointer->isDSOLocal() || Pointer->hasLocalLinkage())) + return false; + } + + /// If lookup table has more than one user, + /// do not generate a relative lookup table. + if (!GlobalVar.hasOneUser()) + return false; + + return true; +} + +GlobalVariable *generateRelLookupTable(Function &Func, + GlobalVariable &LookupTable) { + Module &Mod = *Func.getParent(); + ConstantArray *LookupTableArr = + dyn_cast<ConstantArray>(LookupTable.getInitializer()); + unsigned NumElts = LookupTableArr->getType()->getNumElements(); + ArrayType *IntArrayTy = + ArrayType::get(Type::getInt32Ty(Mod.getContext()), NumElts); + GlobalVariable *RelLookupTable = + new GlobalVariable(Mod, IntArrayTy, + /*isConstant=*/true, GlobalVariable::PrivateLinkage, + nullptr, "reltable." 
+ Func.getName()); + + uint64_t Idx = 0; + SmallVector<Constant *, 64> RelLookupTableContents(NumElts); + + for (Use &Operand : LookupTableArr->operands()) { + Constant *Element = cast<Constant>(Operand); + Type *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext()); + Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); + Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); + Constant *Sub = llvm::ConstantExpr::getSub(Target, Base); + Constant *RelOffset = + llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(Mod.getContext())); + RelLookupTableContents[Idx++] = RelOffset; + } + + Constant *Initializer = + ConstantArray::get(IntArrayTy, RelLookupTableContents); + RelLookupTable->setInitializer(Initializer); + RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + RelLookupTable->setAlignment(llvm::Align(4)); + return RelLookupTable; +} + +GlobalVariable *canGenerateRelLookupTable(GlobalVariable &LookupTable) { + User *U = LookupTable.use_begin()->getUser(); + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U); + if (!GEP) + return nullptr; + + LoadInst *Load = dyn_cast<LoadInst>(GEP->getNextNode()); + if (!Load) + return nullptr; + + Module &Mod = *LookupTable.getParent(); + BasicBlock *BB = GEP->getParent(); + IRBuilder<> Builder(BB); + Function &Func = *BB->getParent(); + + // Generate an array that consists of relative offsets + GlobalVariable *RelLookupTable = generateRelLookupTable(Func, LookupTable); + + // Place load.relative intrinsic call after GEP + Builder.SetInsertPoint(GEP); + Value *Index = GEP->getOperand(2); + IntegerType *IntTy = cast<IntegerType>(Index->getType()); + Value *Offset = + Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); + + Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration( + &Mod, Intrinsic::load_relative, {Index->getType()}); + + Constant *Base = + llvm::ConstantExpr::getBitCast(RelLookupTable, Builder.getInt8PtrTy()); + // Call 
load relative intrinsic that computes the target address + // by adding base address (lookup table address) and relative offset. + CallInst *Call = + CallInst::Create(LoadRelIntrinsic, {Base, Offset}, "reltable.intrinsic"); + + // Replace load instruction with load.relative intrinsic call. + ReplaceInstWithInst(Load, Call); + + // Remove GEP instruction. + GEP->eraseFromParent(); + return RelLookupTable; +} + +// Convert lookup tables to relative lookup tables in the module. +bool generateRelativeLookupTables(Module &M) { + bool Changed = false; + + if (!shouldGenerateRelLookupTables(M)) + return false; + + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E;) { + GlobalVariable *GlobalVar = &*GVI++; + + if (!shouldGenerateRelLookupTableForGlobal(*GlobalVar)) + continue; + + GlobalVariable *RelLookupTable = canGenerateRelLookupTable(*GlobalVar); + if (!RelLookupTable) + continue; + + // Remove the original lookup table. + GlobalVar->eraseFromParent(); + Changed = true; + } + + return Changed; +} + +} // end namespace llvm + +namespace { + +// Pass that converts lookup tables to relative lookup tables +class RelLookupTableConverterPass : public ModulePass { + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + StringRef getPassName() const override { + return "Relative Lookup Table Generator"; + } + + RelLookupTableConverterPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) override { + generateRelativeLookupTables(M); + return false; + } +}; + +char RelLookupTableConverterPass::ID = 0; + +} // anonymous namespace + +PreservedAnalyses RelLookupTableGeneratorPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (!generateRelativeLookupTables(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +INITIALIZE_PASS_BEGIN(RelLookupTableConverterPass, "rel-lookup-table-generator", + "Generate relative lookup tables", false, false) 
+INITIALIZE_PASS_END(RelLookupTableConverterPass, "rel-lookup-table-generator", + "Generate relative lookup tables", false, false) + +namespace llvm { +ModulePass *createRelLookupTableGeneratorPass() { + return new RelLookupTableConverterPass(); +} +} // end namespace llvm Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -53,6 +53,7 @@ NameAnonGlobals.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp + RelLookupTableGenerator.cpp ScalarEvolutionExpander.cpp StripGCRelocates.cpp SSAUpdater.cpp Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -827,6 +827,8 @@ .hoistCommonInsts(true) .sinkCommonInsts(true))); + MPM.add(createRelLookupTableGeneratorPass()); + if (SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 
if (OptLevel > 1 && ExtraVectorizerPasses) { Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -63,8 +63,8 @@ MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass( - getInlineParams(), - DebugLogging, + getInlineParams(), + DebugLogging, false)) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) @@ -93,6 +93,7 @@ MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs())) MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) MODULE_PASS("print<module-debuginfo>", ModuleDebugInfoPrinterPass(dbgs())) +MODULE_PASS("rel-lookup-table-generator", RelLookupTableGeneratorPass()) MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass()) @@ -282,7 +283,7 @@ FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) FUNCTION_PASS("print<func-properties>", FunctionPropertiesPrinterPass(dbgs())) FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs())) -FUNCTION_PASS("print<inliner-size-estimator>", +FUNCTION_PASS("print<inliner-size-estimator>", InlineSizeEstimatorAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs())) FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs())) Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -227,6 +227,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" 
+#include "llvm/Transforms/Utils/RelLookupTableGenerator.h" #include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" Index: llvm/include/llvm/Transforms/Utils/RelLookupTableGenerator.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/RelLookupTableGenerator.h @@ -0,0 +1,33 @@ +//===-- RelLookupTableGeneratorPass.h - Rel Table Gen --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements relative lookup table generator that converts +// lookup tables to relative lookup tables to make them PIC-friendly. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLEGENERATOR_H +#define LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLEGENERATOR_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +// Simple pass that converts lookup tables to relative lookup tables. 
+class RelLookupTableGeneratorPass + : public PassInfoMixin<RelLookupTableGeneratorPass> { +public: + RelLookupTableGeneratorPass() = default; + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLEGENERATOR_H Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -517,6 +517,7 @@ ///===---------------------------------------------------------------------===// ModulePass *createNameAnonGlobalPass(); +ModulePass *createRelLookupTableGeneratorPass(); ModulePass *createCanonicalizeAliasesPass(); //===----------------------------------------------------------------------===// Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -318,6 +318,7 @@ void initializeMustExecutePrinterPass(PassRegistry&); void initializeMustBeExecutedContextPrinterPass(PassRegistry&); void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); +void initializeRelLookupTableConverterPassPass(PassRegistry &); void initializeUniqueInternalLinkageNamesLegacyPassPass(PassRegistry &); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); Index: llvm/docs/Passes.rst =================================================================== --- llvm/docs/Passes.rst +++ llvm/docs/Passes.rst @@ -973,6 +973,11 @@ at 2), which effectively gives values in deep loops higher rank than values not in loops. +``-rel-lookup-table-generator``: Relative lookup table generator +---------------------------------------------------------------- + +This pass converts lookup tables to PIC-friendly relative lookup tables. 
+ ``-reg2mem``: Demote all values to stack slots ---------------------------------------------- Index: clang/test/CodeGen/switch-to-lookup-table.c =================================================================== --- /dev/null +++ clang/test/CodeGen/switch-to-lookup-table.c @@ -0,0 +1,55 @@ +// Check switch to lookup optimization in fPIC and fno-PIC mode +// RUN: %clang %s -target x86_64-linux -O2 -fno-PIC -fno-discard-value-names -S -emit-llvm -o - | FileCheck %s --check-prefix=FNOPIC +// RUN: %clang %s -target x86_64-linux -O2 -fPIC -fno-discard-value-names -mcmodel=small -S -emit-llvm -o - | FileCheck %s --check-prefix=FPIC + +// Switch lookup table +// FNOPIC: @switch.table.string_table = private unnamed_addr constant [3 x i8*] +// FNOPIC-SAME: [ +// FNOPIC-SAME: i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), +// FNOPIC-SAME: i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), +// FNOPIC-SAME: i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) +// FNOPIC-SAME: ], align 8 + +// Relative switch lookup table +// FPIC: @reltable.string_table = private unnamed_addr constant [3 x i32] +// FPIC-SAME: [ +// FPIC-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +// FPIC-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +// FPIC-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32) +// FPIC-SAME: ], align 4 +char* string_table(int cond) +{ + // FNOPIC-LABEL: @string_table( + // FNOPIC-NEXT: entry: + // FNOPIC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 + // FNOPIC-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] + // FNOPIC: switch.lookup: + // FNOPIC-NEXT: [[TMP1:%.*]] = sext i32 %cond to i64 + // FNOPIC-NEXT: [[SWITCH_GEP:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.string_table, i64 0, i64 [[TMP1]] + // FNOPIC-NEXT: [[SWITCH_LOAD:%.*]] = load i8*, i8** [[SWITCH_GEP]], align 8 + // FNOPIC-NEXT: ret i8* [[SWITCH_LOAD]] + // FNOPIC: return: + // FNOPIC-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) + + // FPIC-LABEL: @string_table( + // FPIC-NEXT: entry: + // FPIC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 + // FPIC-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] + // FPIC: switch.lookup: + // FPIC-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 %cond, 2 + // FPIC-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.string_table to i8*), i32 [[RELTABLE_SHIFT]]) + // FPIC-NEXT: ret i8* [[RELTABLE_INTRINSIC]] + // FPIC: return: + // FPIC-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) + + switch (cond) { + case 0: + return "zero"; + case 1: + return "one"; + case 2: + return "two"; + default: + return "default"; + } +}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits