https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112671
>From ded5771bb4ff7c8fd5401b4efe0af988539a8162 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Fri, 30 Aug 2024 00:09:09 -0700 Subject: [PATCH 1/2] [CGData] Global Merge Functions --- llvm/include/llvm/CGData/CodeGenData.h | 11 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + llvm/include/llvm/Transforms/IPO.h | 2 + .../Transforms/IPO/GlobalMergeFunctions.h | 77 ++ llvm/lib/CodeGen/TargetPassConfig.cpp | 3 + llvm/lib/LTO/LTO.cpp | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 2 + .../Transforms/IPO/GlobalMergeFunctions.cpp | 687 ++++++++++++++++++ .../ThinLTO/AArch64/cgdata-merge-local.ll | 62 ++ .../test/ThinLTO/AArch64/cgdata-merge-read.ll | 82 +++ .../AArch64/cgdata-merge-two-rounds.ll | 68 ++ .../ThinLTO/AArch64/cgdata-merge-write.ll | 97 +++ llvm/tools/llvm-lto2/CMakeLists.txt | 1 + llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 + 16 files changed, 1102 insertions(+) create mode 100644 llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h create mode 100644 llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 5d7c74725ccef1..da0e412f2a0e03 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -145,6 +145,9 @@ class CodeGenData { const OutlinedHashTree *getOutlinedHashTree() { return PublishedHashTree.get(); } + const StableFunctionMap *getStableFunctionMap() { + return PublishedStableFunctionMap.get(); + } /// Returns true if we should write codegen data. bool emitCGData() { return EmitCGData; } @@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() { return CodeGenData::getInstance().hasOutlinedHashTree(); } +inline bool hasStableFunctionMap() { + return CodeGenData::getInstance().hasStableFunctionMap(); +} + inline const OutlinedHashTree *getOutlinedHashTree() { return CodeGenData::getInstance().getOutlinedHashTree(); } +inline const StableFunctionMap *getStableFunctionMap() { + return CodeGenData::getInstance().getStableFunctionMap(); +} + inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } inline void diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 4352099d6dbb99..9aa36d5bb7f801 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry &); void initializeGCModuleInfoPass(PassRegistry &); void initializeGVNLegacyPassPass(PassRegistry &); +void initializeGlobalMergeFuncPass(PassRegistry &); void initializeGlobalMergePass(PassRegistry &); void initializeGlobalsAAWrapperPassPass(PassRegistry &); void initializeHardwareLoopsLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 92b59a66567c95..ea3609a2b4bc71 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -79,6 +79,7 @@ struct ForcePassLinking { (void)llvm::createDomOnlyViewerWrapperPassPass(); (void)llvm::createDomViewerWrapperPassPass(); (void)llvm::createAlwaysInlinerLegacyPass(); + (void)llvm::createGlobalMergeFuncPass(); (void)llvm::createGlobalsAAWrapperPass(); (void)llvm::createInstSimplifyLegacyPass(); (void)llvm::createInstructionCombiningPass(); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 13bc4700d87029..96b5b815132bc0 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -74,6 +74,7 @@ #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index ee0e35aa618325..86a8654f56997c 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -55,6 +55,8 @@ enum class PassSummaryAction { Export, ///< Export information to summary. }; +Pass *createGlobalMergeFuncPass(); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h new file mode 100644 index 00000000000000..565be54f89e882 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h @@ -0,0 +1,77 @@ +//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file defines global merge functions pass and related data structure. +/// +//===----------------------------------------------------------------------===// + +#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H +#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StableHashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CGData/StableFunctionMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include <map> +#include <mutex> + +enum class HashFunctionMode { + Local, + BuildingHashFuncion, + UsingHashFunction, +}; + +namespace llvm { + +// A vector of locations (the pair of (instruction, operand) indices) reachable +// from a parameter. +using ParamLocs = SmallVector<IndexPair, 4>; +// A vector of parameters +using ParamLocsVecTy = SmallVector<ParamLocs, 8>; +// A map of stable hash to a vector of stable functions + +/// GlobalMergeFunc finds functions which only differ by constants in +/// certain instructions, e.g. resulting from specialized functions of layout +/// compatible types. +/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function +/// hash to find the merge candidate. Similar to the global outliner, we can run +/// codegen twice to collect function merge candidate in the first round, and +/// merge functions globally in the second round. +class GlobalMergeFunc : public ModulePass { + HashFunctionMode MergerMode = HashFunctionMode::Local; + + std::unique_ptr<StableFunctionMap> LocalFunctionMap; + +public: + static char ID; + + GlobalMergeFunc(); + + StringRef getPassName() const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void initializeMergerMode(const Module &M); + + bool runOnModule(Module &M) override; + + /// Analyze module to create stable function into LocalFunctionMap. + void analyze(Module &M); + + /// Emit LocalFunctionMap into __llvm_merge section. + void emitFunctionMap(Module &M); + + /// Merge functions in the module using the global function map. + bool merge(Module &M, const StableFunctionMap *FunctionMap); +}; + +} // end namespace llvm +#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 11a7752ef7a381..76b582fbf32c6c 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner( "Disable all outlining"), // Sentinel value for unspecified option. clEnumValN(RunOutliner::AlwaysOutline, "", ""))); +cl::opt<bool> EnableGlobalMergeFunc( + "enable-global-merge-func", cl::Hidden, + cl::desc("Enable global merge functions that are based on hash function")); // Disable the pass to fix unwind information. Whether the pass is included in // the pipeline is controlled via the target options, this option serves as // manual override. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 90c4e2c3cd131c..08ceae96799878 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -53,6 +53,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 15cb57399d2460..6a36ed64149f93 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMipo GlobalDCE.cpp GlobalOpt.cpp GlobalSplit.cpp + GlobalMergeFunctions.cpp HotColdSplitting.cpp IPO.cpp IROutliner.cpp @@ -60,6 +61,7 @@ add_llvm_component_library(LLVMipo Analysis BitReader BitWriter + CGData Core FrontendOpenMP InstCombine diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp new file mode 100644 index 00000000000000..6d15183e8c8829 --- /dev/null +++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp @@ -0,0 +1,687 @@ +//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TODO: This implements a function merge using function hash while tracking +// differences in Constants. This uses stable function hash to find potential +// merge candidates. The first codegen round collects stable function hashes, +// and determines the merge candidates that match the stable function hashes. +// The set of parameters pointing to different Constants are also computed +// during the stable function merge. The second codegen round uses this global +// function info to optimistically create a merged function in each module +// context to guarantee correct transformation. Similar to the global outliner, +// the linker's deduplication (ICF) folds the identical merged functions to save +// the final binary size. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/CGData/CodeGenData.h" +#include "llvm/CGData/StableFunctionMap.h" +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/StructuralHash.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#define DEBUG_TYPE "global-merge-func" + +using namespace llvm; +using namespace llvm::support; + +static cl::opt<bool> + DisableGlobalMerging("disable-global-merging", cl::Hidden, + cl::desc("Disable global merging only by ignoring " + "the codegen data generation or use"), + cl::init(false)); + +extern cl::opt<bool> EnableGlobalMergeFunc; + +STATISTIC(NumMismatchedFunctionHashGlobalMergeFunction, + "Number of mismatched function hash for global merge function"); +STATISTIC(NumMismatchedInstCountGlobalMergeFunction, + "Number of mismatched instruction count for global merge function"); +STATISTIC(NumMismatchedConstHashGlobalMergeFunction, + "Number of mismatched const hash for global merge function"); +STATISTIC(NumMismatchedModuleIdGlobalMergeFunction, + "Number of mismatched Module Id for global merge function"); +STATISTIC(NumGlobalMergeFunctions, + "Number of functions that are actually merged using function hash"); +STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed"); +STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed"); +STATISTIC(NumEligibleFunctions, "Number of functions that are eligible"); + +/// Returns true if the \opIdx operand of \p CI is the callee operand. +static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) { + return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx); +} + +static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) { + if (CI->isInlineAsm()) + return false; + Function *Callee = CI->getCalledOperand() + ? dyn_cast_or_null<Function>( + CI->getCalledOperand()->stripPointerCasts()) + : nullptr; + if (Callee) { + if (Callee->isIntrinsic()) + return false; + // objc_msgSend stubs must be called, and can't have their address taken. + if (Callee->getName().starts_with("objc_msgSend$")) + return false; + } + if (isCalleeOperand(CI, OpIdx) && + CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) { + // The operand is the callee and it has already been signed. Ignore this + // because we cannot add another ptrauth bundle to the call instruction. + return false; + } + return true; +} + +bool isEligibleInstrunctionForConstantSharing(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + return true; + default: + return false; + } +} + +bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) { + assert(OpIdx < I->getNumOperands() && "Invalid operand index"); + + if (!isEligibleInstrunctionForConstantSharing(I)) + return false; + + auto Opnd = I->getOperand(OpIdx); + if (!isa<Constant>(Opnd)) + return false; + + if (const auto *CI = dyn_cast<CallBase>(I)) + return canParameterizeCallOperand(CI, OpIdx); + + return true; +} + +/// Returns true if function \p F is eligible for merging. +bool isEligibleFunction(Function *F) { + if (F->isDeclaration()) + return false; + + if (F->hasFnAttribute(llvm::Attribute::NoMerge)) + return false; + + if (F->hasAvailableExternallyLinkage()) { + return false; + } + + if (F->getFunctionType()->isVarArg()) { + return false; + } + + if (F->getCallingConv() == CallingConv::SwiftTail) + return false; + + // if function contains callsites with musttail, if we merge + // it, the merged function will have the musttail callsite, but + // the number of parameters can change, thus the parameter count + // of the callsite will mismatch with the function itself. + // if (IgnoreMusttailFunction) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + const auto *CB = dyn_cast<CallBase>(&I); + if (CB && CB->isMustTailCall()) + return false; + } + } + + // TODO: Add a size constraint for tuning. + + return true; +} + +static bool +isEligibleInstrunctionForConstantSharingLocal(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + return true; + default: + return false; + } +} + +static bool ignoreOp(const Instruction *I, unsigned OpIdx) { + assert(OpIdx < I->getNumOperands() && "Invalid operand index"); + + if (!isEligibleInstrunctionForConstantSharingLocal(I)) + return false; + + if (!isa<Constant>(I->getOperand(OpIdx))) + return false; + + if (const auto *CI = dyn_cast<CallBase>(I)) + return canParameterizeCallOperand(CI, OpIdx); + + return true; +} + +// copy from merge functions.cpp +static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { + Type *SrcTy = V->getType(); + if (SrcTy->isStructTy()) { + assert(DestTy->isStructTy()); + assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); + Value *Result = PoisonValue::get(DestTy); + for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestTy->getStructElementType(I)); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isStructTy()); + if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) { + auto *DestAT = dyn_cast<ArrayType>(DestTy); + assert(DestAT); + assert(SrcAT->getNumElements() == DestAT->getNumElements()); + Value *Result = UndefValue::get(DestTy); + for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestAT->getElementType()); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isArrayTy()); + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + return Builder.CreateIntToPtr(V, DestTy); + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + return Builder.CreatePtrToInt(V, DestTy); + else + return Builder.CreateBitCast(V, DestTy); +} + +void GlobalMergeFunc::analyze(Module &M) { + ++NumAnalyzedModues; + for (Function &Func : M) { + ++NumAnalyzedFunctions; + if (isEligibleFunction(&Func)) { + ++NumEligibleFunctions; + + auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp); + + // Convert the map to a vector for a serialization-friendly format. + IndexOperandHashVecType IndexOperandHashes; + for (auto &Pair : *FI.IndexOperandHashMap) + IndexOperandHashes.emplace_back(Pair); + + StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(), + M.getModuleIdentifier(), FI.IndexInstruction->size(), + std::move(IndexOperandHashes)); + + LocalFunctionMap->insert(SF); + } + } +} + +/// Tuple to hold function info to process merging. +struct FuncMergeInfo { + StableFunctionEntry *SF; + Function *F; + std::unique_ptr<IndexInstrMap> IndexInstruction; +}; + +// Given the func info, and the parameterized locations, create and return +// a new merged function by replacing the original constants with the new +// parameters. +static Function *createMergedFunction(FuncMergeInfo &FI, + ArrayRef<Type *> ConstParamTypes, + const ParamLocsVecTy &ParamLocsVec) { + // Synthesize a new merged function name by appending ".Tgm" to the root + // function's name. + auto *MergedFunc = FI.F; + auto NewFunctionName = MergedFunc->getName().str() + ".Tgm"; + auto *M = MergedFunc->getParent(); + assert(!M->getFunction(NewFunctionName)); + + FunctionType *OrigTy = MergedFunc->getFunctionType(); + // Get the original params' types. + SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end()); + // Append const parameter types that are passed in. + ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end()); + FunctionType *FuncType = + FunctionType::get(OrigTy->getReturnType(), ParamTypes, false); + + // Declare a new function + Function *NewFunction = + Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName); + if (auto *SP = MergedFunc->getSubprogram()) + NewFunction->setSubprogram(SP); + NewFunction->copyAttributesFrom(MergedFunc); + NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass); + + NewFunction->setLinkage(GlobalValue::InternalLinkage); + NewFunction->addFnAttr(Attribute::NoInline); + + // Add the new function before the root function. + M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction); + + // Move the body of MergedFunc into the NewFunction. + NewFunction->splice(NewFunction->begin(), MergedFunc); + + // Update the original args by the new args. + auto NewArgIter = NewFunction->arg_begin(); + for (Argument &OrigArg : MergedFunc->args()) { + Argument &NewArg = *NewArgIter++; + OrigArg.replaceAllUsesWith(&NewArg); + } + + // Replace the original Constants by the new args. + unsigned NumOrigArgs = MergedFunc->arg_size(); + for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) { + Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx); + for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) { + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + auto *OrigC = Inst->getOperand(OpndIndex); + if (OrigC->getType() != NewArg->getType()) { + IRBuilder<> Builder(Inst->getParent(), Inst->getIterator()); + Inst->setOperand(OpndIndex, + createCast(Builder, NewArg, OrigC->getType())); + } else + Inst->setOperand(OpndIndex, NewArg); + } + } + + return NewFunction; +} + +// Given the original function (Thunk) and the merged function (ToFunc), create +// a thunk to the merged function. + +static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params, + Function *ToFunc) { + auto *Thunk = FI.F; + + assert(Thunk->arg_size() + Params.size() == + ToFunc->getFunctionType()->getNumParams()); + Thunk->dropAllReferences(); + + BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk); + IRBuilder<> Builder(BB); + + SmallVector<Value *> Args; + unsigned ParamIdx = 0; + FunctionType *ToFuncTy = ToFunc->getFunctionType(); + + // Add arguments which are passed through Thunk. + for (Argument &AI : Thunk->args()) { + Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + // Add new arguments defined by Params. + for (auto *Param : Params) { + assert(ParamIdx < ToFuncTy->getNumParams()); + // FIXME: do not support signing + Args.push_back( + createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + CallInst *CI = Builder.CreateCall(ToFunc, Args); + bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail && + Thunk->getCallingConv() == CallingConv::SwiftTail; + CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail + : llvm::CallInst::TCK_Tail); + CI->setCallingConv(ToFunc->getCallingConv()); + CI->setAttributes(ToFunc->getAttributes()); + if (Thunk->getReturnType()->isVoidTy()) { + Builder.CreateRetVoid(); + } else { + Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType())); + } +} + +// Check if the old merged/optimized IndexOperandHashMap is compatible with +// the current IndexOperandHashMap. OpndHash may not be stable across +// different builds due to varying modules combined. To address this, we relax +// the hash check condition by comparing Const hash patterns instead of absolute +// hash values. For example, let's assume we have three Consts located at idx1, +// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1 +// in the old merged map below: +// Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)] +// Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')] +// If the current function also has three Consts in the same locations, +// with hash sequences hash1', hash2', and hash1' where the first and third +// are the same as the old hash sequences, we consider them matched. +static bool checkConstHashCompatible( + const DenseMap<IndexPair, OpndHash> &OldInstOpndIndexToConstHash, + const DenseMap<IndexPair, OpndHash> &CurrInstOpndIndexToConstHash) { + + DenseMap<OpndHash, OpndHash> OldHashToCurrHash; + for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) { + auto It = CurrInstOpndIndexToConstHash.find(Index); + if (It == CurrInstOpndIndexToConstHash.end()) + return false; + + auto CurrHash = It->second; + auto J = OldHashToCurrHash.find(OldHash); + if (J == OldHashToCurrHash.end()) + OldHashToCurrHash.insert({OldHash, CurrHash}); + else if (J->second != CurrHash) + return false; + } + + return true; +} + +// Validate the locations pointed by a param has the same hash and Constant. +static bool checkConstLocationCompatible(const StableFunctionEntry &SF, + const IndexInstrMap &IndexInstruction, + const ParamLocsVecTy &ParamLocsVec) { + for (auto &ParamLocs : ParamLocsVec) { + std::optional<OpndHash> OldHash; + std::optional<Constant *> OldConst; + for (auto &Loc : ParamLocs) { + assert(SF.IndexOperandHashMap->count(Loc)); + auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc); + auto [InstIndex, OpndIndex] = Loc; + assert(InstIndex < IndexInstruction.size()); + const auto *Inst = IndexInstruction.lookup(InstIndex); + auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex)); + if (!OldHash) { + OldHash = CurrHash; + OldConst = CurrConst; + } else if (CurrConst != *OldConst || CurrHash != *OldHash) + return false; + } + } + return true; +} + +static ParamLocsVecTy +computeParamInfo(const SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) { + std::map<std::vector<OpndHash>, ParamLocs> HashSeqToLocs; + auto &RSF = *SFS[0]; + unsigned StableFunctionCount = SFS.size(); + + for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) { + // Const hash sequence across stable functions. + // We will allocate a parameter per unique hash squence. + // can't use SmallVector as key + std::vector<OpndHash> ConstHashSeq; + ConstHashSeq.push_back(Hash); + bool Identical = true; + for (unsigned J = 1; J < StableFunctionCount; ++J) { + auto &SF = SFS[J]; + assert(SF->IndexOperandHashMap->count(IndexPair)); + auto SHash = (*SF->IndexOperandHashMap)[IndexPair]; + if (Hash != SHash) + Identical = false; + ConstHashSeq.push_back(SHash); + } + + if (Identical) + continue; + + // For each unique Const hash sequence (parameter), add the locations. + HashSeqToLocs[ConstHashSeq].push_back(IndexPair); + } + + ParamLocsVecTy ParamLocsVec; + for (auto &[HashSeq, Locs] : HashSeqToLocs) { + ParamLocsVec.push_back(std::move(Locs)); + std::sort( + ParamLocsVec.begin(), ParamLocsVec.end(), + [&](const ParamLocs &L, const ParamLocs &R) { return L[0] < R[0]; }); + } + return ParamLocsVec; +} + +bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { + bool Changed = false; + + // Build a map from stable function name to function. + StringMap<Function *> StableNameToFuncMap; + for (auto &F : M) + StableNameToFuncMap[get_stable_name(F.getName())] = &F; + // Track merged functions + DenseSet<Function *> MergedFunctions; + + auto ModId = M.getModuleIdentifier(); + for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) { + // No interest if the number of candidates are less than 2. + if (SFS.size() < 2) + continue; + // Compute the parameter locations based on the unique hash sequences + // across the candidates. + auto ParamLocsVec = computeParamInfo(SFS); + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash << " with Params " + << ParamLocsVec.size() << "\n"; + }); + + Function *MergedFunc = nullptr; + std::string MergedModId; + SmallVector<FuncMergeInfo> FuncMergeInfos; + for (auto &SF : SFS) { + // Get the function from the stable name. + auto I = StableNameToFuncMap.find( + *FunctionMap->getNameForId(SF->FunctionNameId)); + if (I == StableNameToFuncMap.end()) + continue; + Function *F = I->second; + assert(F); + // Skip if the function has been merged before. + if (MergedFunctions.count(F)) + continue; + // Consider the function if it is eligible for merging. + if (!isEligibleFunction(F)) + continue; + + auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp); + uint64_t FuncHash = FI.FunctionHash; + if (Hash != FuncHash) { + ++NumMismatchedFunctionHashGlobalMergeFunction; + continue; + } + + if (SF->InstCount != FI.IndexInstruction->size()) { + ++NumMismatchedInstCountGlobalMergeFunction; + continue; + } + bool HasValidSharedConst = true; + for (auto &[Index, Hash] : *SF->IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = Index; + assert(InstIndex < FI.IndexInstruction->size()); + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + if (!isEligibleOperandForConstantSharing(Inst, OpndIndex)) { + HasValidSharedConst = false; + break; + } + } + if (!HasValidSharedConst) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + if (!checkConstHashCompatible(*SF->IndexOperandHashMap, + *FI.IndexOperandHashMap)) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction, + ParamLocsVec)) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + + if (MergedFunc) { + // Check if the matched functions fall into the same (first) module. + // This module check is not strictly necessary as the functions can move + // around. We just want to avoid merging functions from different + // modules than the first one in the functon map, as they may not end up + // with not being ICFed. + if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) { + ++NumMismatchedModuleIdGlobalMergeFunction; + continue; + } + } else { + MergedFunc = F; + MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId); + } + + FuncMergeInfos.push_back({SF.get(), F, std::move(FI.IndexInstruction)}); + MergedFunctions.insert(F); + } + unsigned FuncMergeInfoSize = FuncMergeInfos.size(); + if (FuncMergeInfoSize == 0) + continue; + + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize + << " in " << ModId << "\n"; + }); + for (auto &FMI : FuncMergeInfos) { + Changed = true; + + // We've already validated all locations of constant operands pointed by + // the parameters. Just use the first one to bookkeep the original + // constants for each parameter + SmallVector<Constant *> Params; + SmallVector<Type *> ParamTypes; + for (auto &ParamLocs : ParamLocsVec) { + assert(!ParamLocs.empty()); + auto &[InstIndex, OpndIndex] = ParamLocs[0]; + auto *Inst = FMI.IndexInstruction->lookup(InstIndex); + auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex)); + Params.push_back(Opnd); + ParamTypes.push_back(Opnd->getType()); + } + + // Create a merged function derived from the first function in the current + // module context. + Function *MergedFunc = + createMergedFunction(FMI, ParamTypes, ParamLocsVec); + + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash + << ") " << MergedFunc->getName() << " generated from " + << FMI.F->getName() << ":\n"; + MergedFunc->dump(); + }); + + // Create a thunk to the merged function. + createThunk(FMI, Params, MergedFunc); + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Thunk generated: \n"; + FMI.F->dump(); + }); + ++NumGlobalMergeFunctions; + } + } + + return Changed; +} + +char GlobalMergeFunc::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalMergeFunc, "global-merge-func", + "Global merge function pass", false, false) +INITIALIZE_PASS_END(GlobalMergeFunc, "global-merge-func", + "Global merge function pass", false, false) + +StringRef GlobalMergeFunc::getPassName() const { + return "Global Merge Functions"; +} + +void GlobalMergeFunc::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>(); + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); +} + +GlobalMergeFunc::GlobalMergeFunc() : ModulePass(ID) { + initializeGlobalMergeFuncPass(*llvm::PassRegistry::getPassRegistry()); +} + +namespace llvm { +Pass *createGlobalMergeFuncPass() { return new GlobalMergeFunc(); } +} // namespace llvm + +void GlobalMergeFunc::initializeMergerMode(const Module &M) { + LocalFunctionMap = std::make_unique<StableFunctionMap>(); + + if (DisableGlobalMerging) + return; + + if (auto *IndexWrapperPass = + getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) { + auto *TheIndex = IndexWrapperPass->getIndex(); + // (Full)LTO module does not have functions added to the index. + // In this case, we run a local merger without using codegen data. + if (TheIndex && !TheIndex->hasExportedFunctions(M)) + return; + } + + if (cgdata::emitCGData()) + MergerMode = HashFunctionMode::BuildingHashFuncion; + else if (cgdata::hasStableFunctionMap()) + MergerMode = HashFunctionMode::UsingHashFunction; +} + +void GlobalMergeFunc::emitFunctionMap(Module &M) { + LLVM_DEBUG({ + dbgs() << "Emit function map. Size: " << LocalFunctionMap->size() << "\n"; + }); + SmallVector<char> Buf; + raw_svector_ostream OS(Buf); + + StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get()); + + std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( + OS.str(), "in-memory stable function map", false); + + Triple TT(M.getTargetTriple()); + embedBufferInModule(M, *Buffer.get(), + getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()), + Align(4)); +} + +bool GlobalMergeFunc::runOnModule(Module &M) { + initializeMergerMode(M); + + const StableFunctionMap *FuncMap; + if (MergerMode == HashFunctionMode::UsingHashFunction) { + // Use the prior CG data to optimistically create global merge candidates. + FuncMap = cgdata::getStableFunctionMap(); + } else { + analyze(M); + // Emit the local function map to the custom section, __llvm_merge before + // finalizing it. + if (MergerMode == HashFunctionMode::BuildingHashFuncion && + !LocalFunctionMap->empty()) + emitFunctionMap(M); + LocalFunctionMap->finalize(); + FuncMap = LocalFunctionMap.get(); + } + + return merge(M, FuncMap); +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll new file mode 100644 index 00000000000000..4f5f5c0b5b26d9 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll @@ -0,0 +1,62 @@ +; This test checks if two similar functions, f1 and f2, can be merged locally within a single module +; while parameterizing a difference in their global variables, g1 and g2. +; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm, +; which are tail-called from thunks g1 and g2 respectively. +; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC). + +; RUN: opt -module-summary -module-hash %s -o %t + +; RUN: llvm-lto2 run -enable-global-merge-func=false %t -o %tout-nomerge \ +; RUN: -r %t,_f1,px \ +; RUN: -r %t,_f2,px \ +; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l +; RUN: llvm-nm %tout-nomerge.1 | FileCheck %s --check-prefix=NOMERGE +; RUN: llvm-lto2 run -enable-global-merge-func=true %t -o %tout-merge \ +; RUN: -r %t,_f1,px \ +; RUN: -r %t,_f2,px \ +; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l +; RUN: llvm-nm %tout-merge.1 | FileCheck %s --check-prefix=GLOBALMERGE +; RUN: llvm-objdump -d %tout-merge.1 | FileCheck %s --check-prefix=THUNK + +; NOMERGE-NOT: _f1.Tgm +; GLOBALMERGE: _f1.Tgm +; GLOBALMERGE: _f2.Tgm + +; THUNK: <_f1>: +; THUNK-NEXT: adrp x1, +; THUNK-NEXT: ldr x1, [x1] +; THUNK-NEXT: b + +; THUNK: <_f2>: +; THUNK-NEXT: adrp x1, +; THUNK-NEXT: ldr x1, [x1] +; THUNK-NEXT: b + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll new file mode 100644 index 00000000000000..da756e7f15e68e --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll @@ -0,0 +1,82 @@ +; This test demonstrates how similar functions are handled during global outlining. +; Currently, we do not attempt to share an merged function for identical sequences. +; Instead, each merging instance is created uniquely. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; First, run with -codegen-data-generate=true to generate the cgdata in the object files. +; Using llvm-cgdata, merge the cg data. +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2 + +; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary. +; Each module has its own merging instance as it is matched against the merged cgdata. +; RUN: llvm-lto2 run -enable-global-merge-func=true \ +; RUN: -codegen-data-use-path=%tout.cgdata \ +; RUN: %t-foo.bc %t-goo.bc -o %tout-read \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1 +; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2 +; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2 + +; READ1: _f1.Tgm +; READ2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll new file mode 100644 index 00000000000000..06880e3d268189 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll @@ -0,0 +1,68 @@ +; TODO: This test checks if the how similar functions are handled during global outlining +; by repeating the codegen via -codegen-data-thinlto-two-rounds=true. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1 +; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2 +; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2 + +; OUT1: _f1.Tgm +; OUT2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll new file mode 100644 index 00000000000000..a4022eb885b43f --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll @@ -0,0 +1,97 @@ +; This test verifies whether a stable function is encoded into the __llvm_merge section +; when the -codegen-data-generate flag is used under -enable-global-merge-func=true. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE + +; No merge instance is locally created as each module has a singltone function. +; NOWRITE-NOT: _f1.Tgm +; NOWRITE-NOT: _f2.Tgm + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME +; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME + +; On a write mode, no merging happens yet for each module. +; We only create stable functions and publish them into __llvm_merge section for each object. +; WRITE-NOT: _f1.Tgm +; WRITE-NOT: _f2.Tgm +; SECTNAME: __llvm_merge + +; Merge the cgdata using llvm-cgdata. +; We now validate the content of the merged cgdata. +; Two functions have the same hash with only one different constnat at a same location. +; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2 +; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s + +; CHECK: - Hash: [[#%d,HASH:]] +; CHECK-NEXT: FunctionName: f1 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]] +; CHECK-NEXT: OpndHash: {{.*}} + +; CHECK: - Hash: [[#%d,HASH]] +; CHECK-NEXT: FunctionName: f2 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]] +; CHECK-NEXT: OpndHash: {{.*}} + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt index 3b4644d6e27715..6ddccc7f17442f 100644 --- a/llvm/tools/llvm-lto2/CMakeLists.txt +++ b/llvm/tools/llvm-lto2/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS BitReader CodeGen Core + IPO Linker LTO MC diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index d4f022ef021a44..0dc6623552a4bd 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/PluginLoader.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Threading.h" +#include "llvm/Transforms/IPO.h" #include <atomic> using namespace llvm; @@ -195,6 +196,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat( extern cl::opt<bool> UseNewDbgInfoFormat; extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat; extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat; +extern cl::opt<bool> EnableGlobalMergeFunc; static void check(Error E, std::string Msg) { if (!E) @@ -374,6 +376,10 @@ static int run(int argc, char **argv) { if (DI.getSeverity() == DS_Error) HasErrors = true; }; + Conf.PreCodeGenPassesHook = [](legacy::PassManager &pm) { + if (EnableGlobalMergeFunc) + pm.add(createGlobalMergeFuncPass()); + }; LTO::LTOKind LTOMode = LTO::LTOK_Default; >From 12b539e6e88710d85c40955be63e901a6af0f316 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Fri, 18 Oct 2024 10:12:21 -0700 Subject: [PATCH 2/2] Address comments from tschuett --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 - llvm/include/llvm/Transforms/IPO.h | 3 ++ .../Transforms/IPO/GlobalMergeFunctions.h | 45 ++++++++-------- .../Transforms/IPO/GlobalMergeFunctions.cpp | 53 +++++++++---------- 4 files changed, 51 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 96b5b815132bc0..13bc4700d87029 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -74,7 +74,6 @@ #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" -#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 86a8654f56997c..28cf330ad20812 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -55,6 +55,9 @@ enum class PassSummaryAction { Export, ///< Export information to summary. }; +/// createGlobalMergeFuncPass - This pass generates merged instances by +/// parameterizing distinct constants across similar functions, utilizing stable +/// function hash information. Pass *createGlobalMergeFuncPass(); } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h index 565be54f89e882..395aeda73b2d0f 100644 --- a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h +++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h @@ -5,23 +5,29 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/// -/// This file defines global merge functions pass and related data structure. -/// +// +// This pass defines the implementation of a function merging mechanism +// that utilizes a stable function hash to track differences in constants and +// identify potential merge candidates. The process involves two rounds: +// 1. The first round collects stable function hashes and identifies merge +// candidates with matching hashes. It also computes the set of parameters +// that point to different constants during the stable function merge. +// 2. The second round leverages this collected global function information to +// optimistically create a merged function in each module context, ensuring +// correct transformation. +// Similar to the global outliner, this approach uses the linker's deduplication +// (ICF) to fold identical merged functions, thereby reducing the final binary +// size. The work is inspired by the concepts discussed in the following paper: +// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575. +// //===----------------------------------------------------------------------===// -#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H -#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H +#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H +#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StableHashing.h" -#include "llvm/ADT/StringRef.h" #include "llvm/CGData/StableFunctionMap.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include <map> -#include <mutex> enum class HashFunctionMode { Local, @@ -36,15 +42,10 @@ namespace llvm { using ParamLocs = SmallVector<IndexPair, 4>; // A vector of parameters using ParamLocsVecTy = SmallVector<ParamLocs, 8>; -// A map of stable hash to a vector of stable functions - -/// GlobalMergeFunc finds functions which only differ by constants in -/// certain instructions, e.g. resulting from specialized functions of layout -/// compatible types. -/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function -/// hash to find the merge candidate. Similar to the global outliner, we can run -/// codegen twice to collect function merge candidate in the first round, and -/// merge functions globally in the second round. + +/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism +/// using stable function hashes. It identifies and merges functions with +/// matching hashes across modules to optimize binary size. class GlobalMergeFunc : public ModulePass { HashFunctionMode MergerMode = HashFunctionMode::Local; @@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass { /// Emit LocalFunctionMap into __llvm_merge section. void emitFunctionMap(Module &M); - /// Merge functions in the module using the global function map. + /// Merge functions in the module using the given function map. bool merge(Module &M, const StableFunctionMap *FunctionMap); }; } // end namespace llvm -#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H +#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp index 6d15183e8c8829..3216b370878e54 100644 --- a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp @@ -6,16 +6,19 @@ // //===----------------------------------------------------------------------===// // -// TODO: This implements a function merge using function hash while tracking -// differences in Constants. This uses stable function hash to find potential -// merge candidates. The first codegen round collects stable function hashes, -// and determines the merge candidates that match the stable function hashes. -// The set of parameters pointing to different Constants are also computed -// during the stable function merge. The second codegen round uses this global -// function info to optimistically create a merged function in each module -// context to guarantee correct transformation. Similar to the global outliner, -// the linker's deduplication (ICF) folds the identical merged functions to save -// the final binary size. +// This pass defines the implementation of a function merging mechanism +// that utilizes a stable function hash to track differences in constants and +// create potential merge candidates. The process involves two rounds: +// 1. The first round collects stable function hashes and identifies merge +// candidates with matching hashes. It also computes the set of parameters +// that point to different constants during the stable function merge. +// 2. The second round leverages this collected global function information to +// optimistically create a merged function in each module context, ensuring +// correct transformation. +// Similar to the global outliner, this approach uses the linker's deduplication +// (ICF) to fold identical merged functions, thereby reducing the final binary +// size. The work is inspired by the concepts discussed in the following paper: +// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575. // //===----------------------------------------------------------------------===// @@ -23,9 +26,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/CGData/CodeGenData.h" -#include "llvm/CGData/StableFunctionMap.h" -#include "llvm/CodeGen/MachineStableHash.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/StructuralHash.h" #include "llvm/InitializePasses.h" @@ -59,7 +59,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed"); STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed"); STATISTIC(NumEligibleFunctions, "Number of functions that are eligible"); -/// Returns true if the \opIdx operand of \p CI is the callee operand. +/// Returns true if the \OpIdx operand of \p CI is the callee operand. static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) { return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx); } @@ -123,22 +123,19 @@ bool isEligibleFunction(Function *F) { if (F->hasFnAttribute(llvm::Attribute::NoMerge)) return false; - if (F->hasAvailableExternallyLinkage()) { + if (F->hasAvailableExternallyLinkage()) return false; - } - if (F->getFunctionType()->isVarArg()) { + if (F->getFunctionType()->isVarArg()) return false; - } if (F->getCallingConv() == CallingConv::SwiftTail) return false; - // if function contains callsites with musttail, if we merge + // If function contains callsites with musttail, if we merge // it, the merged function will have the musttail callsite, but // the number of parameters can change, thus the parameter count // of the callsite will mismatch with the function itself. - // if (IgnoreMusttailFunction) { for (const BasicBlock &BB : *F) { for (const Instruction &I : BB) { const auto *CB = dyn_cast<CallBase>(&I); @@ -180,7 +177,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) { return true; } -// copy from merge functions.cpp static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { Type *SrcTy = V->getType(); if (SrcTy->isStructTy()) { @@ -229,7 +225,8 @@ void GlobalMergeFunc::analyze(Module &M) { auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp); - // Convert the map to a vector for a serialization-friendly format. + // Convert the operand map to a vector for a serialization-friendly + // format. IndexOperandHashVecType IndexOperandHashes; for (auto &Pair : *FI.IndexOperandHashMap) IndexOperandHashes.emplace_back(Pair); @@ -539,7 +536,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { // This module check is not strictly necessary as the functions can move // around. We just want to avoid merging functions from different // modules than the first one in the functon map, as they may not end up - // with not being ICFed. + // with not being ICFed by the linker. if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) { ++NumMismatchedModuleIdGlobalMergeFunction; continue; @@ -560,12 +557,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize << " in " << ModId << "\n"; }); + for (auto &FMI : FuncMergeInfos) { Changed = true; // We've already validated all locations of constant operands pointed by - // the parameters. Just use the first one to bookkeep the original - // constants for each parameter + // the parameters. Populate parameters pointing to the original constants. SmallVector<Constant *> Params; SmallVector<Type *> ParamTypes; for (auto &ParamLocs : ParamLocsVec) { @@ -577,8 +574,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { ParamTypes.push_back(Opnd->getType()); } - // Create a merged function derived from the first function in the current - // module context. + // Create a merged function derived from the current function. Function *MergedFunc = createMergedFunction(FMI, ParamTypes, ParamLocsVec); @@ -589,7 +585,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { MergedFunc->dump(); }); - // Create a thunk to the merged function. + // Transform the current function into a thunk that calls the merged + // function. createThunk(FMI, Params, MergedFunc); LLVM_DEBUG({ dbgs() << "[GlobalMergeFunc] Thunk generated: \n"; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits