llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-pgo Author: Mingming Liu (minglotus-6) <details> <summary>Changes</summary> * The parent patch is https://github.com/llvm/llvm-project/pull/81024 --- Full diff: https://github.com/llvm/llvm-project/pull/81051.diff 3 Files Affected: - (modified) llvm/include/llvm/ProfileData/InstrProf.h (+28) - (modified) llvm/lib/ProfileData/InstrProf.cpp (+79-41) - (modified) llvm/unittests/ProfileData/InstrProfTest.cpp (+47) ``````````diff diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 6cdceae5eeb96..d5d3c7f6b4b34 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -198,6 +198,15 @@ std::string getPGOFuncName(StringRef RawFuncName, /// called from LTO optimization passes. std::string getIRPGOFuncName(const Function &F, bool InLTO = false); +/// Returns the PGO object name. This function has some special handling +/// when called in LTO optimization: +/// - In LTO mode, returns the name in `PGONameMetadata` if exists, otherwise +/// returns the IRPGO name as if the GO has non-local linkage. +/// - In non-LTO mode, returns the IRPGO name as it is. +/// More rationale in the comment of function implementation. +std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO = false, + MDNode *PGONameMetadata = nullptr); + /// \return the filename and the function name parsed from the output of /// \c getIRPGOFuncName() std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName); @@ -487,8 +496,25 @@ class InstrProfSymtab { return "** External Symbol **"; } + // Returns the canonical name of the given PGOName by stripping the names + // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq." + // suffix is kept in the canonical name. + StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix); + + // Add the function into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOFuncName), PGOFuncName> + // - <MD5Hash(PGOFuncName), F> + // - <MD5Hash(getCanonicalName(PGOFuncName), F) Error addFuncWithName(Function &F, StringRef PGOFuncName); + // Add the vtable into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOVTableName), PGOVTableName> + // - <MD5Hash(PGOVTableName), V> + // - <MD5Hash(getCanonicalName(PGOVTableName), V) + Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName); + // If the symtab is created by a series of calls to \c addFuncName, \c // finalizeSymtab needs to be called before looking up function names. // This is required because the underlying map is a vector (for space @@ -543,6 +569,7 @@ class InstrProfSymtab { Error create(const FuncNameIterRange &FuncIterRange, const VTableNameIterRange &VTableIterRange); + // Map the MD5 of the symbol name to the name. Error addSymbolName(StringRef SymbolName) { if (SymbolName.empty()) return make_error<InstrProfError>(instrprof_error::malformed, @@ -665,6 +692,7 @@ void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; llvm::sort(MD5NameMap, less_first()); + llvm::sort(MD5VTableMap, less_first()); llvm::sort(MD5FuncMap, less_first()); llvm::sort(AddrToMD5Map, less_first()); AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 91e79e8b2e9ad..3b05faf0cc5d3 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -325,21 +325,20 @@ static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) { return {}; } -// Returns the PGO object name. This function has some special handling -// when called in LTO optimization. The following only applies when calling in -// LTO passes (when \c InLTO is true): LTO's internalization privatizes many -// global linkage symbols. This happens after value profile annotation, but -// those internal linkage functions should not have a source prefix. -// Additionally, for ThinLTO mode, exported internal functions are promoted -// and renamed. We need to ensure that the original internal PGO name is -// used when computing the GUID that is compared against the profiled GUIDs. -// To differentiate compiler generated internal symbols from original ones, -// PGOFuncName meta data are created and attached to the original internal -// symbols in the value profile annotation step -// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta -// data, its original linkage must be non-internal. -static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO, - MDNode *PGONameMetadata) { +std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO, + MDNode *PGONameMetadata) { + // The following only applies when calling in + // LTO passes (when \c InLTO is true): LTO's internalization privatizes many + // global linkage symbols. This happens after value profile annotation, but + // those internal linkage functions should not have a source prefix. + // Additionally, for ThinLTO mode, exported internal functions are promoted + // and renamed. We need to ensure that the original internal PGO name is + // used when computing the GUID that is compared against the profiled GUIDs. + // To differentiate compiler generated internal symbols from original ones, + // PGOFuncName meta data are created and attached to the original internal + // symbols in the value profile annotation step + // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta + // data, its original linkage must be non-internal. if (!InLTO) { auto FileName = getStrippedSourceFileName(GO); return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName); @@ -481,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { Types.clear(); G.getMetadata(LLVMContext::MD_type, Types); if (!Types.empty()) { - MD5VTableMap.emplace_back(G.getGUID(), &G); + if (Error E = addVTableWithName( + G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr))) + return E; } } Sorted = false; @@ -489,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { return Error::success(); } +Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, + StringRef VTablePGOName) { + if (Error E = addVTableName(VTablePGOName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable); + + // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no + // need to check ".__uniq." + + // If a local-linkage vtable is promoted to have external linkage in ThinLTO, + // it will have `.llvm.` in its name. Use the name before externalization. + StringRef CanonicalName = + getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false); + if (CanonicalName != VTablePGOName) { + if (Error E = addVTableName(CanonicalName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable); + } + + return Error::success(); +} + /// \c NameStrings is a string composed of one of more possibly encoded /// sub-strings. The substrings are separated by 0 or more zero bytes. This /// method decodes the string and calls `NameCallback` for each substring. @@ -561,35 +586,51 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings( std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); } -Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { - if (Error E = addFuncName(PGOFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); +StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName, + bool MayHaveUniqueSuffix) { + size_t pos = 0; // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the // stripped function name to the symbol table so that we can find a match // from profile. // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. - const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); + // ".__uniq." suffix is used to differentiate internal linkage functions in + // different modules and should be kept. Now this is the only suffix with the + // pattern ".xxx" which is kept before matching. In other words, other + // suffixes similar as + // ".llvm." will be stripped. + if (MayHaveUniqueSuffix) { + const std::string UniqSuffix = ".__uniq."; + pos = PGOName.find(UniqSuffix); + if (pos != StringRef::npos) + pos += UniqSuffix.length(); + else + pos = 0; + } + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise // search '.' from the beginning. - if (pos != std::string::npos) - pos += UniqSuffix.length(); - else - pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - StringRef OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) + pos = PGOName.find('.', pos); + if (pos != StringRef::npos && pos != 0) + return PGOName.substr(0, pos); + + return PGOName; +} + +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + if (Error E = addFuncName(PGOFuncName)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); + + StringRef CanonicalName = + getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true); + + if (CanonicalName != PGOFuncName) { + if (Error E = addFuncName(CanonicalName)) return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); + MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F); } + return Error::success(); } @@ -1257,11 +1298,8 @@ void annotateValueSite(Module &M, Instruction &Inst, } void annotateValueSite(Module &M, Instruction &Inst, - ArrayRef<InstrProfValueData> VDs, - uint64_t Sum, InstrProfValueKind ValueKind, - uint32_t MaxMDCount) { - if (VDs.empty()) - return; + ArrayRef<InstrProfValueData> VDs, uint64_t Sum, + InstrProfValueKind ValueKind, uint32_t MaxMDCount) { LLVMContext &Ctx = M.getContext(); MDBuilder MDHelper(Ctx); SmallVector<Metadata *, 3> Vals; diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index b007a374c2cf2..f0281bdf1525d 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -1606,6 +1607,41 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get()); Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get()); + ArrayType *VTableArrayType = ArrayType::get( + PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()), + 3); + Constant *Int32TyNull = + llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx)); + SmallVector<llvm::Type *, 1> tys; + tys.push_back(VTableArrayType); + StructType *VTableType = llvm::StructType::get(Ctx, tys); + + GlobalVariable *ExternalGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantStruct::get( + VTableType, {llvm::ConstantArray::get( + VTableArrayType, + {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInExternalGV", M.get())})}), + "ExternalGV"); + + GlobalVariable *PrivateGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantStruct::get( + VTableType, {llvm::ConstantArray::get( + VTableArrayType, + {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInPrivateGV", M.get())})}), + "PrivateGV"); + + // Only vtables with type metadata are added to symtab. + ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV")); + PrivateGV->addTypeMetadata(16, MDString::get(Ctx, "PrivateGV")); + InstrProfSymtab ProfSymtab; EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded()); @@ -1628,6 +1664,17 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { EXPECT_EQ(StringRef(PGOName), PGOFuncName); EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str())); } + + StringRef VTables[] = {"ExternalGV", "PrivateGV"}; + for (size_t I = 0; I < std::size(VTables); I++) { + GlobalVariable *GV = + M->getGlobalVariable(VTables[I], /* AllowInternal=*/true); + + std::string IRPGOName = getIRPGOObjectName(*GV); + EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName( + IndexedInstrProf::ComputeHash(IRPGOName))); + EXPECT_EQ(VTables[I], getParsedIRPGOFuncName(IRPGOName).second); + } } // Testing symtab serialization and creator/deserialization interface `````````` </details> https://github.com/llvm/llvm-project/pull/81051 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits