================ @@ -321,14 +746,127 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { if (!NumCandidates || (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) continue; + auto PromotionCandidates = getPromotionCandidatesForCallSite( *CB, ICallProfDataRef, TotalCount, NumCandidates); - Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount, - ICallProfDataRef, NumCandidates); + + VTableGUIDCountsMap VTableGUIDCounts; + Instruction *VPtr = + computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates); + + if (isProfitableToCompareVTables(PromotionCandidates, TotalCount)) + Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates, + TotalCount, NumCandidates, + ICallProfDataRef, VTableGUIDCounts); + else + Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates, + TotalCount, ICallProfDataRef, + NumCandidates, VTableGUIDCounts); } return Changed; } +// TODO: Returns false if the function addressing and vtable load instructions +// cannot sink to indirect fallback. +bool IndirectCallPromoter::isProfitableToCompareVTables( + const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) { + if (!ICPEnableVTableCmp || Candidates.empty()) + return false; + uint64_t RemainingVTableCount = TotalCount; + for (size_t I = 0; I < Candidates.size(); I++) { + auto &Candidate = Candidates[I]; + uint64_t VTableSumCount = 0; + for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) + VTableSumCount += Count; + + if (VTableSumCount < Candidate.Count * ICPVTableCountPercentage) + return false; + + RemainingVTableCount -= Candidate.Count; + + int NumAdditionalVTable = 0; + if (I == Candidates.size() - 1) + NumAdditionalVTable = ICPNumAdditionalVTableLast; + + int ActualNumAdditionalInst = Candidate.AddressPoints.size() - 1; + if (ActualNumAdditionalInst > NumAdditionalVTable) { + return false; + } + } + + // If the indirect fallback is not cold, don't compare vtables. 
+ if (PSI && PSI->hasProfileSummary() && + !PSI->isColdCount(RemainingVTableCount)) + return false; + + return true; +} + +static void +computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, + VirtualCallSiteTypeInfoMap &VirtualCSInfo) { + auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult<DominatorTreeAnalysis>(F); + }; + + auto compute = [&](Function *Func) { + if (!Func || Func->use_empty()) + return; + // Iterate all type.test calls and find all indirect calls. + // TODO: Add llvm.public.type.test + for (Use &U : llvm::make_early_inc_range(Func->uses())) { + auto *CI = dyn_cast<CallInst>(U.getUser()); + if (!CI) + continue; + auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); + if (!TypeMDVal) + continue; + auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); + if (!CompatibleTypeId) + continue; + + // Find out all devirtualizable call sites given a llvm.type.test + // intrinsic call. + SmallVector<DevirtCallSite, 1> DevirtCalls; + SmallVector<CallInst *, 1> Assumes; + auto &DT = LookupDomTree(*CI->getFunction()); + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + + // type-id, offset from the address point + // combined with type metadata to compute function offset + for (auto &DevirtCall : DevirtCalls) { + CallBase &CB = DevirtCall.CB; + // Given an indirect call, try find the instruction which loads a + // pointer to virtual table. + Instruction *VTablePtr = + PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); + if (!VTablePtr) + continue; + VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, + CompatibleTypeId->getString()}; + } + } + }; + + // Right now only llvm.type.test is used to find out virtual call sites. 
+ // With ThinLTO and whole-program-devirtualization, llvm.type.test and + // llvm.public.type.test are emitted, and llvm.public.type.test is either + // refined to llvm.type.test or dropped before indirect-call-promotion pass. ---------------- minglotus-6 wrote:
If the ICP pass is disabled in prelink, we don't need to analyze `llvm.public.type.test`, as you pointed out. `llvm.public.type.test` was analyzed here because I hadn't disabled prelink ICP for the compiler-rt test. I have now updated the compiler-rt test to disable prelink ICP and enable postlink ICP, and the pass now skips the analysis for `llvm.public.type.test`. https://github.com/llvm/llvm-project/pull/81442 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits