https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/93365
This pull request is the second part of an ongoing effort to extends PGO instrumentation to GPU device code and depends on #76587. This PR makes the following changes: - Introduces `__llvm_write_custom_profile` to PGO compiler-rt library. This is an external function that can be used to write profiles with custom data to target-specific files. - Adds `__llvm_write_custom_profile` as weak symbol to libomptarget so that it can write the collected data to a profraw file. >From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 15 Dec 2023 20:38:38 -0600 Subject: [PATCH 01/27] Add profiling functions to libomptarget --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++ openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++ .../DeviceRTL/include/Profiling.h | 21 +++++++++++++++++++ .../libomptarget/DeviceRTL/src/Profiling.cpp | 19 +++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d22d2a8e948b0..1d887d5cb5812 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr) +__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1ce3e1e40a80a..55ee15d068c67 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -89,6 +89,7 @@ set(include_files ${include_directory}/Interface.h ${include_directory}/LibC.h ${include_directory}/Mapping.h + ${include_directory}/Profiling.h ${include_directory}/State.h ${include_directory}/Synchronization.h ${include_directory}/Types.h @@ -104,6 +105,7 @@ set(src_files ${source_directory}/Mapping.cpp ${source_directory}/Misc.cpp ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h new file mode 100644 index 0000000000000..68c7744cd6075 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -0,0 +1,21 @@ +//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OMPTARGET_DEVICERTL_PROFILING_H +#define OMPTARGET_DEVICERTL_PROFILING_H + +extern "C" { + +void __llvm_profile_register_function(void *ptr); +void __llvm_profile_register_names_function(void *ptr, long int i); +} + +#endif diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp new file mode 100644 index 0000000000000..799477f5e47d2 --- /dev/null +++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp @@ -0,0 +1,19 @@ +//===------- Profiling.cpp ---------------------------------------- C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Profiling.h" + +#pragma omp begin declare target device_type(nohost) + +extern "C" { + +void __llvm_profile_register_function(void *ptr) {} +void __llvm_profile_register_names_function(void *ptr, long int i) {} +} + +#pragma omp end declare target >From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Sat, 16 Dec 2023 01:18:41 -0600 Subject: [PATCH 02/27] Fix PGO instrumentation for GPU targets --- clang/lib/CodeGen/CodeGenPGO.cpp | 10 ++++++++-- .../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..edae6885b528a 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, unsigned Counter = (*RegionCounterMap)[S]; - llvm::Value *Args[] = {FuncNameVar, - Builder.getInt64(FunctionHash), + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext()); + auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty); + auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, I8PtrTy); + + llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; if (!StepV) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index fe5a0578bd972..d2cb8155c1796 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1658,10 +1658,13 @@ void InstrLowerer::emitRegistration() { IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF)); for (Value *Data : CompilerUsedVars) if (!isa<Function>(Data)) - IRB.CreateCall(RuntimeRegisterF, Data); + // Check for addrspace cast when profiling GPU + IRB.CreateCall(RuntimeRegisterF, + IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy)); for (Value *Data : UsedVars) if (Data != NamesVar && !isa<Function>(Data)) - IRB.CreateCall(RuntimeRegisterF, Data); + IRB.CreateCall(RuntimeRegisterF, + IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy)); if (NamesVar) { Type *ParamTypes[] = {VoidPtrTy, Int64Ty}; @@ -1670,7 +1673,9 @@ void InstrLowerer::emitRegistration() { auto *NamesRegisterF = Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage, getInstrProfNamesRegFuncName(), M); - IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)}); + IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast( + NamesVar, VoidPtrTy), + IRB.getInt64(NamesSize)}); } IRB.CreateRetVoid(); >From 7a0e0efa178cc4de6a22a8f5cc3f53cd1c81ea3a Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Thu, 21 Dec 2023 00:25:46 -0600 Subject: [PATCH 03/27] Change global visibility on GPU targets --- llvm/include/llvm/ProfileData/InstrProf.h | 4 ++++ llvm/lib/ProfileData/InstrProf.cpp | 17 +++++++++++++++-- .../Instrumentation/InstrProfiling.cpp | 15 +++++++++++---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 288dc71d756ae..bf9899d867e3d 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -171,6 +171,10 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Determines whether module targets a GPU eligable for PGO +/// instrumentation +bool isGPUProfTarget(const Module &M); + /// Return the modified name for function \c F suitable to be /// used the key for profile lookup. Variable \c InLTO indicates if this /// is called in LTO optimization passes. diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 649d814cfd9de..0d6717aeb0142 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -410,13 +410,22 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; } +bool isGPUProfTarget(const Module &M) { + const auto &triple = M.getTargetTriple(); + return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 || + triple.rfind("r600", 0) == 0; +} + GlobalVariable *createPGOFuncNameVar(Module &M, GlobalValue::LinkageTypes Linkage, StringRef PGOFuncName) { + // Ensure profiling variables on GPU are visible to be read from host + if (isGPUProfTarget(M)) + Linkage = GlobalValue::ExternalLinkage; // We generally want to match the function's linkage, but available_externally // and extern_weak both have the wrong semantics, and anything that doesn't // need to link across compilation units doesn't need to be visible at all. - if (Linkage == GlobalValue::ExternalWeakLinkage) + else if (Linkage == GlobalValue::ExternalWeakLinkage) Linkage = GlobalValue::LinkOnceAnyLinkage; else if (Linkage == GlobalValue::AvailableExternallyLinkage) Linkage = GlobalValue::LinkOnceODRLinkage; @@ -430,8 +439,12 @@ GlobalVariable *createPGOFuncNameVar(Module &M, new GlobalVariable(M, Value->getType(), true, Linkage, Value, getPGOFuncNameVarName(PGOFuncName, Linkage)); + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) + FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); // Hide the symbol so that we correctly get a copy for each executable. - if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); return FuncNameVar; diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d2cb8155c1796..3b582b6519080 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1481,6 +1481,10 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); + if (isGPUProfTarget(M)) { + Linkage = GlobalValue::ExternalLinkage; + Visibility = GlobalValue::ProtectedVisibility; + } // If the data variable is not referenced by code (if we don't emit // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the // data variable live under linker GC, the data variable can be private. This @@ -1492,9 +1496,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees // that other copies must have the same CFG and cannot have value profiling. // If no hash suffix, other profd copies may be referenced by code. - if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) && - (TT.isOSBinFormatELF() || - (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) { + else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) && + (TT.isOSBinFormatELF() || + (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) { Linkage = GlobalValue::PrivateLinkage; Visibility = GlobalValue::DefaultVisibility; } @@ -1696,7 +1700,10 @@ bool InstrLowerer::emitRuntimeHook() { auto *Var = new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, getInstrProfRuntimeHookVarName()); - Var->setVisibility(GlobalValue::HiddenVisibility); + if (isGPUProfTarget(M)) + Var->setVisibility(GlobalValue::ProtectedVisibility); + else + Var->setVisibility(GlobalValue::HiddenVisibility); if (TT.isOSBinFormatELF() && !TT.isPS()) { // Mark the user variable as used so that it isn't stripped out. >From fddc07908ed9aa698fe3250ddbfc5621ab4d049d Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 22 Dec 2023 23:43:29 -0600 Subject: [PATCH 04/27] Make names global public on GPU --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 3b582b6519080..61fba7be3ee0e 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1621,6 +1621,13 @@ void InstrLowerer::emitNameData() { NamesVar = new GlobalVariable(M, NamesVal->getType(), true, GlobalValue::PrivateLinkage, NamesVal, getInstrProfNamesVarName()); + + // Make names variable public if current target is a GPU + if (isGPUProfTarget(M)) { + NamesVar->setLinkage(GlobalValue::ExternalLinkage); + NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility); + } + NamesSize = CompressedNameStr.size(); setGlobalVariableLargeSection(TT, *NamesVar); NamesVar->setSection( >From e9db03c70bf79f4f4ddad4b48a5aa63a37e0d4f6 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 29 Dec 2023 12:54:50 -0600 Subject: [PATCH 05/27] Read and print GPU device PGO globals --- .../common/include/GlobalHandler.h | 27 ++++++ .../common/src/GlobalHandler.cpp | 82 +++++++++++++++++++ .../common/src/PluginInterface.cpp | 14 ++++ 3 files changed, 123 insertions(+) diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h index fa079ac9660ee..a82cd53648765 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h @@ -14,9 +14,11 @@ #define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H #include <string> +#include <vector> #include "llvm/ADT/DenseMap.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/ProfileData/InstrProf.h" #include "Shared/Debug.h" #include "Shared/Utils.h" @@ -58,6 +60,22 @@ class GlobalTy { void setPtr(void *P) { Ptr = P; } }; +typedef void *IntPtrT; +struct __llvm_profile_data { +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// PGO profiling data extracted from a GPU device +struct GPUProfGlobals { + std::string names; + std::vector<std::vector<int64_t>> counts; + std::vector<__llvm_profile_data> data; + Triple targetTriple; + + void dump() const; +}; + /// Subclass of GlobalTy that holds the memory for a global of \p Ty. template <typename Ty> class StaticGlobalTy : public GlobalTy { Ty Data; @@ -172,6 +190,15 @@ class GenericGlobalHandlerTy { return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal, /* D2H */ false); } + + /// Checks whether a given image contains profiling globals. + bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image); + + /// Reads profiling data from a GPU image to supplied profdata struct. + /// Iterates through the image symbol table and stores global values + /// with profiling prefixes. + Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image); }; } // namespace plugin diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index 3a272e228c7df..5dd5daec468ca 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -176,3 +176,85 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, return Plugin::success(); } + +bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GlobalTy global(getInstrProfNamesVarName().str(), 0); + if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) { + consumeError(std::move(Err)); + return false; + } + return true; +} + +Expected<GPUProfGlobals> +GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, + DeviceImageTy &Image) { + GPUProfGlobals profdata; + const auto *elf = getOrCreateELFObjectFile(Device, Image); + profdata.targetTriple = elf->makeTriple(); + // Iterate through + for (auto &sym : elf->symbols()) { + if (auto name = sym.getName()) { + // Check if given current global is a profiling global based + // on name + if (name->equals(getInstrProfNamesVarName())) { + // Read in profiled function names + std::vector<char> chars(sym.getSize() / sizeof(char), ' '); + GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data()); + if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal)) + return Err; + std::string names(chars.begin(), chars.end()); + profdata.names = std::move(names); + } else if (name->starts_with(getInstrProfCountersVarPrefix())) { + // Read global variable profiling counts + std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0); + GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data()); + if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal)) + return Err; + profdata.counts.push_back(std::move(counts)); + } else if (name->starts_with(getInstrProfDataVarPrefix())) { + // Read profiling data for this global variable + __llvm_profile_data data{}; + GlobalTy DataGlobal(name->str(), sym.getSize(), &data); + if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal)) + return Err; + profdata.data.push_back(std::move(data)); + } + } + } + return profdata; +} + +void GPUProfGlobals::dump() const { + llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str() + << "\n"; + + llvm::outs() << "======== Counters =========\n"; + for (const auto &count : counts) { + llvm::outs() << "["; + for (size_t i = 0; i < count.size(); i++) { + if (i == 0) + llvm::outs() << " "; + llvm::outs() << count[i] << " "; + } + llvm::outs() << "]\n"; + } + + llvm::outs() << "========== Data ===========\n"; + for (const auto &d : data) { + llvm::outs() << "{ "; +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + llvm::outs() << d.Name << " "; +#include "llvm/ProfileData/InstrProfData.inc" + llvm::outs() << " }\n"; + } + + llvm::outs() << "======== Functions ========\n"; + InstrProfSymtab symtab; + if (Error Err = symtab.create(StringRef(names))) { + consumeError(std::move(Err)); + } + symtab.dumpNames(llvm::outs()); + llvm::outs() << "===========================\n"; +} diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index 3c7d1ca899878..84ed90f03f84f 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -811,6 +811,20 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { DeviceMemoryPoolTracking.AllocationMax); } + for (auto *Image : LoadedImages) { + GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); + if (!Handler.hasProfilingGlobals(*this, *Image)) + continue; + + GPUProfGlobals profdata; + auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); + if (!ProfOrErr) + return ProfOrErr.takeError(); + + // TODO: write data to profiling file + ProfOrErr->dump(); + } + // Delete the memory manager before deinitializing the device. Otherwise, // we may delete device allocations after the device is deinitialized. if (MemoryManager) >From e4687605d1a6ca932312025826db09dba84845a3 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 3 Jan 2024 17:06:15 -0600 Subject: [PATCH 06/27] Fix rebase bug --- .../plugins-nextgen/common/src/GlobalHandler.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index cb71b61f4a9c4..86742d0f77a2f 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -178,10 +178,12 @@ Expected<GPUProfGlobals> GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image) { GPUProfGlobals profdata; - const auto *elf = getOrCreateELFObjectFile(Device, Image); - profdata.targetTriple = elf->makeTriple(); - // Iterate through - for (auto &sym : elf->symbols()) { + auto ELFObj = getELFObjectFile(Image); + if (!ELFObj) + return ELFObj.takeError(); + profdata.targetTriple = ELFObj->makeTriple(); + // Iterate through elf symbols + for (auto &sym : ELFObj->symbols()) { if (auto name = sym.getName()) { // Check if given current global is a profiling global based // on name >From ec18ce94c227e1d43927955fa1c67360ecfcfca6 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 3 Jan 2024 17:10:19 -0600 Subject: [PATCH 07/27] Refactor portions to be more idiomatic --- clang/lib/CodeGen/CodeGenPGO.cpp | 4 +--- llvm/lib/ProfileData/InstrProf.cpp | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index edae6885b528a..7bfcec43ee4c9 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -961,10 +961,8 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext()); - auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty); auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - FuncNameVar, I8PtrTy); + FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext())); llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), Builder.getInt32(NumRegionCounters), diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index cdcd6840bb510..1d88da16a5ff9 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -429,9 +429,8 @@ std::string getPGOFuncNameVarName(StringRef FuncName, } bool isGPUProfTarget(const Module &M) { - const auto &triple = M.getTargetTriple(); - return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 || - triple.rfind("r600", 0) == 0; + const auto &Triple = llvm::Triple(M.getTargetTriple()); + return Triple.isAMDGPU() || Triple.isNVPTX(); } GlobalVariable *createPGOFuncNameVar(Module &M, >From 0872556f597056361b0a2c23cdd0be3d9745aef3 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 3 Jan 2024 17:18:47 -0600 Subject: [PATCH 08/27] Reformat DeviceRTL prof functions --- openmp/libomptarget/DeviceRTL/include/Profiling.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h index 68c7744cd6075..9efc1554c176b 100644 --- a/openmp/libomptarget/DeviceRTL/include/Profiling.h +++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h @@ -13,9 +13,8 @@ #define OMPTARGET_DEVICERTL_PROFILING_H extern "C" { - -void __llvm_profile_register_function(void *ptr); -void __llvm_profile_register_names_function(void *ptr, long int i); +void __llvm_profile_register_function(void *Ptr); +void __llvm_profile_register_names_function(void *Ptr, long int I); } #endif >From 62f31d1c71b5d100f38d6dc584cc138b3904581b Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Tue, 9 Jan 2024 11:52:29 -0600 Subject: [PATCH 09/27] Style changes + catch name error --- .../common/include/GlobalHandler.h | 16 ++-- .../common/src/GlobalHandler.cpp | 87 ++++++++++--------- 2 files changed, 56 insertions(+), 47 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h index a803b3f76d8b2..755bb23a414e3 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h @@ -13,8 +13,7 @@ #ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H #define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H -#include <string> -#include <vector> +#include <type_traits> #include "llvm/ADT/DenseMap.h" #include "llvm/Object/ELFObjectFile.h" @@ -60,18 +59,19 @@ class GlobalTy { void setPtr(void *P) { Ptr = P; } }; -typedef void *IntPtrT; +using IntPtrT = void *; struct __llvm_profile_data { -#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name; +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + std::remove_const<Type>::type Name; #include "llvm/ProfileData/InstrProfData.inc" }; /// PGO profiling data extracted from a GPU device struct GPUProfGlobals { - std::string names; - std::vector<std::vector<int64_t>> counts; - std::vector<__llvm_profile_data> data; - Triple targetTriple; + SmallVector<uint8_t> NamesData; + SmallVector<SmallVector<int64_t>> Counts; + SmallVector<__llvm_profile_data> Data; + Triple TargetTriple; void dump() const; }; diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index 86742d0f77a2f..7cb672e7b2683 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Error.h" #include <cstring> +#include <string> using namespace llvm; using namespace omp; @@ -177,73 +178,81 @@ bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device, Expected<GPUProfGlobals> GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image) { - GPUProfGlobals profdata; + GPUProfGlobals DeviceProfileData; auto ELFObj = getELFObjectFile(Image); if (!ELFObj) return ELFObj.takeError(); - profdata.targetTriple = ELFObj->makeTriple(); + DeviceProfileData.TargetTriple = ELFObj->makeTriple(); + // Iterate through elf symbols - for (auto &sym : ELFObj->symbols()) { - if (auto name = sym.getName()) { - // Check if given current global is a profiling global based - // on name - if (name->equals(getInstrProfNamesVarName())) { - // Read in profiled function names - std::vector<char> chars(sym.getSize() / sizeof(char), ' '); - GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data()); - if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal)) - return Err; - std::string names(chars.begin(), chars.end()); - profdata.names = std::move(names); - } else if (name->starts_with(getInstrProfCountersVarPrefix())) { - // Read global variable profiling counts - std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0); - GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data()); - if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal)) - return Err; - profdata.counts.push_back(std::move(counts)); - } else if (name->starts_with(getInstrProfDataVarPrefix())) { - // Read profiling data for this global variable - __llvm_profile_data data{}; - GlobalTy DataGlobal(name->str(), sym.getSize(), &data); - if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal)) - return Err; - profdata.data.push_back(std::move(data)); - } + for (auto &Sym : ELFObj->symbols()) { + auto NameOrErr = Sym.getName(); + if (!NameOrErr) + return ELFObj.takeError(); + + // Check if given current global is a profiling global based + // on name + if (NameOrErr->equals(getInstrProfNamesVarName())) { + // Read in profiled function names + DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0); + GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(), + DeviceProfileData.NamesData.data()); + if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal)) + return Err; + } else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) { + // Read global variable profiling counts + SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0); + GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data()); + if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal)) + return Err; + DeviceProfileData.Counts.push_back(std::move(Counts)); + } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) { + // Read profiling data for this global variable + __llvm_profile_data Data{}; + GlobalTy DataGlobal(NameOrErr->str(), Sym.getSize(), &Data); + if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal)) + return Err; + DeviceProfileData.Data.push_back(std::move(Data)); } } - return profdata; + return DeviceProfileData; } void GPUProfGlobals::dump() const { - llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str() + llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str() << "\n"; llvm::outs() << "======== Counters =========\n"; - for (const auto &count : counts) { + for (const auto &Count : Counts) { llvm::outs() << "["; - for (size_t i = 0; i < count.size(); i++) { + for (size_t i = 0; i < Count.size(); i++) { if (i == 0) llvm::outs() << " "; - llvm::outs() << count[i] << " "; + llvm::outs() << Count[i] << " "; } llvm::outs() << "]\n"; } llvm::outs() << "========== Data ===========\n"; - for (const auto &d : data) { + for (const auto &ProfData : Data) { llvm::outs() << "{ "; #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ - llvm::outs() << d.Name << " "; + llvm::outs() << ProfData.Name << " "; #include "llvm/ProfileData/InstrProfData.inc" llvm::outs() << " }\n"; } llvm::outs() << "======== Functions ========\n"; - InstrProfSymtab symtab; - if (Error Err = symtab.create(StringRef(names))) { + std::string s; + s.reserve(NamesData.size()); + for (uint8_t Name : NamesData) { + s.push_back((char)Name); + } + + InstrProfSymtab Symtab; + if (Error Err = Symtab.create(StringRef(s))) { consumeError(std::move(Err)); } - symtab.dumpNames(llvm::outs()); + Symtab.dumpNames(llvm::outs()); llvm::outs() << "===========================\n"; } >From 0c4bbeb54d189c1461affd37853aa86c3e3ca7d8 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 17 Jan 2024 19:59:06 -0600 Subject: [PATCH 10/27] Add GPU PGO test --- .../common/src/GlobalHandler.cpp | 2 +- openmp/libomptarget/test/CMakeLists.txt | 6 +++ openmp/libomptarget/test/lit.cfg | 3 ++ openmp/libomptarget/test/lit.site.cfg.in | 2 +- openmp/libomptarget/test/offloading/pgo1.c | 39 +++++++++++++++++++ 5 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/pgo1.c diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index 7cb672e7b2683..e5eb653d02228 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -239,7 +239,7 @@ void GPUProfGlobals::dump() const { #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ llvm::outs() << ProfData.Name << " "; #include "llvm/ProfileData/InstrProfData.inc" - llvm::outs() << " }\n"; + llvm::outs() << "}\n"; } llvm::outs() << "======== Functions ========\n"; diff --git a/openmp/libomptarget/test/CMakeLists.txt b/openmp/libomptarget/test/CMakeLists.txt index a0ba233eaa572..21233f3e252eb 100644 --- a/openmp/libomptarget/test/CMakeLists.txt +++ b/openmp/libomptarget/test/CMakeLists.txt @@ -12,6 +12,12 @@ else() set(LIBOMPTARGET_DEBUG False) endif() +if (OPENMP_STANDALONE_BUILD) + set(LIBOMPTARGET_TEST_GPU_PGO False) +else() + set(LIBOMPTARGET_TEST_GPU_PGO True) +endif() + # Replace the space from user's input with ";" in case that CMake add escape # char into the lit command. string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}") diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg index 19c5e5c457222..49743f9fed7f2 100644 --- a/openmp/libomptarget/test/lit.cfg +++ b/openmp/libomptarget/test/lit.cfg @@ -104,6 +104,9 @@ config.available_features.add(config.libomptarget_current_target) if config.libomptarget_has_libc: config.available_features.add('libc') +if config.libomptarget_test_pgo: + config.available_features.add('pgo') + # Determine whether the test system supports unified memory. # For CUDA, this is the case with compute capability 70 (Volta) or higher. # For all other targets, we currently assume it is. diff --git a/openmp/libomptarget/test/lit.site.cfg.in b/openmp/libomptarget/test/lit.site.cfg.in index 2d63811883872..494d1636af304 100644 --- a/openmp/libomptarget/test/lit.site.cfg.in +++ b/openmp/libomptarget/test/lit.site.cfg.in @@ -25,6 +25,6 @@ config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@" config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@ - +config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@ # Let the main config do the real work. lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c new file mode 100644 index 0000000000000..ca8a6f502a06a --- /dev/null +++ b/openmp/libomptarget/test/offloading/pgo1.c @@ -0,0 +1,39 @@ +// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang" +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic + +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// REQUIRES: pgo + +#ifdef _OPENMP +#include <omp.h> +#endif + +int test1(int a) { return a / 2; } +int test2(int a) { return a * 2; } + +int main() { + int m = 2; +#pragma omp target + for (int i = 0; i < 10; i++) { + m = test1(m); + for (int j = 0; j < 2; j++) { + m = test2(m); + } + } +} + +// CHECK: ======== Counters ========= +// CHECK-NEXT: [ 0 11 20 ] +// CHECK-NEXT: [ 10 ] +// CHECK-NEXT: [ 20 ] +// CHECK-NEXT: ========== Data =========== +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: ======== Functions ======== +// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} +// CHECK-NEXT: test1 +// CHECK-NEXT: test2 >From c7ae2a74daa93b05058fcc9bba64e0734359362c Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 17 Jan 2024 23:12:27 -0600 Subject: [PATCH 11/27] Fix PGO test formatting --- openmp/libomptarget/test/offloading/pgo1.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c index ca8a6f502a06a..389be19b670d7 100644 --- a/openmp/libomptarget/test/offloading/pgo1.c +++ b/openmp/libomptarget/test/offloading/pgo1.c @@ -1,4 +1,5 @@ -// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang" +// RUN: %libomptarget-compile-generic -fprofile-instr-generate \ +// RUN: -Xclang "-fprofile-instrument=clang" // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic // UNSUPPORTED: x86_64-pc-linux-gnu @@ -30,9 +31,18 @@ int main() { // CHECK-NEXT: [ 10 ] // CHECK-NEXT: [ 20 ] // CHECK-NEXT: ========== Data =========== -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } // CHECK-NEXT: ======== Functions ======== // CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} // CHECK-NEXT: test1 >From 8bb22072914bbb830e2788d117aedd0e0bab66ff Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Thu, 18 Jan 2024 23:15:55 -0600 Subject: [PATCH 12/27] Refactor visibility logic --- llvm/lib/ProfileData/InstrProf.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 511571a3eed9b..708ea63fd95e0 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -422,6 +422,16 @@ bool isGPUProfTarget(const Module &M) { return Triple.isAMDGPU() || Triple.isNVPTX(); } +void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) { + // If the target is a GPU, make the symbol protected so it can + // be read from the host device + if (isGPUProfTarget(M)) + FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); + // Hide the symbol so that we correctly get a copy for each executable. + else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); +} + GlobalVariable *createPGOFuncNameVar(Module &M, GlobalValue::LinkageTypes Linkage, StringRef PGOFuncName) { @@ -445,14 +455,7 @@ GlobalVariable *createPGOFuncNameVar(Module &M, new GlobalVariable(M, Value->getType(), true, Linkage, Value, getPGOFuncNameVarName(PGOFuncName, Linkage)); - // If the target is a GPU, make the symbol protected so it can - // be read from the host device - if (isGPUProfTarget(M)) - FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility); - // Hide the symbol so that we correctly get a copy for each executable. - else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) - FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); - + setPGOFuncVisibility(M, FuncNameVar); return FuncNameVar; } >From 9f13943f64cb16162e44902d54de53a9b1229179 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Tue, 23 Jan 2024 18:33:58 -0600 Subject: [PATCH 13/27] Add LLVM instrumentation support This PR formerly only supported -fprofile-instrument=clang. This commit adds support for -fprofile-instrument=llvm --- .../Instrumentation/PGOInstrumentation.cpp | 12 +++- openmp/libomptarget/test/offloading/pgo1.c | 72 +++++++++++++------ 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c20fc942eaf0d..bbc8da78fd7ba 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -862,6 +862,10 @@ static void instrumentOneFunc( auto Name = FuncInfo.FuncNameVar; auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), FuncInfo.FunctionHash); + // Make sure that pointer to global is passed in with zero addrspace + // This is relevant during GPU profiling + auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + Name, llvm::PointerType::getUnqual(M->getContext())); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); @@ -869,7 +873,7 @@ static void instrumentOneFunc( // i32 <index>) Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), - {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); + {NormalizedPtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); return; } @@ -887,7 +891,8 @@ static void instrumentOneFunc( // i32 <index>) Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), - {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); + {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters), + Builder.getInt32(I)}); I += PGOBlockCoverage ? 8 : 1; } @@ -901,7 +906,8 @@ static void instrumentOneFunc( Intrinsic::getDeclaration(M, PGOBlockCoverage ? Intrinsic::instrprof_cover : Intrinsic::instrprof_increment), - {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); + {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters), + Builder.getInt32(I++)}); } // Now instrument select instructions: diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c index 389be19b670d7..d95793b508dcf 100644 --- a/openmp/libomptarget/test/offloading/pgo1.c +++ b/openmp/libomptarget/test/offloading/pgo1.c @@ -1,6 +1,11 @@ // RUN: %libomptarget-compile-generic -fprofile-instr-generate \ // RUN: -Xclang "-fprofile-instrument=clang" -// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \ +// RUN: --check-prefix="CLANG-PGO" +// RUN: %libomptarget-compile-generic -fprofile-generate \ +// RUN: -Xclang "-fprofile-instrument=llvm" +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \ +// RUN: --check-prefix="LLVM-PGO" // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -26,24 +31,47 @@ int main() { } } -// CHECK: ======== Counters ========= -// CHECK-NEXT: [ 0 11 20 ] -// CHECK-NEXT: [ 10 ] -// CHECK-NEXT: [ 20 ] -// CHECK-NEXT: ========== Data =========== -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CHECK-NEXT: ======== Functions ======== -// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} -// CHECK-NEXT: test1 -// CHECK-NEXT: test2 +// CLANG-PGO: ======== Counters ========= +// CLANG-PGO-NEXT: [ 0 11 20 ] +// CLANG-PGO-NEXT: [ 10 ] +// CLANG-PGO-NEXT: [ 20 ] +// CLANG-PGO-NEXT: ========== Data =========== +// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// CLANG-PGO-NEXT: ======== Functions ======== +// CLANG-PGO-NEXT: pgo1.c: +// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} +// CLANG-PGO-NEXT: test1 +// CLANG-PGO-NEXT: test2 + +// LLVM-PGO: ======== Counters ========= +// LLVM-PGO-NEXT: [ 20 ] +// LLVM-PGO-NEXT: [ 10 ] +// LLVM-PGO-NEXT: [ 20 10 1 1 ] +// LLVM-PGO-NEXT: ========== Data =========== +// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} +// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } +// LLVM-PGO-NEXT: ======== Functions ======== +// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} +// LLVM-PGO-NEXT: test1 +// LLVM-PGO-NEXT: test2 >From 0606f0dd1b32ef9ebe138bbc964b3921e22d95d1 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 14 Feb 2024 01:46:55 -0600 Subject: [PATCH 14/27] Use explicit addrspace instead of unqual --- clang/lib/CodeGen/CodeGenPGO.cpp | 2 +- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index e084dda879cbc..4c75a01222d30 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1103,7 +1103,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext())); + FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), Builder.getInt32(NumRegionCounters), diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index bbc8da78fd7ba..c63b3e4ecf786 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -865,7 +865,7 @@ static void instrumentOneFunc( // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - Name, llvm::PointerType::getUnqual(M->getContext())); + Name, llvm::PointerType::get(M->getContext(), 0)); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); >From c1f9be321678766525141214aaab74636cafbc2c Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Thu, 15 Feb 2024 19:10:09 -0600 Subject: [PATCH 15/27] Remove redundant namespaces --- .../Instrumentation/PGOInstrumentation.cpp | 4 +-- .../common/src/GlobalHandler.cpp | 26 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c63b3e4ecf786..3058e577738fd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -864,8 +864,8 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - Name, llvm::PointerType::get(M->getContext(), 0)); + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + Name, PointerType::get(M->getContext(), 0)); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index e5eb653d02228..ae270c60804d2 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -219,30 +219,30 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, } void GPUProfGlobals::dump() const { - llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str() + outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str() << "\n"; - llvm::outs() << "======== Counters =========\n"; + outs() << "======== Counters =========\n"; for (const auto &Count : Counts) { - llvm::outs() << "["; + outs() << "["; for (size_t i = 0; i < Count.size(); i++) { if (i == 0) - llvm::outs() << " "; - llvm::outs() << Count[i] << " "; + outs() << " "; + outs() << Count[i] << " "; } - llvm::outs() << "]\n"; + outs() << "]\n"; } - llvm::outs() << "========== Data ===========\n"; + outs() << "========== Data ===========\n"; for (const auto &ProfData : Data) { - llvm::outs() << "{ "; + outs() << "{ "; #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ - llvm::outs() << ProfData.Name << " "; + outs() << ProfData.Name << " "; #include "llvm/ProfileData/InstrProfData.inc" - llvm::outs() << "}\n"; + outs() << "}\n"; } - llvm::outs() << "======== Functions ========\n"; + outs() << "======== Functions ========\n"; std::string s; s.reserve(NamesData.size()); for (uint8_t Name : NamesData) { @@ -253,6 +253,6 @@ void GPUProfGlobals::dump() const { if (Error Err = Symtab.create(StringRef(s))) { consumeError(std::move(Err)); } - Symtab.dumpNames(llvm::outs()); - llvm::outs() << "===========================\n"; + Symtab.dumpNames(outs()); + outs() << "===========================\n"; } >From 6a3ae407e69e7524f0f808329c534f8352ee1779 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Thu, 15 Feb 2024 19:15:15 -0600 Subject: [PATCH 16/27] Clang format --- .../libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index ae270c60804d2..1fce244892262 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -220,7 +220,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, void GPUProfGlobals::dump() const { outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str() - << "\n"; + << "\n"; outs() << "======== Counters =========\n"; for (const auto &Count : Counts) { >From 6866862d459e3c3fa65fae8ae639ddc3ff735252 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 16 Feb 2024 13:13:39 -0600 Subject: [PATCH 17/27] Use getAddrSpaceCast Replace getPointerBitCastOrAddrSpaceCast with getAddrSpaceCast and allow no-op getAddrSpaceCast calls when types are identical --- clang/lib/CodeGen/CodeGenPGO.cpp | 2 +- llvm/lib/IR/Constants.cpp | 4 ++++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 8f52018445d2b..baceeba8380dd 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast( FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index a38b912164b13..2d89c5bbd4a4c 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy, Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy, bool OnlyIfReduced) { + // Skip cast if types are identical + if (C->getType() == DstTy) + return C; + assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && "Invalid constantexpr addrspacecast!"); return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3058e577738fd..c0be71aa4cc00 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -864,7 +864,7 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast( Name, PointerType::get(M->getContext(), 0)); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); >From 62a5ee1c75545571f81d9edd22e19e9ef7cff69f Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Tue, 27 Feb 2024 14:53:51 -0600 Subject: [PATCH 18/27] Revert "Use getAddrSpaceCast" This reverts commit 6866862d459e3c3fa65fae8ae639ddc3ff735252. --- clang/lib/CodeGen/CodeGenPGO.cpp | 2 +- llvm/lib/IR/Constants.cpp | 4 ---- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index baceeba8380dd..8f52018445d2b 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast( + auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 2d89c5bbd4a4c..a38b912164b13 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2067,10 +2067,6 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy, Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy, bool OnlyIfReduced) { - // Skip cast if types are identical - if (C->getType() == DstTy) - return C; - assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && "Invalid constantexpr addrspacecast!"); return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c0be71aa4cc00..3058e577738fd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -864,7 +864,7 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast( + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( Name, PointerType::get(M->getContext(), 0)); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); >From 052394fa28c923d130bf73a07b965a9751467302 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Tue, 27 Feb 2024 15:34:34 -0600 Subject: [PATCH 19/27] Revert "Use getAddrSpaceCast" This reverts commit 6866862d459e3c3fa65fae8ae639ddc3ff735252. --- clang/lib/CodeGen/CodeGenPGO.cpp | 2 +- llvm/lib/IR/Constants.cpp | 4 ---- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index baceeba8380dd..8f52018445d2b 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast( + auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0)); llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash), diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 2d89c5bbd4a4c..a38b912164b13 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2067,10 +2067,6 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy, Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy, bool OnlyIfReduced) { - // Skip cast if types are identical - if (C->getType() == DstTy) - return C; - assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && "Invalid constantexpr addrspacecast!"); return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c0be71aa4cc00..3058e577738fd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -864,7 +864,7 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); // Make sure that pointer to global is passed in with zero addrspace // This is relevant during GPU profiling - auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast( + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( Name, PointerType::get(M->getContext(), 0)); if (PGOFunctionEntryCoverage) { auto &EntryBB = F.getEntryBlock(); >From 612d5a5f6966a77e82e5591f5aea475fbf886e55 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 1 Mar 2024 02:04:00 -0600 Subject: [PATCH 20/27] Write PGO TODO: Fix tests --- compiler-rt/lib/profile/InstrProfiling.h | 11 ++ compiler-rt/lib/profile/InstrProfilingFile.c | 148 +++++++++++++++--- .../common/include/GlobalHandler.h | 14 +- .../common/src/GlobalHandler.cpp | 57 +++++-- .../common/src/PluginInterface.cpp | 6 +- 5 files changed, 200 insertions(+), 36 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 0123908336918..937acbd417de4 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -275,6 +275,17 @@ void __llvm_profile_get_padding_sizes_for_counters( */ void __llvm_profile_set_dumped(); +/*! + * \brief Write custom target-specific profiling data to a seperate file. + * Used by libomptarget for GPU PGO. + */ +int __llvm_write_custom_profile(const char *Target, + const __llvm_profile_data *DataBegin, + const __llvm_profile_data *DataEnd, + const char *CountersBegin, + const char *CountersEnd, const char *NamesBegin, + const char *NamesEnd); + /*! * This variable is defined in InstrProfilingRuntime.cpp as a hidden * symbol. Its main purpose is to enable profile runtime user to diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index f3b457d786e6b..4fc401bb9bebf 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -502,27 +502,15 @@ static FILE *getFileObject(const char *OutputName) { return fopen(OutputName, "ab"); } -/* Write profile data to file \c OutputName. */ -static int writeFile(const char *OutputName) { - int RetVal; - FILE *OutputFile; - - int MergeDone = 0; +/* Get file object and merge if applicable */ +static FILE *getMergeFileObject(const char *OutputName, int *MergeDone) { VPMergeHook = &lprofMergeValueProfData; if (doMerging()) - OutputFile = openFileForMerging(OutputName, &MergeDone); - else - OutputFile = getFileObject(OutputName); - - if (!OutputFile) - return -1; - - FreeHook = &free; - setupIOBuffer(); - ProfDataWriter fileWriter; - initFileWriter(&fileWriter, OutputFile); - RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone); + return openFileForMerging(OutputName, MergeDone); + return getFileObject(OutputName); +} +static void closeFileObject(FILE *OutputFile) { if (OutputFile == getProfileFile()) { fflush(OutputFile); if (doMerging() && !__llvm_profile_is_continuous_mode_enabled()) { @@ -531,7 +519,23 @@ static int writeFile(const char *OutputName) { } else { fclose(OutputFile); } +} + +/* Write profile data to file \c OutputName. */ +static int writeFile(const char *OutputName) { + int RetVal, MergeDone = 0; + FILE *OutputFile = getMergeFileObject(OutputName, &MergeDone); + + if (!OutputFile) + return -1; + + FreeHook = &free; + setupIOBuffer(); + ProfDataWriter fileWriter; + initFileWriter(&fileWriter, OutputFile); + RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone); + closeFileObject(OutputFile); return RetVal; } @@ -558,10 +562,16 @@ static int writeOrderFile(const char *OutputName) { #define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE" +static void forceTruncateFile(const char *Filename) { + FILE *File = fopen(Filename, "w"); + if (!File) + return; + fclose(File); +} + static void truncateCurrentFile(void) { const char *Filename; char *FilenameBuf; - FILE *File; int Length; Length = getCurFilenameLength(); @@ -591,10 +601,7 @@ static void truncateCurrentFile(void) { return; /* Truncate the file. Later we'll reopen and append. */ - File = fopen(Filename, "w"); - if (!File) - return; - fclose(File); + forceTruncateFile(Filename); } /* Write a partial profile to \p Filename, which is required to be backed by @@ -1271,4 +1278,99 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } +int __llvm_write_custom_profile(const char *Target, + const __llvm_profile_data *DataBegin, + const __llvm_profile_data *DataEnd, + const char *CountersBegin, + const char *CountersEnd, const char *NamesBegin, + const char *NamesEnd) { + int ReturnValue = 0, FilenameLength, TargetLength, MergeDone; + char *FilenameBuf, *TargetFilename; + const char *Filename; + + /* Save old profile data */ + FILE *oldFile = getProfileFile(); + + // Temporarily suspend getting SIGKILL when the parent exits. + int PDeathSig = lprofSuspendSigKill(); + + if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) { + PROF_NOTE("Profile data not written to file: %s.\n", "already written"); + if (PDeathSig == 1) + lprofRestoreSigKill(); + return 0; + } + + /* Get current filename */ + FilenameLength = getCurFilenameLength(); + FilenameBuf = (char *)COMPILER_RT_ALLOCA(FilenameLength + 1); + Filename = getCurFilename(FilenameBuf, 0); + + /* Check the filename. */ + if (!Filename) { + PROF_ERR("Failed to write file : %s\n", "Filename not set"); + if (PDeathSig == 1) + lprofRestoreSigKill(); + return -1; + } + + /* Allocate new space for our target-specific PGO filename */ + TargetLength = strlen(Target); + TargetFilename = + (char *)COMPILER_RT_ALLOCA(FilenameLength + TargetLength + 2); + + /* Prepend "TARGET." to current filename */ + memcpy(TargetFilename, Target, TargetLength); + TargetFilename[TargetLength] = '.'; + memcpy(TargetFilename, Target, TargetLength); + memcpy(TargetFilename + 1 + TargetLength, Filename, FilenameLength); + TargetFilename[FilenameLength + 1 + TargetLength] = 0; + + /* Check if there is llvm/runtime version mismatch. */ + if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { + PROF_ERR("Runtime and instrumentation version mismatch : " + "expected %d, but get %d\n", + INSTR_PROF_RAW_VERSION, + (int)GET_VERSION(__llvm_profile_get_version())); + if (PDeathSig == 1) + lprofRestoreSigKill(); + return -1; + } + + /* Clean old target file */ + forceTruncateFile(TargetFilename); + + /* Open target-specific PGO file */ + MergeDone = 0; + FILE *OutputFile = getMergeFileObject(TargetFilename, &MergeDone); + + if (!OutputFile) { + PROF_ERR("Failed to open file : %s\n", TargetFilename); + if (PDeathSig == 1) + lprofRestoreSigKill(); + return -1; + } + + FreeHook = &free; + setupIOBuffer(); + ProfDataWriter fileWriter; + initFileWriter(&fileWriter, OutputFile); + + /* Write custom data to the file */ + ReturnValue = lprofWriteDataImpl( + &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL, + lprofGetVPDataReader(), NamesBegin, NamesEnd, MergeDone); + + closeFileObject(OutputFile); + + // Restore SIGKILL. + if (PDeathSig == 1) + lprofRestoreSigKill(); + + /* Restore old profiling file */ + setProfileFile(oldFile); + + return ReturnValue; +} + #endif diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h index f5a15ca11bfcd..af0cd4dcdf5dc 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h @@ -63,14 +63,24 @@ struct __llvm_profile_data { #include "llvm/ProfileData/InstrProfData.inc" }; +extern "C" { +extern int __attribute__((weak)) +__llvm_write_custom_profile(const char *Target, + const __llvm_profile_data *DataBegin, + const __llvm_profile_data *DataEnd, + const char *CountersBegin, const char *CountersEnd, + const char *NamesBegin, const char *NamesEnd); +} + /// PGO profiling data extracted from a GPU device struct GPUProfGlobals { - SmallVector<uint8_t> NamesData; - SmallVector<SmallVector<int64_t>> Counts; + SmallVector<int64_t> Counts; SmallVector<__llvm_profile_data> Data; + SmallVector<uint8_t> NamesData; Triple TargetTriple; void dump() const; + Error write() const; }; /// Subclass of GlobalTy that holds the memory for a global of \p Ty. diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp index 1fce244892262..2f16b6e3c139e 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp @@ -205,7 +205,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data()); if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal)) return Err; - DeviceProfileData.Counts.push_back(std::move(Counts)); + DeviceProfileData.Counts.append(std::move(Counts)); } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) { // Read profiling data for this global variable __llvm_profile_data Data{}; @@ -223,15 +223,14 @@ void GPUProfGlobals::dump() const { << "\n"; outs() << "======== Counters =========\n"; - for (const auto &Count : Counts) { - outs() << "["; - for (size_t i = 0; i < Count.size(); i++) { - if (i == 0) - outs() << " "; - outs() << Count[i] << " "; - } - outs() << "]\n"; + for (size_t i = 0; i < Counts.size(); i++) { + if (i > 0 && i % 10 == 0) + outs() << "\n"; + else if (i != 0) + outs() << " "; + outs() << Counts[i]; } + outs() << "\n"; outs() << "========== Data ===========\n"; for (const auto &ProfData : Data) { @@ -256,3 +255,43 @@ void GPUProfGlobals::dump() const { Symtab.dumpNames(outs()); outs() << "===========================\n"; } + +Error GPUProfGlobals::write() const { + if (!__llvm_write_custom_profile) + return Plugin::error("Could not find symbol __llvm_write_custom_profile. " + "The compiler-rt profiling library must be linked for " + "GPU PGO to work."); + + size_t DataSize = Data.size() * sizeof(__llvm_profile_data), + CountsSize = Counts.size() * sizeof(int64_t); + __llvm_profile_data *DataBegin, *DataEnd; + char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd; + + // Initialize array of contiguous data. We need to make sure each section is + // contiguous so that the PGO library can compute deltas properly + SmallVector<uint8_t> ContiguousData(NamesData.size() + DataSize + CountsSize); + + // Compute region pointers + DataBegin = (__llvm_profile_data *)(ContiguousData.data() + CountsSize); + DataEnd = + (__llvm_profile_data *)(ContiguousData.data() + CountsSize + DataSize); + CountersBegin = (char *)ContiguousData.data(); + CountersEnd = (char *)(ContiguousData.data() + CountsSize); + NamesBegin = (char *)(ContiguousData.data() + CountsSize + DataSize); + NamesEnd = (char *)(ContiguousData.data() + CountsSize + DataSize + + NamesData.size()); + + // Copy data to contiguous buffer + memcpy(DataBegin, Data.data(), DataSize); + memcpy(CountersBegin, Counts.data(), CountsSize); + memcpy(NamesBegin, NamesData.data(), NamesData.size()); + + // Invoke compiler-rt entrypoint + int result = __llvm_write_custom_profile(TargetTriple.str().c_str(), + DataBegin, DataEnd, CountersBegin, + CountersEnd, NamesBegin, NamesEnd); + if (result != 0) + return Plugin::error("Error writing GPU PGO data to file"); + + return Plugin::success(); +} diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index 1ea93795ce8ce..d5e6b6128152d 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -837,8 +837,10 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { if (!ProfOrErr) return ProfOrErr.takeError(); - // TODO: write data to profiling file - ProfOrErr->dump(); + // Write data to profiling file + if (auto Err = ProfOrErr->write()) { + consumeError(std::move(Err)); + } } // Delete the memory manager before deinitializing the device. Otherwise, >From b8c916305acf08c0bd2d51b81875be5e8fc59ff3 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Wed, 13 Mar 2024 20:05:32 -0500 Subject: [PATCH 21/27] Fix tests --- .../plugins-nextgen/common/src/PluginInterface.cpp | 3 +++ openmp/libomptarget/test/offloading/pgo1.c | 8 ++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index d5e6b6128152d..2359ad28a25b0 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -837,6 +837,9 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { if (!ProfOrErr) return ProfOrErr.takeError(); + // Dump out profdata + ProfOrErr->dump(); + // Write data to profiling file if (auto Err = ProfOrErr->write()) { consumeError(std::move(Err)); diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c index d95793b508dcf..79e93d0f10827 100644 --- a/openmp/libomptarget/test/offloading/pgo1.c +++ b/openmp/libomptarget/test/offloading/pgo1.c @@ -32,9 +32,7 @@ int main() { } // CLANG-PGO: ======== Counters ========= -// CLANG-PGO-NEXT: [ 0 11 20 ] -// CLANG-PGO-NEXT: [ 10 ] -// CLANG-PGO-NEXT: [ 20 ] +// CLANG-PGO-NEXT: 0 11 20 10 20 // CLANG-PGO-NEXT: ========== Data =========== // CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} // CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} @@ -55,9 +53,7 @@ int main() { // CLANG-PGO-NEXT: test2 // LLVM-PGO: ======== Counters ========= -// LLVM-PGO-NEXT: [ 20 ] -// LLVM-PGO-NEXT: [ 10 ] -// LLVM-PGO-NEXT: [ 20 10 1 1 ] +// LLVM-PGO-NEXT: 20 10 20 10 1 1 // LLVM-PGO-NEXT: ========== Data =========== // LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} // LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} >From 7770b37a5a4c40bd45887f762bd7f1e652bc0ed2 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Tue, 7 May 2024 16:31:48 -0500 Subject: [PATCH 22/27] Fix params --- compiler-rt/lib/profile/InstrProfilingFile.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 466bfe480543b..bc1d40a37a5ad 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1360,9 +1360,10 @@ int __llvm_write_custom_profile(const char *Target, initFileWriter(&fileWriter, OutputFile); /* Write custom data to the file */ - ReturnValue = lprofWriteDataImpl( - &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL, - lprofGetVPDataReader(), NamesBegin, NamesEnd, MergeDone); + ReturnValue = + lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin, + CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL, + NULL, NULL, NULL, NamesBegin, NamesEnd, MergeDone); closeFileObject(OutputFile); >From aa895a1788969a0d27692057a1457074e9772c78 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Mon, 18 Mar 2024 21:31:32 -0500 Subject: [PATCH 23/27] Fix elf obj file --- offload/plugins-nextgen/common/src/GlobalHandler.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp index 80cdcaff75528..7717e19a5b677 100644 --- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp @@ -177,16 +177,19 @@ Expected<GPUProfGlobals> GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image) { GPUProfGlobals DeviceProfileData; - auto ELFObj = getELFObjectFile(Image); - if (!ELFObj) - return ELFObj.takeError(); + auto ObjFile = getELFObjectFile(Image); + if (!ObjFile) + return ObjFile.takeError(); + + std::unique_ptr<ELFObjectFileBase> ELFObj( + static_cast<ELFObjectFileBase *>(ObjFile->release())); DeviceProfileData.TargetTriple = ELFObj->makeTriple(); // Iterate through elf symbols for (auto &Sym : ELFObj->symbols()) { auto NameOrErr = Sym.getName(); if (!NameOrErr) - return ELFObj.takeError(); + return NameOrErr.takeError(); // Check if given current global is a profiling global based // on name >From 2031e49c2b26864f2dab72e629eb6cbe34928a7a Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Mon, 6 May 2024 23:13:58 -0500 Subject: [PATCH 24/27] Add more addrspace casts for GPU targets --- .../Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++--- .../Instrumentation/PGOInstrumentation.cpp | 13 +++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index a6b1e0d488120..dd8c027c4bbf6 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -869,6 +869,8 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; auto *TLI = &GetTLI(*Ind->getFunction()); + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + DataVar, PointerType::getUnqual(M.getContext())); // To support value profiling calls within Windows exception handlers, funclet // information contained within operand bundles needs to be copied over to @@ -877,11 +879,13 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { SmallVector<OperandBundleDef, 1> OpBundles; Ind->getOperandBundlesAsDefs(OpBundles); if (!IsMemOpSize) { - Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; + Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr, + Builder.getInt32(Index)}; Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args, OpBundles); } else { - Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; + Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr, + Builder.getInt32(Index)}; Call = Builder.CreateCall( getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp), Args, OpBundles); @@ -1575,7 +1579,8 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(Align(8)); maybeSetComdat(ValuesVar, Fn, CntsVarName); - ValuesPtrExpr = ValuesVar; + ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + ValuesVar, PointerType::getUnqual(Fn->getContext())); } uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 4b51396a8baa3..ee1657ba8400e 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1007,12 +1007,15 @@ static void instrumentOneFunc( ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); assert(ToProfile && "value profiling Value is of unexpected type"); + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + Name, PointerType::get(M->getContext(), 0)); + SmallVector<OperandBundleDef, 1> OpBundles; populateEHOperandBundle(Cand, BlockColors, OpBundles); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash), - ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, + {NormalizedPtr, Builder.getInt64(FuncInfo.FunctionHash), ToProfile, + Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, OpBundles); } } // IPVK_First <= Kind <= IPVK_Last @@ -1685,10 +1688,12 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { IRBuilder<> Builder(&SI); Type *Int64Ty = Builder.getInt64Ty(); auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); + auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( + FuncNameVar, PointerType::get(M->getContext(), 0)); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), - {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), - Builder.getInt32(*CurCtrIdx), Step}); + {NormalizedPtr, Builder.getInt64(FuncHash), + Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); ++(*CurCtrIdx); } >From be6524bb4f77de0add1e698f68115fd336f32238 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Mon, 13 May 2024 17:41:00 -0500 Subject: [PATCH 25/27] Have test read from profraw instead of dump --- offload/test/lit.cfg | 2 + offload/test/offloading/pgo1.c | 94 ++++++++++++++++------------------ 2 files changed, 46 insertions(+), 50 deletions(-) diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 069110dc69a6e..38e6a33b01faf 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -391,6 +391,8 @@ if config.test_fortran_compiler: config.available_features.add('flang') config.substitutions.append(("%flang", config.test_fortran_compiler)) +config.substitutions.append(("%target_triple", config.libomptarget_current_target)) + config.substitutions.append(("%openmp_flags", config.test_openmp_flags)) if config.libomptarget_current_target.startswith('nvptx') and config.cuda_path: config.substitutions.append(("%cuda_flags", "--cuda-path=" + config.cuda_path)) diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c index 79e93d0f10827..d22d5340f5b3e 100644 --- a/offload/test/offloading/pgo1.c +++ b/offload/test/offloading/pgo1.c @@ -1,22 +1,21 @@ -// RUN: %libomptarget-compile-generic -fprofile-instr-generate \ -// RUN: -Xclang "-fprofile-instrument=clang" -// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \ -// RUN: --check-prefix="CLANG-PGO" -// RUN: %libomptarget-compile-generic -fprofile-generate \ -// RUN: -Xclang "-fprofile-instrument=llvm" -// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \ +// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=llvm" +// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.llvm.profraw | %fcheck-generic \ // RUN: --check-prefix="LLVM-PGO" +// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=clang" +// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.clang.profraw | %fcheck-generic \ +// RUN: --check-prefix="CLANG-PGO" + // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-LTO // UNSUPPORTED: aarch64-unknown-linux-gnu // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO // REQUIRES: pgo -#ifdef _OPENMP -#include <omp.h> -#endif - int test1(int a) { return a / 2; } int test2(int a) { return a * 2; } @@ -31,43 +30,38 @@ int main() { } } -// CLANG-PGO: ======== Counters ========= -// CLANG-PGO-NEXT: 0 11 20 10 20 -// CLANG-PGO-NEXT: ========== Data =========== -// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// CLANG-PGO-NEXT: ======== Functions ======== -// CLANG-PGO-NEXT: pgo1.c: -// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} -// CLANG-PGO-NEXT: test1 -// CLANG-PGO-NEXT: test2 +// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 4 +// LLVM-PGO: Function count: 20 +// LLVM-PGO: Block counts: [10, 20, 10] + +// LLVM-PGO-LABEL: test1: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 1 +// LLVM-PGO: Function count: 1 +// LLVM-PGO: Block counts: [] + +// LLVM-PGO-LABEL: test2: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 1 +// LLVM-PGO: Function count: 1 +// LLVM-PGO: Block counts: [] + +// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 3 +// CLANG-PGO: Function count: 0 +// CLANG-PGO: Block counts: [11, 20] + +// CLANG-PGO-LABEL: test1: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 1 +// CLANG-PGO: Function count: 10 +// CLANG-PGO: Block counts: [] -// LLVM-PGO: ======== Counters ========= -// LLVM-PGO-NEXT: 20 10 20 10 1 1 -// LLVM-PGO-NEXT: ========== Data =========== -// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} -// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} } -// LLVM-PGO-NEXT: ======== Functions ======== -// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}} -// LLVM-PGO-NEXT: test1 -// LLVM-PGO-NEXT: test2 +// CLANG-PGO-LABEL: test2: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 1 +// CLANG-PGO: Function count: 20 +// CLANG-PGO: Block counts: [] >From 2b8eb2935ec21bf0acc5c56f45837b5976560963 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 24 May 2024 19:59:33 -0500 Subject: [PATCH 26/27] Fix PGO test format --- offload/test/offloading/pgo1.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c index d22d5340f5b3e..0e75c684ed926 100644 --- a/offload/test/offloading/pgo1.c +++ b/offload/test/offloading/pgo1.c @@ -33,20 +33,17 @@ int main() { // LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} // LLVM-PGO: Counters: 4 -// LLVM-PGO: Function count: 20 -// LLVM-PGO: Block counts: [10, 20, 10] +// LLVM-PGO: Block counts: [20, 10, 20, 10] // LLVM-PGO-LABEL: test1: // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} // LLVM-PGO: Counters: 1 -// LLVM-PGO: Function count: 1 -// LLVM-PGO: Block counts: [] +// LLVM-PGO: Block counts: [1] // LLVM-PGO-LABEL: test2: // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} // LLVM-PGO: Counters: 1 -// LLVM-PGO: Function count: 1 -// LLVM-PGO: Block counts: [] +// LLVM-PGO: Block counts: [1] // CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} >From 67f3009173d815295f36e2b37e85add1347e3bf9 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough <ethanluismcdono...@gmail.com> Date: Fri, 24 May 2024 20:45:04 -0500 Subject: [PATCH 27/27] Refactor profile writer --- compiler-rt/lib/profile/InstrProfilingFile.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index bc1d40a37a5ad..76238214c13aa 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1344,8 +1344,7 @@ int __llvm_write_custom_profile(const char *Target, forceTruncateFile(TargetFilename); /* Open target-specific PGO file */ - MergeDone = 0; - FILE *OutputFile = getMergeFileObject(TargetFilename, &MergeDone); + FILE *OutputFile = getFileObject(TargetFilename); if (!OutputFile) { PROF_ERR("Failed to open file : %s\n", TargetFilename); @@ -1356,15 +1355,11 @@ int __llvm_write_custom_profile(const char *Target, FreeHook = &free; setupIOBuffer(); - ProfDataWriter fileWriter; - initFileWriter(&fileWriter, OutputFile); - - /* Write custom data to the file */ - ReturnValue = - lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin, - CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL, - NULL, NULL, NULL, NamesBegin, NamesEnd, MergeDone); + /* Write custom data */ + ReturnValue = __llvm_profile_write_buffer_internal( + OutputFile, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL, + NamesBegin, NamesEnd); closeFileObject(OutputFile); // Restore SIGKILL. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits