https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/119261
>From f9f30a77f5e7232f968a3063c34338c9dfc7bac5 Mon Sep 17 00:00:00 2001 From: Alex Maclean <amacl...@nvidia.com> Date: Fri, 8 Nov 2024 22:39:34 +0000 Subject: [PATCH 1/3] [NVPTX] Add NVVMUpgradeAnnotations pass to cleanup legacy annotations --- llvm/lib/Target/NVPTX/CMakeLists.txt | 1 + llvm/lib/Target/NVPTX/NVPTX.h | 5 + llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 + llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 9 +- .../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 130 ++++++++++++++++++ .../CodeGen/NVPTX/upgrade-nvvm-annotations.ll | 30 ++++ 6 files changed, 177 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp create mode 100644 llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index 693365161330f5..bb2e4ad48b51d8 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -39,6 +39,7 @@ set(NVPTXCodeGen_sources NVVMReflect.cpp NVPTXProxyRegErasure.cpp NVPTXCtorDtorLowering.cpp + NVVMUpgradeAnnotations.cpp ) add_llvm_target(NVPTXCodeGen diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index ca915cd3f3732f..53418148be3615 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -52,6 +52,7 @@ FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable, bool NoTrapAfterNoreturn); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); +ModulePass *createNVVMUpgradeAnnotationsPass(); struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -74,6 +75,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +struct NVVMUpgradeAnnotationsPass : PassInfoMixin<NVVMUpgradeAnnotationsPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + namespace NVPTX { enum DrvInterface { NVCL, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index a5c5e9420ee737..b4fd36625adc9c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -254,6 +254,8 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerPipelineStartEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { + PM.addPass(NVVMUpgradeAnnotationsPass()); + FunctionPassManager FPM; FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); // Note: NVVMIntrRangePass was causing numerical discrepancies at one @@ -349,6 +351,8 @@ void NVPTXPassConfig::addIRPasses() { AAR.addAAResult(WrapperPass->getResult()); })); + addPass(createNVVMUpgradeAnnotationsPass()); + // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running // it here does nothing. But since we need it for correctness when lowering // to NVPTX, run it here too, in case whoever built our pass pipeline didn't diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 98bffd92a087b6..04e83576cbf958 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -311,11 +311,16 @@ std::optional<unsigned> getMaxNReg(const Function &F) { } bool isKernelFunction(const Function &F) { + if (F.getCallingConv() == CallingConv::PTX_Kernel) + return true; + + if (F.hasFnAttribute("nvvm.kernel")) + return true; + if (const auto X = findOneNVVMAnnotation(&F, "kernel")) return (*X == 1); - // There is no NVVM metadata, check the calling convention - return F.getCallingConv() == CallingConv::PTX_Kernel; + return false; } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp new file mode 100644 index 00000000000000..ca550434835a2c --- /dev/null +++ b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp @@ -0,0 +1,130 @@ +//===- NVVMUpgradeAnnotations.cpp - Upgrade NVVM Annotations --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass replaces deprecated metadata in nvvm.annotation with a more modern +// IR representation. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <cstdint> + +#define DEBUG_TYPE "nvvm-upgrade-annotations" + +using namespace llvm; + +namespace llvm { +void initializeNVVMUpgradeAnnotationsLegacyPassPass(PassRegistry &); +} // namespace llvm + +namespace { + +class NVVMUpgradeAnnotationsLegacyPass : public ModulePass { +public: + static char ID; + NVVMUpgradeAnnotationsLegacyPass() : ModulePass(ID) { + initializeNVVMUpgradeAnnotationsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +} // namespace + +char NVVMUpgradeAnnotationsLegacyPass::ID = 0; + +bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) { + if (K == "kernel") { + assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1); + F->addFnAttr("nvvm.kernel"); + return true; + } + if (K == "align") { + const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue(); + const unsigned Idx = (AlignBits >> 16); + const Align StackAlign = Align(AlignBits & 0xFFFF); + // TODO: Skip adding the stackalign attribute for returns, for now. + if (!Idx) + return false; + F->addAttributeAtIndex( + Idx, Attribute::getWithStackAlignment(F->getContext(), StackAlign)); + return true; + } + + return false; +} + +// Iterate over nvvm.annotations rewriting them as appropiate. +void static upgradeNVAnnotations(Module &M) { + NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations"); + if (!NamedMD) + return; + + SmallVector<MDNode *, 8> NewNodes; + SmallSet<const MDNode *, 8> SeenNodes; + for (MDNode *MD : NamedMD->operands()) { + if (SeenNodes.contains(MD)) + continue; + SeenNodes.insert(MD); + + Function *F = mdconst::dyn_extract_or_null<Function>(MD->getOperand(0)); + if (!F) + continue; + + assert(MD && "Invalid MDNode for annotation"); + assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands"); + + SmallVector<Metadata *, 8> NewOperands; + // start index = 1, to skip the global variable key + // increment = 2, to skip the value for each property-value pairs + for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) { + MDString *K = cast<MDString>(MD->getOperand(j)); + const MDOperand &V = MD->getOperand(j + 1); + bool Upgraded = autoUpgradeAnnotation(F, K->getString(), V); + if (!Upgraded) + NewOperands.append({K, V}); + } + + if (!NewOperands.empty()) { + NewOperands.insert(NewOperands.begin(), MD->getOperand(0)); + NewNodes.push_back(MDNode::get(M.getContext(), NewOperands)); + } + } + + NamedMD->clearOperands(); + for (MDNode *N : NewNodes) + NamedMD->addOperand(N); +} + +PreservedAnalyses NVVMUpgradeAnnotationsPass::run(Module &M, + ModuleAnalysisManager &AM) { + upgradeNVAnnotations(M); + return PreservedAnalyses::all(); +} + +bool NVVMUpgradeAnnotationsLegacyPass::runOnModule(Module &M) { + upgradeNVAnnotations(M); + return false; +} + +INITIALIZE_PASS(NVVMUpgradeAnnotationsLegacyPass, DEBUG_TYPE, + "NVVMUpgradeAnnotations", false, false) + +ModulePass *llvm::createNVVMUpgradeAnnotationsPass() { + return new NVVMUpgradeAnnotationsLegacyPass(); +} diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll new file mode 100644 index 00000000000000..68dc2353858cb3 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s + +define i32 @foo(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) { +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define i32 @bar(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @bar( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +!nvvm.annotations = !{!0, !1, !2} + +!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010} +!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008} +!2 = !{ptr @bar, !"kernel", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { "nvvm.kernel" } +;. +; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8} +;. >From b65b1d8f30b0aadb1152d07eedf39545e1c8fd65 Mon Sep 17 00:00:00 2001 From: Alex Maclean <amacl...@nvidia.com> Date: Mon, 9 Dec 2024 22:43:39 +0000 Subject: [PATCH 2/3] fixups --- clang/lib/CodeGen/Targets/NVPTX.cpp | 36 ++++++++++++------- clang/test/CodeGen/nvptx_attributes.c | 8 ++++- clang/test/CodeGenCUDA/grid-constant.cu | 21 ++++++++--- clang/test/CodeGenCUDA/ptx-kernels.cu | 7 ++-- clang/test/CodeGenCUDA/usual-deallocators.cu | 4 +-- clang/test/CodeGenOpenCL/ptx-calls.cl | 4 +-- clang/test/CodeGenOpenCL/ptx-kernels.cl | 4 +-- clang/test/CodeGenOpenCL/reflect.cl | 8 ++++- .../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 12 ++++--- 9 files changed, 70 insertions(+), 34 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0431d2cc4ddc39..2fddaf8efad10d 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -78,14 +78,12 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. - static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand, + static void + addNVVMGridConstantMetadata(llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs); static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand) { - addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0)); - } + int Operand); private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, @@ -259,7 +257,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL __kernel functions get kernel metadata // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); + F->addFnAttr("nvvm.kernel"); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -277,21 +275,20 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // For some reason arg indices are 1-based in NVVM GCI.push_back(IV.index() + 1); // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1, GCI); + addNVVMGridConstantMetadata(F, GCI); + F->addFnAttr("nvvm.kernel"); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) M.handleCUDALaunchBoundsAttr(F, Attr); } // Attach kernel metadata directly if compiling for NVPTX. - if (FD->hasAttr<NVPTXKernelAttr>()) { - addNVVMMetadata(F, "kernel", 1); - } + if (FD->hasAttr<NVPTXKernelAttr>()) + F->addFnAttr("nvvm.kernel"); } -void NVPTXTargetCodeGenInfo::addNVVMMetadata( - llvm::GlobalValue *GV, StringRef Name, int Operand, - const SmallVectorImpl<int> &GridConstantArgs) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); @@ -302,6 +299,19 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata( llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + +void NVPTXTargetCodeGenInfo::addNVVMGridConstantMetadata( + llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) { + llvm::Module *M = GV->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)}; if (!GridConstantArgs.empty()) { SmallVector<llvm::Metadata *, 10> GCM; for (int I : GridConstantArgs) diff --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c index 7dbd9f1321e280..2edca9cd28c815 100644 --- a/clang/test/CodeGen/nvptx_attributes.c +++ b/clang/test/CodeGen/nvptx_attributes.c @@ -10,8 +10,14 @@ // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8 // CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 // CHECK-NEXT: ret void +// __attribute__((nvptx_kernel)) void foo(int *ret) { *ret = 1; } -// CHECK: !0 = !{ptr @foo, !"kernel", i32 1} +//. +// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu index 8d4be9c9dc7e1e..34c94009dc4d75 100644 --- a/clang/test/CodeGenCUDA/grid-constant.cu +++ b/clang/test/CodeGenCUDA/grid-constant.cu @@ -5,6 +5,15 @@ struct S {}; +// CHECK-LABEL: define dso_local void @_Z6kernel1Sii( +// CHECK-SAME: ptr noundef byval([[STRUCT_S:%.*]]) align 1 [[GC_ARG1:%.*]], i32 noundef [[ARG2:%.*]], i32 noundef [[GC_ARG3:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ARG2_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[GC_ARG3_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[ARG2]], ptr [[ARG2_ADDR]], align 4 +// CHECK-NEXT: store i32 [[GC_ARG3]], ptr [[GC_ARG3_ADDR]], align 4 +// CHECK-NEXT: ret void +// __global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {} // dependent arguments get diagnosed after instantiation. @@ -20,12 +29,16 @@ void foo() { tkernel<const S><<<1,1>>>(1, {}); } //. +// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32" "uniform-work-group-size"="true" } //. -// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]} +// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]} // CHECK: [[META1]] = !{i32 1, i32 3} -// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]} +// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]} // CHECK: [[META3]] = !{i32 1} -// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]} -// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]} +// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]} +// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]} // CHECK: [[META6]] = !{i32 2} +// CHECK: [[META7:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META8:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} +// CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu index b7172b77369296..dd64bd822c01e0 100644 --- a/clang/test/CodeGenCUDA/ptx-kernels.cu +++ b/clang/test/CodeGenCUDA/ptx-kernels.cu @@ -10,7 +10,7 @@ extern "C" __device__ void device_function() {} -// CHECK-LABEL: define{{.*}} void @global_function +// CHECK-LABEL: define{{.*}} void @global_function{{.*}} #[[ATTR0:[0-9]+]] extern "C" __global__ void global_function() { // CHECK: call void @device_function @@ -23,7 +23,7 @@ template <typename T> __global__ void templated_kernel(T param) {} namespace { __global__ void anonymous_ns_kernel() {} -// CHECK-DAG: define{{.*}} void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv( +// CHECK-DAG: define{{.*}} void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv({{.*}} #[[ATTR0]] } void host_function() { @@ -31,5 +31,4 @@ void host_function() { anonymous_ns_kernel<<<0,0>>>(); } -// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1} -// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1} +// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}} diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu index b85a706813fc2b..21616b19ae135a 100644 --- a/clang/test/CodeGenCUDA/usual-deallocators.cu +++ b/clang/test/CodeGenCUDA/usual-deallocators.cu @@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) { } // Make sure that we've generated the kernel used by A::~A. -// DEVICE-LABEL: define void @_Z1fIiEvT_ +// DEVICE: define void @_Z1fIiEvT_{{.*}} #[[ATTR0:[0-9]+]] // Make sure we've picked deallocator for the correct side of compilation. @@ -148,4 +148,4 @@ __host__ __device__ void tests_hd(void *t) { // DEVICE: call void @dev_fn() // HOST: call void @host_fn() -// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1} +// DEVICE: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}} diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl index 0081152ae40e01..c914db87572cee 100644 --- a/clang/test/CodeGenOpenCL/ptx-calls.cl +++ b/clang/test/CodeGenOpenCL/ptx-calls.cl @@ -7,7 +7,7 @@ void device_function() { __kernel void kernel_function() { device_function(); } -// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function() +// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]] // CHECK: call void @device_function() -// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1} +// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}} diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl index 210e5682ac721c..93f2df6c49e82a 100644 --- a/clang/test/CodeGenOpenCL/ptx-kernels.cl +++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl @@ -6,6 +6,6 @@ void device_function() { __kernel void kernel_function() { } -// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function() +// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]] -// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1} +// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}} diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl index 9ae4a5f027d358..88ff74022e6c5f 100644 --- a/clang/test/CodeGenOpenCL/reflect.cl +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -13,7 +13,7 @@ bool device_function() { } // CHECK-LABEL: define dso_local spir_kernel void @kernel_function( -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 @@ -26,3 +26,9 @@ bool device_function() { __kernel void kernel_function(__global int *i) { *i = device_function(); } +//. +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4]] = !{!"none"} +// CHECK: [[META5]] = !{!"int*"} +// CHECK: [[META6]] = !{!""} +//. diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp index ca550434835a2c..27415be5034f96 100644 --- a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp +++ b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -48,10 +49,11 @@ class NVVMUpgradeAnnotationsLegacyPass : public ModulePass { char NVVMUpgradeAnnotationsLegacyPass::ID = 0; -bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) { +bool static autoUpgradeAnnotation(GlobalValue *GV, StringRef K, + const Metadata *V) { if (K == "kernel") { assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1); - F->addFnAttr("nvvm.kernel"); + cast<Function>(GV)->addFnAttr("nvvm.kernel"); return true; } if (K == "align") { @@ -61,8 +63,8 @@ bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) { // TODO: Skip adding the stackalign attribute for returns, for now. if (!Idx) return false; - F->addAttributeAtIndex( - Idx, Attribute::getWithStackAlignment(F->getContext(), StackAlign)); + cast<Function>(GV)->addAttributeAtIndex( + Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign)); return true; } @@ -82,7 +84,7 @@ void static upgradeNVAnnotations(Module &M) { continue; SeenNodes.insert(MD); - Function *F = mdconst::dyn_extract_or_null<Function>(MD->getOperand(0)); + auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0)); if (!F) continue; >From eb96b4dcee7db603e43707a00a188937171e955b Mon Sep 17 00:00:00 2001 From: Alex Maclean <amacl...@nvidia.com> Date: Mon, 9 Dec 2024 23:45:54 +0000 Subject: [PATCH 3/3] move to auto-upgrade --- llvm/include/llvm/IR/AutoUpgrade.h | 4 + llvm/lib/AsmParser/LLParser.cpp | 1 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 + llvm/lib/IR/AutoUpgrade.cpp | 63 +++++++++ llvm/lib/Linker/IRMover.cpp | 1 + llvm/lib/Target/NVPTX/CMakeLists.txt | 1 - llvm/lib/Target/NVPTX/NVPTX.h | 5 - llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 - .../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 132 ------------------ 9 files changed, 71 insertions(+), 142 deletions(-) delete mode 100644 llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index 97c3e4d7589d7b..8c093568a1e031 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -61,6 +61,10 @@ namespace llvm { /// module is modified. bool UpgradeModuleFlags(Module &M); + /// Convert legacy nvvm.annotations metadata to appropriate function + /// attributes. + void UpgradeNVVMAnnotations(Module &M); + /// Convert calls to ARC runtime functions to intrinsic calls and upgrade the /// old retain release marker to new module flag format. void UpgradeARCRuntime(Module &M); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 34311499367b41..4e869cfe312e47 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -448,6 +448,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { llvm::UpgradeDebugInfo(*M); UpgradeModuleFlags(*M); + UpgradeNVVMAnnotations(*M); UpgradeSectionAttributes(*M); if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 85c6fadeda6cc3..fff196aca9ffc6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7142,6 +7142,8 @@ Error BitcodeReader::materializeModule() { UpgradeModuleFlags(*TheModule); + UpgradeNVVMAnnotations(*TheModule); + UpgradeARCRuntime(*TheModule); return Error::success(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e73538da282e99..d774606bac9448 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5022,6 +5022,69 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } +bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, + const Metadata *V) { + if (K == "kernel") { + assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1); + cast<Function>(GV)->addFnAttr("nvvm.kernel"); + return true; + } + if (K == "align") { + const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue(); + const unsigned Idx = (AlignBits >> 16); + const Align StackAlign = Align(AlignBits & 0xFFFF); + // TODO: Skip adding the stackalign attribute for returns, for now. + if (!Idx) + return false; + cast<Function>(GV)->addAttributeAtIndex( + Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign)); + return true; + } + + return false; +} + +void llvm::UpgradeNVVMAnnotations(Module &M) { + NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations"); + if (!NamedMD) + return; + + SmallVector<MDNode *, 8> NewNodes; + SmallSet<const MDNode *, 8> SeenNodes; + for (MDNode *MD : NamedMD->operands()) { + if (SeenNodes.contains(MD)) + continue; + SeenNodes.insert(MD); + + auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0)); + if (!F) + continue; + + assert(MD && "Invalid MDNode for annotation"); + assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands"); + + SmallVector<Metadata *, 8> NewOperands; + // start index = 1, to skip the global variable key + // increment = 2, to skip the value for each property-value pairs + for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) { + MDString *K = cast<MDString>(MD->getOperand(j)); + const MDOperand &V = MD->getOperand(j + 1); + bool Upgraded = upgradeSingleNVVMAnnotation(F, K->getString(), V); + if (!Upgraded) + NewOperands.append({K, V}); + } + + if (!NewOperands.empty()) { + NewOperands.insert(NewOperands.begin(), MD->getOperand(0)); + NewNodes.push_back(MDNode::get(M.getContext(), NewOperands)); + } + } + + NamedMD->clearOperands(); + for (MDNode *N : NewNodes) + NamedMD->addOperand(N); +} + /// This checks for objc retain release marker which should be upgraded. It /// returns true if module is modified. static bool upgradeRetainReleaseMarker(Module &M) { diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index a0c3f2c5b0baf6..5681c4257a90ad 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1247,6 +1247,7 @@ Error IRLinker::linkModuleFlagsMetadata() { // Check for module flag for updates before do anything. UpgradeModuleFlags(*SrcM); + UpgradeNVVMAnnotations(*SrcM); // If the destination module doesn't have module flags yet, then just copy // over the source module's flags. diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index bb2e4ad48b51d8..693365161330f5 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -39,7 +39,6 @@ set(NVPTXCodeGen_sources NVVMReflect.cpp NVPTXProxyRegErasure.cpp NVPTXCtorDtorLowering.cpp - NVVMUpgradeAnnotations.cpp ) add_llvm_target(NVPTXCodeGen diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 53418148be3615..ca915cd3f3732f 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -52,7 +52,6 @@ FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable, bool NoTrapAfterNoreturn); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); -ModulePass *createNVVMUpgradeAnnotationsPass(); struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -75,10 +74,6 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -struct NVVMUpgradeAnnotationsPass : PassInfoMixin<NVVMUpgradeAnnotationsPass> { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); -}; - namespace NVPTX { enum DrvInterface { NVCL, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index b4fd36625adc9c..a5c5e9420ee737 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -254,8 +254,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerPipelineStartEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { - PM.addPass(NVVMUpgradeAnnotationsPass()); - FunctionPassManager FPM; FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); // Note: NVVMIntrRangePass was causing numerical discrepancies at one @@ -351,8 +349,6 @@ void NVPTXPassConfig::addIRPasses() { AAR.addAAResult(WrapperPass->getResult()); })); - addPass(createNVVMUpgradeAnnotationsPass()); - // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running // it here does nothing. But since we need it for correctness when lowering // to NVPTX, run it here too, in case whoever built our pass pipeline didn't diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp deleted file mode 100644 index 27415be5034f96..00000000000000 --- a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp +++ /dev/null @@ -1,132 +0,0 @@ -//===- NVVMUpgradeAnnotations.cpp - Upgrade NVVM Annotations --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass replaces deprecated metadata in nvvm.annotation with a more modern -// IR representation. -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" -#include <cstdint> - -#define DEBUG_TYPE "nvvm-upgrade-annotations" - -using namespace llvm; - -namespace llvm { -void initializeNVVMUpgradeAnnotationsLegacyPassPass(PassRegistry &); -} // namespace llvm - -namespace { - -class NVVMUpgradeAnnotationsLegacyPass : public ModulePass { -public: - static char ID; - NVVMUpgradeAnnotationsLegacyPass() : ModulePass(ID) { - initializeNVVMUpgradeAnnotationsLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override; -}; -} // namespace - -char NVVMUpgradeAnnotationsLegacyPass::ID = 0; - -bool static autoUpgradeAnnotation(GlobalValue *GV, StringRef K, - const Metadata *V) { - if (K == "kernel") { - assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1); - cast<Function>(GV)->addFnAttr("nvvm.kernel"); - return true; - } - if (K == "align") { - const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue(); - const unsigned Idx = (AlignBits >> 16); - const Align StackAlign = Align(AlignBits & 0xFFFF); - // TODO: Skip adding the stackalign attribute for returns, for now. - if (!Idx) - return false; - cast<Function>(GV)->addAttributeAtIndex( - Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign)); - return true; - } - - return false; -} - -// Iterate over nvvm.annotations rewriting them as appropiate. -void static upgradeNVAnnotations(Module &M) { - NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations"); - if (!NamedMD) - return; - - SmallVector<MDNode *, 8> NewNodes; - SmallSet<const MDNode *, 8> SeenNodes; - for (MDNode *MD : NamedMD->operands()) { - if (SeenNodes.contains(MD)) - continue; - SeenNodes.insert(MD); - - auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0)); - if (!F) - continue; - - assert(MD && "Invalid MDNode for annotation"); - assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands"); - - SmallVector<Metadata *, 8> NewOperands; - // start index = 1, to skip the global variable key - // increment = 2, to skip the value for each property-value pairs - for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) { - MDString *K = cast<MDString>(MD->getOperand(j)); - const MDOperand &V = MD->getOperand(j + 1); - bool Upgraded = autoUpgradeAnnotation(F, K->getString(), V); - if (!Upgraded) - NewOperands.append({K, V}); - } - - if (!NewOperands.empty()) { - NewOperands.insert(NewOperands.begin(), MD->getOperand(0)); - NewNodes.push_back(MDNode::get(M.getContext(), NewOperands)); - } - } - - NamedMD->clearOperands(); - for (MDNode *N : NewNodes) - NamedMD->addOperand(N); -} - -PreservedAnalyses NVVMUpgradeAnnotationsPass::run(Module &M, - ModuleAnalysisManager &AM) { - upgradeNVAnnotations(M); - return PreservedAnalyses::all(); -} - -bool NVVMUpgradeAnnotationsLegacyPass::runOnModule(Module &M) { - upgradeNVAnnotations(M); - return false; -} - -INITIALIZE_PASS(NVVMUpgradeAnnotationsLegacyPass, DEBUG_TYPE, - "NVVMUpgradeAnnotations", false, false) - -ModulePass *llvm::createNVVMUpgradeAnnotationsPass() { - return new NVVMUpgradeAnnotationsLegacyPass(); -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits