llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Alex MacLean (AlexMaclean) <details> <summary>Changes</summary> Specifying a kernel with the `ptx_kernel` or `amdgpu_kernel` calling convention is more idiomatic and more compile-time performant than using the `nvvm.annotations !"kernel"` metadata. Transition OMPIRBuilder to use calling conventions for PTX kernels and no longer emit `nvvm.annotations`. Update OpenMPOpt to work with kernels specified via calling convention as well as metadata. Update OpenMP tests to use the calling conventions. --- Patch is 345.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122320.diff 33 Files Affected: - (modified) clang/test/OpenMP/assumes_include_nvptx.cpp (+2-2) - (modified) clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp (+1-1) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+3-13) - (modified) llvm/lib/Transforms/IPO/OpenMPOpt.cpp (+38-18) - (modified) llvm/test/Transforms/OpenMP/always_inline_device.ll (+7-10) - (modified) llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll (+1-5) - (modified) llvm/test/Transforms/OpenMP/barrier_removal.ll (+29-117) - (modified) llvm/test/Transforms/OpenMP/bug66687.ll (+4-7) - (modified) llvm/test/Transforms/OpenMP/custom_state_machines.ll (+20-61) - (modified) llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll (+26-83) - (modified) llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll (+2-5) - (modified) llvm/test/Transforms/OpenMP/deduplication_target.ll (+1-3) - (modified) llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll (+3-10) - (modified) llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll (+3-10) - (modified) llvm/test/Transforms/OpenMP/global_constructor.ll (+5-8) - (modified) llvm/test/Transforms/OpenMP/globalization_remarks.ll (+1-3) - (modified) llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll (+2-7) - (modified) 
llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll (+1-3) - (modified) llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll (+4-13) - (modified) llvm/test/Transforms/OpenMP/nested_parallelism.ll (+4-9) - (modified) llvm/test/Transforms/OpenMP/parallel_level_fold.ll (+3-10) - (modified) llvm/test/Transforms/OpenMP/remove_globalization.ll (+11-15) - (modified) llvm/test/Transforms/OpenMP/replace_globalization.ll (+7-14) - (modified) llvm/test/Transforms/OpenMP/single_threaded_execution.ll (+1-3) - (modified) llvm/test/Transforms/OpenMP/spmdization.ll (+240-1367) - (modified) llvm/test/Transforms/OpenMP/spmdization_assumes.ll (+12-15) - (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (-2) - (modified) llvm/test/Transforms/OpenMP/spmdization_guarding.ll (+51-55) - (modified) llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll (+15-21) - (modified) llvm/test/Transforms/OpenMP/spmdization_indirect.ll (+72-89) - (modified) llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll (+14-21) - (modified) llvm/test/Transforms/OpenMP/spmdization_remarks.ll (+2-5) - (modified) llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll (+19-77) ``````````diff diff --git a/clang/test/OpenMP/assumes_include_nvptx.cpp b/clang/test/OpenMP/assumes_include_nvptx.cpp index 4577ea4c9c2b5e..c5040989a0e407 100644 --- a/clang/test/OpenMP/assumes_include_nvptx.cpp +++ b/clang/test/OpenMP/assumes_include_nvptx.cpp @@ -11,11 +11,11 @@ // TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated. 
-// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]] +// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]] // CHECK: call i32 @__kmpc_target_init( // CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]] // CHECK: declare void @__kmpc_target_deinit( -// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]] +// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]] // CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]] // CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]] diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index d573f1cd193d64..94ace20826db4d 100644 --- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -90,7 +90,7 @@ int foo(int n, double *ptr) { ptr[0]++; } - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]]) + // TCHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]]) // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr, // TCHECK: [[PTR_ADDR:%.+]] = alloca ptr, // TCHECK-NOT: alloca ptr, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8dbf2aa7e0a243..487f886f9bdbfd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6404,6 +6404,8 @@ void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility); if (T.isAMDGCN()) OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL); 
+ else if (T.isNVPTX()) + OutlinedFn->setCallingConv(CallingConv::PTX_Kernel); } } @@ -9077,20 +9079,8 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, if (!Fn) return; - Module &M = *(Fn->getParent()); - LLVMContext &Ctx = M.getContext(); - - // Get "nvvm.annotations" metadata node. - NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - - Metadata *MDVals[] = { - ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}; - // Append metadata to nvvm.annotations. - MD->addOperand(MDNode::get(Ctx, MDVals)); - // Add a function attribute for the kernel. - Fn->addFnAttr(Attribute::get(Ctx, "kernel")); + Fn->addFnAttr("kernel"); if (T.isAMDGCN()) Fn->addFnAttr("uniform-work-group-size", "true"); Fn->addFnAttr(Attribute::MustProgress); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 67585e9c80ef4e..f495840c254d59 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/IPO/OpenMPOpt.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -36,6 +37,7 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -5909,34 +5911,52 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); } +static bool isKernelCC(Function &F) { + switch (F.getCallingConv()) { + default: + return false; + case CallingConv::PTX_Kernel: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return true; + } +} + KernelSet llvm::omp::getDeviceKernels(Module &M) { // TODO: Create a more cross-platform way of determining device kernels. 
- NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"); KernelSet Kernels; - if (!MD) - return Kernels; - - for (auto *Op : MD->operands()) { - if (Op->getNumOperands() < 2) - continue; - MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); - if (!KindID || KindID->getString() != "kernel") - continue; - - Function *KernelFn = - mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); - if (!KernelFn) - continue; + DenseSet<const Function *> SeenKernels; + auto ProcessKernel = [&](Function &KF) { + if (SeenKernels.contains(&KF)) + return; + SeenKernels.insert(&KF); // We are only interested in OpenMP target regions. Others, such as kernels // generated by CUDA but linked together, are not interesting to this pass. - if (isOpenMPKernel(*KernelFn)) { + if (isOpenMPKernel(KF)) { ++NumOpenMPTargetRegionKernels; - Kernels.insert(KernelFn); + Kernels.insert(&KF); } else ++NumNonOpenMPTargetRegionKernels; - } + }; + + if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations")) + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() < 2) + continue; + MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); + if (!KindID || KindID->getString() != "kernel") + continue; + + if (auto *KernelFn = + mdconst::dyn_extract_or_null<Function>(Op->getOperand(0))) + ProcessKernel(*KernelFn); + } + + for (Function &F : M) + if (isKernelCC(F)) + ProcessKernel(F); return Kernels; } diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index 6028ff5278037b..9c5b19f7a6c88c 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -17,7 +17,7 @@ ; CHECK: @G = external global i8 ; CHECK: @kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. 
-define weak void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 { ; CHECK: Function Attrs: norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: @@ -79,12 +79,10 @@ attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-wi attributes #2 = { convergent } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4, !5, !6} !llvm.ident = !{!7} !0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -97,11 +95,10 @@ attributes #2 = { convergent } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} -; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. 
diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll index 9c0416af359d4d..3f4790ee15ac8d 100644 --- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll +++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll @@ -13,10 +13,6 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a ret i8 undef } -declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr +declare ptx_kernel void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr - -!nvvm.annotations = !{!0} - -!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index 47a5d5104aa8bd..5b7544b1a79616 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -28,7 +28,7 @@ declare void @llvm.assume(i1) ; CHECK: @G1 = global i32 42 ; CHECK: @G2 = addrspace(1) global i32 0 ;. 
-define void @pos_empty_1(i1 %c) "kernel" { +define amdgpu_kernel void @pos_empty_1(i1 %c) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@pos_empty_1 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { ; MODULE-NEXT: ret void @@ -45,7 +45,7 @@ define void @pos_empty_1(i1 %c) "kernel" { call void @llvm.assume(i1 %c) ret void } -define void @pos_empty_2() "kernel" { +define amdgpu_kernel void @pos_empty_2() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_2 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: ret void @@ -53,7 +53,7 @@ define void @pos_empty_2() "kernel" { call void @aligned_barrier() ret void } -define void @pos_empty_3() "kernel" { +define amdgpu_kernel void @pos_empty_3() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_3 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -61,7 +61,7 @@ define void @pos_empty_3() "kernel" { call void @llvm.nvvm.barrier0() ret void } -define void @pos_empty_4() "kernel" { +define amdgpu_kernel void @pos_empty_4() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_4 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -69,7 +69,7 @@ define void @pos_empty_4() "kernel" { call i32 @llvm.nvvm.barrier0.and(i32 0) ret void } -define void @pos_empty_5() "kernel" { +define amdgpu_kernel void @pos_empty_5() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_5 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -77,7 +77,7 @@ define void @pos_empty_5() "kernel" { call i32 @llvm.nvvm.barrier0.or(i32 0) ret void } -define void @pos_empty_6() "kernel" { +define amdgpu_kernel void @pos_empty_6() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_6 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -85,7 +85,7 @@ define void @pos_empty_6() "kernel" { call i32 @llvm.nvvm.barrier0.popc(i32 0) ret void } -define void @pos_empty_7a() "kernel" { +define amdgpu_kernel void @pos_empty_7a() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ 
-96,7 +96,7 @@ define void @pos_empty_7a() "kernel" { ret void } ; FIXME: We should remove the barrier. -define void @pos_empty_7b() "kernel" { +define amdgpu_kernel void @pos_empty_7b() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]] @@ -109,7 +109,7 @@ define void @pos_empty_7b() "kernel" { call void @unknown() ret void } -define void @pos_empty_8(i1 %c) "kernel" { +define amdgpu_kernel void @pos_empty_8(i1 %c) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_8 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -126,7 +126,7 @@ t: f: ret void } -define void @neg_empty_8() "kernel" { +define amdgpu_kernel void @neg_empty_8() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_empty_8 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ -137,7 +137,7 @@ define void @neg_empty_8() "kernel" { call void @llvm.amdgcn.s.barrier() ret void } -define void @neg_empty_9(i1 %c) "kernel" { +define amdgpu_kernel void @neg_empty_9(i1 %c) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_empty_9 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -173,7 +173,7 @@ m: ret void } ; FIXME: We should remove the barrier -define void @pos_empty_10() "kernel" { +define amdgpu_kernel void @pos_empty_10() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_10 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: br label [[M:%.*]] @@ -186,7 +186,7 @@ m: call void @llvm.amdgcn.s.barrier() ret void } -define void @pos_empty_11() "kernel" { +define amdgpu_kernel void @pos_empty_11() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_11 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: br label [[M:%.*]] @@ -206,7 +206,7 @@ define void @empty() { ret void } ; FIXME: We should remove the barrier in the end but not the first one. 
-define void @neg_empty_12(i1 %c) "kernel" { +define amdgpu_kernel void @neg_empty_12(i1 %c) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@neg_empty_12 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; MODULE-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -266,7 +266,7 @@ define void @neg_empty_2() "kernel" { @GC1 = constant i32 42 @GC2 = addrspace(4) global i32 0 @GPtr4 = addrspace(4) global ptr addrspace(4) null -define void @pos_constant_loads() "kernel" { +define amdgpu_kernel void @pos_constant_loads() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8 @@ -296,7 +296,7 @@ define void @pos_constant_loads() "kernel" { @GS = addrspace(3) global i32 0 @GPtr = global ptr null ; TODO: We could remove some of the barriers due to the lack of write effects. -define void @neg_loads() "kernel" { +define amdgpu_kernel void @neg_loads() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_loads ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 @@ -327,7 +327,7 @@ define void @neg_loads() "kernel" { @PG1 = thread_local global i32 42 @PG2 = addrspace(5) global i32 0 @GPtr5 = global ptr addrspace(5) null -define void @pos_priv_mem() "kernel" { +define amdgpu_kernel void @pos_priv_mem() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4 @@ -358,7 +358,7 @@ define void @pos_priv_mem() "kernel" { } @G1 = global i32 42 @G2 = addrspace(1) global i32 0 -define void @neg_mem() "kernel" { +define amdgpu_kernel void @neg_mem() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_mem ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 @@ -388,7 +388,7 @@ define void @neg_mem() "kernel" { ret void } -define void @pos_multiple() "kernel" { +define amdgpu_kernel void @pos_multiple() "kernel" { ; 
CHECK-LABEL: define {{[^@]+}}@pos_multiple ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -404,7 +404,7 @@ define void @pos_multiple() "kernel" { ret void } -define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" { +define amdgpu_kernel void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] @@ -461,7 +461,7 @@ m: ret void } -define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" { +define amdgpu_kernel void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 4, ptr [[P]], align 4 @@ -727,7 +727,7 @@ define internal void @barrier_then_write_then_barrier0(ptr %p) { call void @aligned_barrier() ret void } -define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" { +define amdgpu_kernel void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 ; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { ; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) @@ -1040,7 +1040,7 @@ define internal void @callee_barrier() { call void @aligned_barrier() ret void } -define void @caller_barrier1() "kernel" { +define amdgpu_kernel void @caller_barrier1() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@caller_barrier1 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @callee_barrier() @@ -1051,7 +1051,7 @@ define void @caller_barrier1() "kernel" { call void @aligned_barrier() ret void } -define void @caller_barrier2() "kernel" { +define amdgpu_kernel void @caller_barrier2() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@caller_barrier2 ; CHECK-SAME: () 
#[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ -1065,7 +1065,7 @@ define void @caller_barrier2() "kernel" { ret void } -define void @loop_barrier() "kernel" { +define amdgpu_kernel void @loop_barrier() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1095,7 +1095,7 @@ exit: ret void } -define void @loop_barrier_end_barriers() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1129,7 +1129,7 @@ exit: ret void } -define void @loop_barrier_end_barriers_unknown() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers_unknown() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1165,7 +1165,7 @@ exit: ret void } -define void @loop_barrier_store() "kernel" { +define amdgpu_kernel void @loop_barrier_store() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1195,7 +1195,7 @@ exit: ret void } -define void @loop_barrier_end_barriers_store() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers_store() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1232,37 +1232,7 @@ exit: } !llvm.module.flags = !{!16,!15} -!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30} -!0 = !{ptr @pos_empty_1, !"kernel", i32 1} -!1 = !{ptr @pos_empty_2, !"kernel", i32 1} -!2 = !{ptr @pos_empty_3, !"kernel", i32 1} -!3 = !{ptr @pos_empty_4, !"kernel", i32 1} -!4 = !{ptr @pos_empty_5, !"kernel", i32 1} -!5 = !{ptr @pos_empty_6, !"kernel", i32 1} -!17 = !{ptr @pos_empty_7a, !"kernel", i32 1} -!18 = !{ptr @pos_empty_7b, !"kernel", i32 1} -!23 = !{ptr @pos_empty_8, !"kernel", i32 1} 
-!24 = !{ptr @caller_barrier1, !"kernel", i32 1} -!25 = !{ptr @caller_barrier2, !"kernel", i32 1} -!26 = !{ptr @loop_barrier, !"kernel", i32 1} -!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} -!28 = !{ptr @loop_barrier_end_barriers_unkno... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/122320 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits