https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/118515
>From 8ba940796d4092c225ce34ff883b80d36fee58fe Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Tue, 3 Dec 2024 11:25:37 -0500 Subject: [PATCH] [AMDGPU] Use COV6 by default --- clang/docs/ReleaseNotes.rst | 2 ++ clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- clang/test/CodeGen/amdgpu-address-spaces.cpp | 2 +- clang/test/CodeGenCUDA/amdgpu-code-object-version.cu | 2 +- clang/test/CodeGenCXX/dynamic-cast-address-space.cpp | 6 +++--- clang/test/CodeGenHIP/default-attributes.hip | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 6 +++--- .../amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib/amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib64/amdgcn/bitcode/oclc_abi_version_600.bc | 0 clang/test/Driver/hip-device-libs.hip | 2 +- clang/test/OpenMP/amdgcn_target_global_constructor.cpp | 4 ++-- libc/cmake/modules/prepare_libc_gpu_build.cmake | 2 +- llvm/docs/ReleaseNotes.md | 2 ++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- .../CodeGen/AMDGPU/default_amdhsa_code_object_version.ll | 7 +++++++ offload/plugins-nextgen/common/src/Utils/ELF.cpp | 5 +++-- 18 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5026d4475b38aa..37180d15bf9fc8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -818,6 +818,8 @@ Target Specific Changes AMDGPU Support ^^^^^^^^^^^^^^ +- Bump the default code object version to 6. + - Initial support for gfx950 - Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contains common diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4bc0b97ea68f2f..29db1aa21ed298 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5140,12 +5140,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee", NegFlag<SetFalse, [], [ClangOption, CC1Option]>>; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, Values<"none,4,5,6">, NormalizedValuesScope<"llvm::CodeObjectVersionKind">, NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>, - MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">; + MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_6">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 8d977149e62485..4e6ace48c3ffb5 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2705,7 +2705,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 6; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index ae2c61439f4ca5..b121b559f58dc3 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index ffe12544917f7f..aa0e3edec3f6a3 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V6 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 271d9ede79d0c4..7eebdf68115a98 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -13,7 +13,7 @@ B fail; // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 @@ -118,11 +118,11 @@ const B& f(A *a) { // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. -// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 1b53ebec9b5821..f4dbad021987f1 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -8,7 +8,7 @@ //. // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. __device__ void extern_func(); @@ -39,7 +39,7 @@ __global__ void kernel() { // OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } //. -// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 62b5661da9dbd8..7f2a17b6ef8c55 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -62,7 +62,7 @@ kernel void test_target_features_kernel(global int *i) { //. // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone // NOCPU-LABEL: define {{[^@]+}}@callee @@ -759,7 +759,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #[[ATTR8]] = { nounwind } // GFX900: attributes #[[ATTR9]] = { convergent nounwind } //. -// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0} // NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0} @@ -777,7 +777,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: [[META15]] = !{i32 1} // NOCPU: [[META16]] = !{!"int*"} //. -// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} // GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} diff --git a/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 6f1d31508e3302..3ae384cf05d972 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -157,7 +157,7 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6 // Test default code object version with old device library without abi_version_400.bc // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp index 9f1e68d4ea0fec..d728dc1233e2c7 100644 --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -29,7 +29,7 @@ S A; // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }] // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }] -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { @@ -104,7 +104,7 @@ S A; // CHECK: attributes #[[ATTR4]] = { convergent nounwind } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0} -// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51} // CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 937bd22451c5fb..f8f5a954e5e916 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) # The AMDGPU environment uses different code objects to encode the ABI for # kernel calls and intrinsic functions. We want to specify this manually to # conform to whatever the test suite was built to handle. - set(LIBC_GPU_CODE_OBJECT_VERSION 5) + set(LIBC_GPU_CODE_OBJECT_VERSION 6) endif() if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index dc3f3aeb735f87..347af71953bce6 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -130,6 +130,8 @@ Changes to the AArch64 Backend Changes to the AMDGPU Backend ----------------------------- +* Bump the default `.amdhsa_code_object_version` to 6. + * Removed `llvm.amdgcn.flat.atomic.fadd` and `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the {ref}`atomicrmw <i_atomicrmw>` instruction with `fadd` and diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5a0e812748fbb7..48eecc511bdaeb 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion( "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), + llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)")); diff --git a/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll new file mode 100644 index 00000000000000..6f79cf23bfbf77 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s + +; CHECK: .amdhsa_code_object_version 6 + +define amdgpu_kernel void @kernel() { + ret void +} diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index 88642fd5b56400..18b5ad3351b12d 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -64,8 +64,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) { if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 && - Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5) - return createError("Invalid AMD ABI version, must be version 4 or 5"); + Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && + Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) + return createError("Invalid AMD ABI version, must be version above 4"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201) return createError("Unsupported AMDGPU architecture"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits