Author: Juan Manuel Martinez Caamaño Date: 2025-04-03T09:22:38+02:00 New Revision: 041e84261a502a28401813bf55aa778ee0bbcdeb
URL: https://github.com/llvm/llvm-project/commit/041e84261a502a28401813bf55aa778ee0bbcdeb DIFF: https://github.com/llvm/llvm-project/commit/041e84261a502a28401813bf55aa778ee0bbcdeb.diff LOG: [Clang][AMDGPU] Expose buffer load lds as a clang builtin (#132048) CK is using either inline assembly or inline LLVM-IR builtins to generate buffer_load_dword lds instructions. This patch exposes this instruction as a Clang builtin available on gfx9 and gfx10. Related to SWDEV-519702 and SWDEV-518861 Added: clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Sema/SemaAMDGPU.cpp clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index c6c0bf7d8388d..cbef637be213a 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -163,6 +163,8 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") + //===----------------------------------------------------------------------===// // Ballot builtins. 
//===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3f9ba933582da..1993cd5accc22 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13056,6 +13056,6 @@ def err_acc_decl_for_routine : Error<"expected function or lambda declaration for 'routine' construct">; // AMDGCN builtins diagnostics -def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">; -def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">; +def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; +def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">; } // end of sema component. diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index a4d075dfd0768..7fec099374152 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -35,6 +35,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap); switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds: case AMDGPU::BI__builtin_amdgcn_global_load_lds: { constexpr const int SizeIdx = 2; llvm::APSInt Size; @@ -54,11 +55,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, [[fallthrough]]; } default: - Diag(ArgExpr->getExprLoc(), - diag::err_amdgcn_global_load_lds_size_invalid_value) + Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value) << ArgExpr->getSourceRange(); - Diag(ArgExpr->getExprLoc(), - diag::note_amdgcn_global_load_lds_size_valid_value) + Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value) << HasGFX950Insts << ArgExpr->getSourceRange(); return true; } diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl index 3403b69e07e4b..5e3ed9027c17a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl @@ -170,3 +170,12 @@ v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0); } + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) +// CHECK-NEXT: ret void +// +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl new file mode 100644 index 0000000000000..5915393ae7f56 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s +// REQUIRES: amdgpu-registered-target + +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + 
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl new file mode 100644 index 0000000000000..768f894e9180d --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index ebac0f9029791..217e43fcce4fd 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1863,7 +1863,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic < ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; -class AMDGPURawPtrBufferLoadLDS : Intrinsic < +class AMDGPURawPtrBufferLoadLDS : + ClangBuiltin<"__builtin_amdgcn_raw_ptr_buffer_load_lds">, + Intrinsic < [], [AMDGPUBufferRsrcTy, // rsrc(SGPR) LLVMQualPointerType<3>, // LDS 
base offset _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits