Author: David Stuttard
Date: 2022-11-28T11:26:15Z
New Revision: 7940888c5987de2b5cbb4ec45b482df88e822f67
URL: https://github.com/llvm/llvm-project/commit/7940888c5987de2b5cbb4ec45b482df88e822f67 DIFF: https://github.com/llvm/llvm-project/commit/7940888c5987de2b5cbb4ec45b482df88e822f67.diff LOG: [AMDGPU] Intrinsic to expose s_wait_event for export ready Differential Revision: https://reviews.llvm.org/D138216 Added: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/SOPInstructions.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index d4d16d5a9563d..5e64f830fb850 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -261,6 +261,7 @@ TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4hV4hV4 // TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64? TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wait_event_export_ready, "v", "n", "gfx11-insts") //===----------------------------------------------------------------------===// // WMMA builtins. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index a4f2d610afa83..59a16900fb1a4 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -37,3 +37,9 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1) void test_permlane64(global uint* out, uint a) { *out = __builtin_amdgcn_permlane64(a); } + +// CHECK-LABEL: @test_s_wait_event_export_ready +// CHECK: call void @llvm.amdgcn.s.wait.event.export.ready +void test_s_wait_event_export_ready() { + __builtin_amdgcn_s_wait_event_export_ready(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8f05eb10920c7..3e9233b1f86f9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2067,6 +2067,10 @@ def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>; def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>; +def int_amdgcn_s_wait_event_export_ready : + ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] +>; //===----------------------------------------------------------------------===// // Deep learning intrinsics. 
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 674da1f0ae4a5..ce0b0dfc48ced 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1388,7 +1388,9 @@ let SubtargetPredicate = isGFX10Plus in { let SubtargetPredicate = isGFX11Plus in { def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16), - "$simm16">; + "$simm16"> { + let hasSideEffects = 1; + } def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16), "$simm16">; } // End SubtargetPredicate = isGFX11Plus @@ -1430,6 +1432,10 @@ def : GCNPat< (S_SEXT_I32_I16 $src) >; +def : GCNPat < + (int_amdgcn_s_wait_event_export_ready), + (S_WAIT_EVENT (i16 0)) +>; //===----------------------------------------------------------------------===// // SOP2 Patterns diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll new file mode 100644 index 0000000000000..3e95e4dec67a2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll @@ -0,0 +1,15 @@ +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test_wait_event: +; GCN: s_wait_event 0x0 + +define amdgpu_ps void @test_wait_event() #0 { +entry: + call void @llvm.amdgcn.s.wait.event.export.ready() #0 + ret void +} + +declare void @llvm.amdgcn.s.wait.event.export.ready() #0 + +attributes #0 = { nounwind } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits