https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/150170
>From b45b107ffbad9b83816612ca1239061a9572ff52 Mon Sep 17 00:00:00 2001 From: Aaditya <aaditya.alokdeshpa...@amd.com> Date: Sat, 19 Jul 2025 12:57:27 +0530 Subject: [PATCH] Add builtins for wave reduction intrinsics --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++++++++++++++++++ 3 files changed, 461 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 878543566f0e3..c8b324193e9fb 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr") BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n") BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n") +//===----------------------------------------------------------------------===// + +// Wave Reduction builtins. + +//===----------------------------------------------------------------------===// + +BUILTIN(__builtin_amdgcn_wave_reduce_add_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "UiUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "UiUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_add_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWii", "nc") + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 7dccf82b1a7a3..28ea918b97cc5 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); } +static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) { + switch (BuiltinID) { + default: + llvm_unreachable("Unknown BuiltinID for wave reduction"); + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64: + return Intrinsic::amdgcn_wave_reduce_add; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64: + return Intrinsic::amdgcn_wave_reduce_sub; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + return Intrinsic::amdgcn_wave_reduce_min; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + return Intrinsic::amdgcn_wave_reduce_umin; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + return Intrinsic::amdgcn_wave_reduce_max; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + return Intrinsic::amdgcn_wave_reduce_umax; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + return Intrinsic::amdgcn_wave_reduce_and; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + return Intrinsic::amdgcn_wave_reduce_or; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: + return Intrinsic::amdgcn_wave_reduce_xor; + } +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: { + Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID); + llvm::Value *Value = EmitScalarExpr(E->getArg(0)); + llvm::Value *Strategy = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()}); + return Builder.CreateCall(F, {Value, Strategy}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index bf022bc6eb446..16f5a524f3094 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -398,6 +398,384 @@ void test_s_sendmsghalt_var(int in) __builtin_amdgcn_s_sendmsghalt(1, in); } +// CHECK-LABEL: @test_wave_reduce_add_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 2); +} + // CHECK-LABEL: @test_s_barrier // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier( void test_s_barrier() _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits