https://github.com/ranapratap55 created https://github.com/llvm/llvm-project/pull/69567
Currently, __builtin_read_exec_hi lowers to llvm.read_register; this patch lowers it to use amdgcn_ballot instead. From 340e633da9e3ab10efc0c0d430b9546cd2f19cfe Mon Sep 17 00:00:00 2001 From: ranapratap55 <ranapratapreddy.nimmakay...@amd.com> Date: Thu, 19 Oct 2023 12:52:13 +0530 Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot --- clang/lib/CodeGen/CGBuiltin.cpp | 27 +++++++++++++++------ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index db9f354fa8386d3..d60826f293f0c46 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind { static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); - llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); - return Call; + llvm::Value *Call; + Function *F; + + if (isExecHi) { + F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); + Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + Value *C1 = llvm::ConstantInt::get(ValueType, 32); + Value *Rt2 = Builder.CreateLShr(Call, C1); + Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); + return Rt2; + } else { + F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + return Call; + } } // Generates the IR for the read/write special register builtin, @@ -17837,10 +17849,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case 
AMDGPU::BI__builtin_amdgcn_read_exec: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { - return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } + return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 8938642e3b19f8c..0bc9a54682d3e31 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits