Author: David Stuttard
Date: 2022-09-07T08:37:18+02:00
New Revision: dca7f087109debc1f376946dc244ed5d7e27a257
URL: https://github.com/llvm/llvm-project/commit/dca7f087109debc1f376946dc244ed5d7e27a257 DIFF: https://github.com/llvm/llvm-project/commit/dca7f087109debc1f376946dc244ed5d7e27a257.diff LOG: AMDGPU: mbcnt allow for non-zero src1 for known-bits Src1 for mbcnt can be a non-zero literal or register. Take this into account when calculating known bits. Differential Revision: https://reviews.llvm.org/D131478 (cherry picked from commit 1d1cc05539e275ae7666fc4b44bf725ec335078a) Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index bf520a5604043..c0a94cc758bb2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4600,9 +4600,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( case Intrinsic::amdgcn_mbcnt_hi: { const GCNSubtarget &ST = DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); - // These return at most the wavefront size - 1. + // These return at most the (wavefront size - 1) + src1 + // As long as src1 is an immediate we can calc known bits + KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); + unsigned Src1ValBits = Src1Known.countMaxActiveBits(); + unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); + // Cater for potential carry + MaxActiveBits += Src1ValBits ? 
1 : 0; unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); + if (MaxActiveBits < Size) + Known.Zero.setHighBits(Size - MaxActiveBits); break; } case Intrinsic::amdgcn_workitem_id_x: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll index 04405470aff0c..88d6bea38b100 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -14,24 +14,79 @@ main_body: ret void } -; GCN-LABEL: {{^}}mbcnt_lo_known_bits: +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_1: ; GCN: v_mbcnt_lo_u32_b32 -; GCN-NOT: and -define i32 @mbcnt_lo_known_bits(i32 %x, i32 %y) #0 { +; GCN: v_and_b32_e32 +define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 { %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 %y) %mask = and i32 %lo, 63 ret i32 %mask } -; GCN-LABEL: {{^}}mbcnt_hi_known_bits: -; GCN: v_mbcnt_hi_u32_b32 +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_2: +; GCN: v_mbcnt_lo_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 { + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 0) + %mask = and i32 %lo, 63 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_3: +; GCN: v_mbcnt_lo_u32_b32 ; GCN-NOT: and -define i32 @mbcnt_hi_known_bits(i32 %x, i32 %y) #0 { +define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 { + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15) + %mask = and i32 %lo, 127 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_4: +; GCN: v_mbcnt_lo_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 { + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15) + %mask = and i32 %lo, 63 + ret i32 %mask +} + + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_1: +; GCN: v_mbcnt_hi_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 { %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 %y) %mask = and i32 %hi, 63 ret i32 %mask } +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_2: 
+; GCN: v_mbcnt_hi_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 0) + %mask = and i32 %hi, 63 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_3: +; GCN: v_mbcnt_hi_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15) + %mask = and i32 %hi, 127 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_4: +; GCN: v_mbcnt_hi_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15) + %mask = and i32 %hi, 63 + ret i32 %mask +} + declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits