llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) <details> <summary>Changes</summary> There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp that check for ISD::ADD in a pointer context, but as far as I can tell those are only relevant for 32-bit pointer arithmetic (like frame indices/scratch addresses and LDS), for which we don't enable PTRADD generation yet. For SWDEV-516125. --- Patch is 21.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145330.diff 6 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+16-5) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+3-3) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4-3) - (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll (+22-45) - (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+59-137) ``````````diff diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0f5a943d663d7..06953bdb31ea4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast<GlobalAddressSDNode>(Src); - else if (Src.getOpcode() == ISD::ADD && + else if (Src->isAnyAdd() && Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast<GlobalAddressSDNode>(Src.getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66717135c9adf..63ca47bb119e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -615,8 +615,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // operands on the new node are also disjoint. SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint : SDNodeFlags::None); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::PTRADD) { + // It isn't a ptradd anymore if it doesn't operate on the entire + // pointer. + Opcode = ISD::ADD; + } SDValue X = DAG.getNode( - Op.getOpcode(), dl, SmallVT, + Opcode, dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags); assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); @@ -2851,6 +2857,11 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, And1); } [[fallthrough]]; + case ISD::PTRADD: + if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) + break; + // PTRADD behaves like ADD if pointers are represented as integers. + [[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2960,10 +2971,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::MUL) { Known = KnownBits::mul(KnownOp0, KnownOp1); - } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB. + } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB. Known = KnownBits::computeForAddSub( - Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), - Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1); + Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(), + KnownOp0, KnownOp1); } break; } @@ -5593,7 +5604,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, return true; } - if (N->getOpcode() == ISD::ADD) { + if (N->isAnyAdd()) { SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6e990cb2e160c..ee73ad5dda945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, C1 = nullptr; } - if (N0.getOpcode() == ISD::ADD) { + if (N0->isAnyAdd()) { // (add N2, N3) -> addr64, or // (add (add N2, N3), C1) -> addr64 SDValue N2 = N0.getOperand(0); @@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } // Match the variable offset. - if (Addr.getOpcode() == ISD::ADD) { + if (Addr->isAnyAdd()) { LHS = Addr.getOperand(0); RHS = Addr.getOperand(1); @@ -2230,7 +2230,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue N0, N1; // Extract the base and offset if possible. - if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) { + if (CurDAG->isBaseWithConstantOffset(Addr) || Addr->isAnyAdd()) { N0 = Addr.getOperand(0); N1 = Addr.getOperand(1); } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ec57d231dab5d..029ea2370e18d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10488,7 +10488,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue VOffset; // Try to split SAddr and VOffset. Global and LDS pointers share the same // immediate offset, so we cannot use a regular SelectGlobalSAddr(). - if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) { + if (Addr->isDivergent() && Addr->isAnyAdd()) { SDValue LHS = Addr.getOperand(0); SDValue RHS = Addr.getOperand(1); @@ -12038,8 +12038,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace, // We only do this to handle cases where it's profitable when there are // multiple uses of the add, so defer to the standard combine. - if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) || - N0->hasOneUse()) + if ((!N0->isAnyAdd() && N0.getOpcode() != ISD::OR) || N0->hasOneUse()) return SDValue(); const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1); @@ -12078,6 +12077,8 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace, N->getFlags().hasNoUnsignedWrap() && (N0.getOpcode() == ISD::OR || N0->getFlags().hasNoUnsignedWrap())); + // Use ISD::ADD even if the original operation was ISD::PTRADD, since we can't + // be sure that the new left operand is a proper base pointer. return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags); } diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll index fab56383ffa8a..ff90f1f175c3c 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll @@ -5,50 +5,26 @@ ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF. define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { -; GFX6_PTRADD-LABEL: v_add_i32: -; GFX6_PTRADD: ; %bb.0: -; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000 -; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0 -; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7 -; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0) -; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3 -; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0 -; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10 -; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10 -; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc -; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0) -; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc -; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0) -; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1 -; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0 -; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1 -; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GFX6_PTRADD-NEXT: s_endpgm -; -; GFX6_LEGACY-LABEL: v_add_i32: -; GFX6_LEGACY: ; %bb.0: -; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000 -; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0 -; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7 -; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0) -; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0 -; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc -; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0) -; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc -; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0) -; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1 -; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0 -; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1 -; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GFX6_LEGACY-NEXT: s_endpgm +; GFX6-LABEL: v_add_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0x100f000 +; GFX6-NEXT: s_mov_b32 s10, 0 +; GFX6-NEXT: s_mov_b32 s11, s7 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX6-NEXT: v_mov_b32_e32 v1, 0 +; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s4, s0 +; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 @@ -60,4 +36,5 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX6: {{.*}} +; GFX6_LEGACY: {{.*}} +; GFX6_PTRADD: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 0cd920616c515..893deb35fe822 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -294,27 +294,15 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) { -; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm: -; GFX942_PTRADD: ; %bb.0: ; %entry -; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0 -; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1 -; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] -; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16 -; GFX942_PTRADD-NEXT: s_endpgm -; -; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm: -; GFX942_LEGACY: ; %bb.0: ; %entry -; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1 -; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16 -; GFX942_LEGACY-NEXT: s_endpgm +; GFX942-LABEL: uniform_base_varying_offset_imm: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] offset:16 +; GFX942-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %shift = shl i32 %tid, 2 @@ -328,33 +316,18 @@ entry: ; Adjusted from global-saddr-load.ll. Tests PTRADD handling in ; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset. define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset, ptr addrspace(1) %r) { -; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset: -; GFX942_PTRADD: ; %bb.0: -; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8 -; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0 -; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6 -; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0 -; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0 -; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3] -; GFX942_PTRADD-NEXT: s_endpgm -; -; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset: -; GFX942_LEGACY: ; %bb.0: -; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8 -; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 -; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0 -; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0 -; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0 -; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3] -; GFX942_LEGACY-NEXT: s_endpgm +; GFX942-LABEL: global_load_saddr_i32_uniform_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm %zext.offset = zext i32 %soffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %load = load i32, ptr addrspace(1) %gep0 @@ -366,28 +339,15 @@ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) ; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for ; Intrinsic::amdgcn_global_load_lds. define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) { -; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr: -; GFX942_PTRADD: ; %bb.0: ; %main_body -; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1 -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0 -; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3] -; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0 -; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0 -; GFX942_PTRADD-NEXT: s_nop 0 -; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1 -; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] -; -; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr: -; GFX942_LEGACY: ; %bb.0: ; %main_body -; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0 -; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2 -; GFX942_LEGACY-NEXT: s_nop 0 -; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 -; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_load_lds_dword_saddr_and_vaddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_readfirstlane_b32 s2, v0 +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %voffset.64 = zext i32 %voffset to i64 %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64 @@ -398,29 +358,17 @@ main_body: ; Taken from shl_add_ptr_global.ll, tests PTRADD handling in ; SITargetLowering::performSHLPtrCombine. define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) { -; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd: -; GFX942_PTRADD: ; %bb.0: -; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80 -; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1] -; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1] -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000 -; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off -; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1 -; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) -; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] -; -; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd: -; GFX942_LEGACY: ; %bb.0: -; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5] -; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000 -; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512 -; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80 -; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1] -; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1 -; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) -; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shl_base_global_ptr_global_atomic_fadd: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5] +; GFX942-NEXT: v_mov_b32_e32 v6, 0x42c80000 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512 +; GFX942-NEXT: s_mov_b64 s[0:1], 0x80 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1] +; GFX942-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32 %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64 %shl = shl i64 %cast, 2 @@ -433,27 +381,16 @@ define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr a ; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and ; TargetLowering::ShrinkDemandedOp. define i32 @gep_in_const_as_cast_to_const32_as(ptr addrspace(4) %src, i64 %offset) { -; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as: -; GFX942_PTRADD: ; %bb.0: ; %entry -; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] -; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0 -; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0 -; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0 -; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] -; -; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as: -; GFX942_LEGACY: ; %bb.0: ; %entry -; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0 -; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0 -; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) -; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0 -; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: gep_in_const_as_cast_to_const32_as: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX942-NEXT: s_mov_b32 s1, 0 +; GFX942-NEXT: v_readfirstlane_b32 s0, v0 +; GFX942-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] entry: %gep = getelementptr i8, ptr addrspace(4) %src, i64 %offset %gep.cast = addrspacecast ptr addrspace(4) %gep to ptr addrspace(6) @@ -465,29 +402,14 @@ entry: ; Test PTRADD handling in isMemSrcFromConstant. defi... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/145330 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits