On Fri, Feb 07, 2014 at 10:31:20AM -0800, Matt Arsenault wrote: > I didn't think to try this. Where is the address folding happening? >
There are TableGen patterns that do the folding. I recently added several new ones: r200932-r200935. -Tom > On 02/07/2014 07:46 AM, Tom Stellard wrote: > >From: Tom Stellard <[email protected]> > > > >--- > > lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 > > ++++++++++++++++++++++++++++++++++ > > lib/Target/R600/SIISelLowering.cpp | 29 -------------------- > > lib/Target/R600/SIISelLowering.h | 1 - > > test/CodeGen/R600/add.ll | 10 +++++++ > > test/CodeGen/R600/add_i64.ll | 23 +++++++++++----- > > 5 files changed, 75 insertions(+), 36 deletions(-) > > > >diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp > >b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp > >index a989135..fea875c 100644 > >--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp > >+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp > >@@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { > > } > > switch (Opc) { > > default: break; > >+ // We are selecting i64 ADD here instead of custom lower it during > >+ // DAG legalization, so we can fold some i64 ADDs used for address > >+ // calculation into the LOAD and STORE instructions. > >+ case ISD::ADD: { > >+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); > >+ if (N->getValueType(0) != MVT::i64 || > >+ ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) > >+ break; > >+ > >+ SDLoc DL(N); > >+ SDValue LHS = N->getOperand(0); > >+ SDValue RHS = N->getOperand(1); > >+ > >+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); > >+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); > >+ > >+ SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, > >+ DL, MVT::i32, LHS, Sub0); > >+ SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, > >+ DL, MVT::i32, LHS, Sub1); > >+ > >+ SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, > >+ DL, MVT::i32, RHS, Sub0); > >+ SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, > >+ DL, MVT::i32, RHS, Sub1); > >+ > >+ SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); > >+ > >+ SmallVector<SDValue, 8> AddLoArgs; > >+ AddLoArgs.push_back(SDValue(Lo0, 0)); > >+ AddLoArgs.push_back(SDValue(Lo1, 0)); > >+ > >+ SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, > >+ VTList, AddLoArgs); > >+ SDValue Carry = SDValue(AddLo, 1); > >+ SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL, > >+ MVT::i32, SDValue(Hi0, 0), > >+ SDValue(Hi1, 0), Carry); > >+ > >+ SDValue Args[5] = { > >+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), > >+ SDValue(AddLo,0), > >+ Sub0, > >+ SDValue(AddHi,0), > >+ Sub1, > >+ }; > >+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5); > >+ } > > case ISD::BUILD_VECTOR: { > > unsigned RegClassID; > > const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); > >diff --git a/lib/Target/R600/SIISelLowering.cpp > >b/lib/Target/R600/SIISelLowering.cpp > >index 0a22d16..4d2f370 100644 > >--- a/lib/Target/R600/SIISelLowering.cpp > >+++ b/lib/Target/R600/SIISelLowering.cpp > >@@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : > > setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); > > setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); > >- setOperationAction(ISD::ADD, MVT::i64, Legal); > > setOperationAction(ISD::ADD, MVT::i32, Legal); > > setOperationAction(ISD::ADDC, MVT::i32, Legal); > > setOperationAction(ISD::ADDE, MVT::i32, Legal); > >@@ -475,7 +474,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, > >SelectionDAG &DAG) const { > > SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); > > switch (Op.getOpcode()) { > > default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); > >- case ISD::ADD: return LowerADD(Op, DAG); > > case ISD::BRCOND: return LowerBRCOND(Op, DAG); > > case ISD::LOAD: { > > LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); > >@@ -613,33 +611,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, > >SelectionDAG &DAG) const { > > return SDValue(); > > } > >-SDValue SITargetLowering::LowerADD(SDValue Op, > >- SelectionDAG &DAG) const { > >- if (Op.getValueType() != MVT::i64) > >- return SDValue(); > >- > >- SDLoc DL(Op); > >- SDValue LHS = Op.getOperand(0); > >- SDValue RHS = Op.getOperand(1); > >- > >- SDValue Zero = DAG.getConstant(0, MVT::i32); > >- SDValue One = DAG.getConstant(1, MVT::i32); > >- > >- SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero); > >- SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One); > >- > >- SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero); > >- SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One); > >- > >- SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue); > >- > >- SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1); > >- SDValue Carry = AddLo.getValue(1); > >- SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry); > >- > >- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, > >AddHi.getValue(0)); > >-} > >- > > /// \brief Helper function for LowerBRCOND > > static SDNode *findUser(SDValue Value, unsigned Opcode) { > >diff --git a/lib/Target/R600/SIISelLowering.h > >b/lib/Target/R600/SIISelLowering.h > >index c9a43e6..ce94a13 100644 > >--- a/lib/Target/R600/SIISelLowering.h > >+++ b/lib/Target/R600/SIISelLowering.h > >@@ -31,7 +31,6 @@ class SITargetLowering : public AMDGPUTargetLowering { > > SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; > > SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; > > SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; > >- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; > > SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; > > SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; > >diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll > >index e4e7bc6..8de87f4 100644 > >--- a/test/CodeGen/R600/add.ll > >+++ b/test/CodeGen/R600/add.ll > >@@ -75,3 +75,13 @@ entry: > > store <8 x i32> %0, <8 x i32> addrspace(1)* %out > > ret void > > } > >+ > >+; FUNC-LABEL: @add64 > >+; SI-CHECK: S_ADD_I32 > >+; SI-CHECK: S_ADDC_U32 > >+define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { > >+entry: > >+ %0 = add i64 %a, %b > >+ store i64 %0, i64 addrspace(1)* %out > >+ ret void > >+} > >diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll > >index 303a1cb..82cd8a9 100644 > >--- a/test/CodeGen/R600/add_i64.ll > >+++ b/test/CodeGen/R600/add_i64.ll > >@@ -1,14 +1,13 @@ > >-; XFAIL: * > >-; This will fail until i64 add is enabled > >- > > ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s > >-declare i32 @llvm.SI.tid() readnone > >+declare i32 @llvm.r600.read.tidig.x() readnone > > ; SI-LABEL: @test_i64_vreg: > >+; SI: V_ADD_I32 > >+; SI: V_ADDC_U32 > > define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 > > addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) { > >- %tid = call i32 @llvm.SI.tid() readnone > >+ %tid = call i32 @llvm.r600.read.tidig.x() readnone > > %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid > > %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid > > %a = load i64 addrspace(1)* %a_ptr > >@@ -20,6 +19,8 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, > >i64 addrspace(1)* noa > > ; Check that the SGPR add operand is correctly moved to a VGPR. > > ; SI-LABEL: @sgpr_operand: > >+; SI: V_ADD_I32 > >+; SI: V_ADDC_U32 > > define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 > > addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) { > > %foo = load i64 addrspace(1)* %in, align 8 > > %result = add i64 %foo, %a > >@@ -31,6 +32,8 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, > >i64 addrspace(1)* noal > > ; SGPR as other operand. > > ; > > ; SI-LABEL: @sgpr_operand_reversed: > >+; SI: V_ADD_I32 > >+; SI: V_ADDC_U32 > > define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 > > addrspace(1)* noalias %in, i64 %a) { > > %foo = load i64 addrspace(1)* %in, align 8 > > %result = add i64 %a, %foo > >@@ -40,6 +43,10 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* > >noalias %out, i64 addrspace > > ; SI-LABEL: @test_v2i64_sreg: > >+; SI: S_ADD_I32 > >+; SI: S_ADDC_U32 > >+; SI: S_ADD_I32 > >+; SI: S_ADDC_U32 > > define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x > > i64> %a, <2 x i64> %b) { > > %result = add <2 x i64> %a, %b > > store <2 x i64> %result, <2 x i64> addrspace(1)* %out > >@@ -47,8 +54,12 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* > >noalias %out, <2 x i64> %a, > > } > > ; SI-LABEL: @test_v2i64_vreg: > >+; SI: V_ADD_I32 > >+; SI: V_ADDC_U32 > >+; SI: V_ADD_I32 > >+; SI: V_ADDC_U32 > > define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x > > i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { > >- %tid = call i32 @llvm.SI.tid() readnone > >+ %tid = call i32 @llvm.r600.read.tidig.x() readnone > > %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid > > %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid > > %a = load <2 x i64> addrspace(1)* %a_ptr > > _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
