--- lib/Target/AMDGPU/AMDILDevice.cpp | 4 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 69 ++++++++++++++++++++++++---------- lib/Target/AMDGPU/R600Instructions.td | 4 +- 3 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp b/lib/Target/AMDGPU/AMDILDevice.cpp index 3955828..b440aa6 100644 --- a/lib/Target/AMDGPU/AMDILDevice.cpp +++ b/lib/Target/AMDGPU/AMDILDevice.cpp @@ -129,8 +129,8 @@ std::string AMDGPUDevice::getDataLayout() const { return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-i32:128:128-i64:128:128-f32:128:128-f64:128:128-f80:128:128" + "-v16:16:16-v24:32:32-v32:128:128-v48:128:128-v64:128:128" "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" "-v512:512:512-v1024:1024:1024-v2048:2048:2048" "-n8:16:32:64"); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 712dd3f..e6418b2 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -94,15 +94,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); // Legalize loads and stores to the private address space. + setOperationAction(ISD::LOAD, MVT::f32, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v2f32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); setOperationAction(ISD::STORE, MVT::i8, Custom); + setOperationAction(ISD::STORE, MVT::f32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v2f32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4f32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); @@ -522,6 +529,17 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + return; + } + case ISD::STORE: + SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + return; } } @@ -818,26 +836,32 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return SDValue(); } + // LLVM generates byte-addresing pointers, but we need to convert this to a + // register index. Each register holds 16 bytes (4 x 32), so in order to + // get the register index, we need to divide the pointer by 16. + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, MVT::i32)); + if (VT.isVector()) { + unsigned NumElemVT = VT.getVectorNumElements(); EVT ElemVT = VT.getVectorElementType(); SDValue Loads[4]; - // LLVM generates byte-addresing pointers, but we need to convert this to a - // register index. Each register holds 16 bytes (4 x 32), so in order to - // get the register index, we need to divide the pointer by 16. - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, MVT::i32)); - for (unsigned i = 0; i < 4; ++i) { + for (unsigned i = 0; i < NumElemVT; ++i) { Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, Chain, Ptr, - DAG.getTargetConstant(i, MVT::i32), // Channel + DAG.getConstant(i, MVT::i32), // Channel Op.getOperand(2)); } - LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4); + for (unsigned i = NumElemVT; i < 4; ++i) { + Loads[i] = DAG.getUNDEF(ElemVT); + } + EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4); + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4); } else { LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, Chain, Ptr, - DAG.getTargetConstant(0, MVT::i32), // Channel + DAG.getConstant(0, MVT::i32), // Channel Op.getOperand(2)); } @@ -863,32 +887,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const return SDValue(); } + // LLVM generates byte-addresing pointers, but we need to convert this to a + // register index. Each register holds 16 bytes (4 x 32), so in order to + // get the register index, we need to divide the pointer by 16. + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, MVT::i32)); + if (VT.isVector()) { + unsigned NumElemVT = VT.getVectorNumElements(); EVT ElemVT = VT.getVectorElementType(); SDValue Stores[4]; - // LLVM generates byte-addresing pointers, but we need to convert this to a - // register index. Each register holds 16 bytes (4 x 32), so in order to - // get the register index, we need to divide the pointer by 16. - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, MVT::i32)); - - for (unsigned i = 0; i < 4; ++i) { + for (unsigned i = 0; i < NumElemVT; ++i) { SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, DAG.getConstant(i, MVT::i32)); Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Elem, Ptr, - DAG.getTargetConstant(i, MVT::i32)); // Channel + DAG.getConstant(i, MVT::i32)); // Channel MFI->IndirectChannels.set(i); } - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT); } else { if (VT == MVT::i8) { Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); } + // We can go here with f32 elements from a v3f32. + // Such elements have a non 16 bytes aligned addresses that we can use + SDValue Channel = DAG.getNode(ISD::AND, DL, MVT::i32, Op.getOperand(2), + DAG.getConstant(15, MVT::i32)); + Channel = DAG.getNode(ISD::SRL, DL, MVT::i32, Channel, + DAG.getConstant(2, MVT::i32)); Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, - DAG.getTargetConstant(0, MVT::i32)); // Channel + Channel); // Channel MFI->IndirectChannels.set(0); } diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index d081824..09183e8 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -1389,14 +1389,14 @@ class RegisterLoad <ValueType vt> : InstR600 <0x0, (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan), "RegisterLoad $dst, $addr", [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr, - (i32 timm:$chan)))], + (i32 imm:$chan)))], NullALU >; class RegisterStore <ValueType vt> : InstR600 <0x0, (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan), "RegisterStore_i32 $val, $addr", - [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))], + [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 imm:$chan))], NullALU >; -- 1.7.11.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev