On Sun, 2012-11-11 at 22:22 +0100, Vincent Lejeune wrote: > --- > lib/Target/AMDGPU/AMDILDevice.cpp | 4 +- > lib/Target/AMDGPU/R600ISelLowering.cpp | 69 > ++++++++++++++++++++++++---------- > lib/Target/AMDGPU/R600Instructions.td | 4 +- > 3 files changed, 54 insertions(+), 23 deletions(-) > > diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp > b/lib/Target/AMDGPU/AMDILDevice.cpp > index 3955828..b440aa6 100644 > --- a/lib/Target/AMDGPU/AMDILDevice.cpp > +++ b/lib/Target/AMDGPU/AMDILDevice.cpp > @@ -129,8 +129,8 @@ std::string > AMDGPUDevice::getDataLayout() const > { > return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" > - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" > - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" > + "-i32:128:128-i64:128:128-f32:128:128-f64:128:128-f80:128:128" > + "-v16:16:16-v24:32:32-v32:128:128-v48:128:128-v64:128:128" > "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" > "-v512:512:512-v1024:1024:1024-v2048:2048:2048" > "-n8:16:32:64"); > diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp > b/lib/Target/AMDGPU/R600ISelLowering.cpp > index 712dd3f..e6418b2 100644 > --- a/lib/Target/AMDGPU/R600ISelLowering.cpp > +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp > @@ -94,15 +94,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) > : > setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); > setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); > // Legalize loads and stores to the private address space. 
> + setOperationAction(ISD::LOAD, MVT::f32, Custom); > setOperationAction(ISD::LOAD, MVT::i32, Custom); > + setOperationAction(ISD::LOAD, MVT::v2f32, Custom); > + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); > setOperationAction(ISD::LOAD, MVT::v4f32, Custom); > setOperationAction(ISD::LOAD, MVT::v4i32, Custom); > setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); > setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); > setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); > setOperationAction(ISD::STORE, MVT::i8, Custom); > + setOperationAction(ISD::STORE, MVT::f32, Custom); > setOperationAction(ISD::STORE, MVT::i32, Custom); > + setOperationAction(ISD::STORE, MVT::v2f32, Custom); > + setOperationAction(ISD::STORE, MVT::v2i32, Custom); > setOperationAction(ISD::STORE, MVT::v4f32, Custom); > + setOperationAction(ISD::STORE, MVT::v4i32, Custom); > > setOperationAction(ISD::FrameIndex, MVT::i32, Custom); > > @@ -522,6 +529,17 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, > switch (N->getOpcode()) { > default: return; > case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), > DAG));
Does this case fall through into the next one, and should it? > + case ISD::LOAD: { > + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); > + Results.push_back(SDValue(Node, 0)); > + Results.push_back(SDValue(Node, 1)); > + return; > + } > + case ISD::STORE: > + SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode(); > + Results.push_back(SDValue(Node, 0)); > + Results.push_back(SDValue(Node, 1)); > + return; > } > } > > @@ -818,26 +836,32 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, > SelectionDAG &DAG) const > return SDValue(); > } > > + // LLVM generates byte-addresing pointers, but we need to convert this to a > + // register index. Each register holds 16 bytes (4 x 32), so in order to > + // get the register index, we need to divide the pointer by 16. > + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, > + DAG.getConstant(4, MVT::i32)); > + > if (VT.isVector()) { > + unsigned NumElemVT = VT.getVectorNumElements(); > EVT ElemVT = VT.getVectorElementType(); > SDValue Loads[4]; > - // LLVM generates byte-addresing pointers, but we need to convert this > to a > - // register index. Each register holds 16 bytes (4 x 32), so in order to > - // get the register index, we need to divide the pointer by 16. 
> - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, > - DAG.getConstant(4, MVT::i32)); > > - for (unsigned i = 0; i < 4; ++i) { > + for (unsigned i = 0; i < NumElemVT; ++i) { > Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, > Chain, Ptr, > - DAG.getTargetConstant(i, MVT::i32), // Channel > + DAG.getConstant(i, MVT::i32), // Channel > Op.getOperand(2)); > } > - LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4); > + for (unsigned i = NumElemVT; i < 4; ++i) { > + Loads[i] = DAG.getUNDEF(ElemVT); > + } > + EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4); > + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4); > } else { > LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, > Chain, Ptr, > - DAG.getTargetConstant(0, MVT::i32), // Channel > + DAG.getConstant(0, MVT::i32), // Channel > Op.getOperand(2)); > } > > @@ -863,32 +887,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, > SelectionDAG &DAG) const > return SDValue(); > } > > + // LLVM generates byte-addresing pointers, but we need to convert this to a > + // register index. Each register holds 16 bytes (4 x 32), so in order to > + // get the register index, we need to divide the pointer by 16. > + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, > + DAG.getConstant(4, MVT::i32)); > + > if (VT.isVector()) { > + unsigned NumElemVT = VT.getVectorNumElements(); > EVT ElemVT = VT.getVectorElementType(); > SDValue Stores[4]; > > - // LLVM generates byte-addresing pointers, but we need to convert this > to a > - // register index. Each register holds 16 bytes (4 x 32), so in order to > - // get the register index, we need to divide the pointer by 16. 
> - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, > - DAG.getConstant(4, MVT::i32)); > - > - for (unsigned i = 0; i < 4; ++i) { > + for (unsigned i = 0; i < NumElemVT; ++i) { > SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, > Value, DAG.getConstant(i, MVT::i32)); > > Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, > Chain, Elem, Ptr, > - DAG.getTargetConstant(i, MVT::i32)); // Channel > + DAG.getConstant(i, MVT::i32)); // Channel > MFI->IndirectChannels.set(i); > } > - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4); > + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, > NumElemVT); > } else { > if (VT == MVT::i8) { > Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); > } > + // We can go here with f32 elements from a v3f32. > + // Such elements have a non 16 bytes aligned addresses that we can use > + SDValue Channel = DAG.getNode(ISD::AND, DL, MVT::i32, Op.getOperand(2), > + DAG.getConstant(15, MVT::i32)); > + Channel = DAG.getNode(ISD::SRL, DL, MVT::i32, Channel, > + DAG.getConstant(2, MVT::i32)); > Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, > Value, Ptr, > - DAG.getTargetConstant(0, MVT::i32)); // Channel > + Channel); // Channel > MFI->IndirectChannels.set(0); > } > > diff --git a/lib/Target/AMDGPU/R600Instructions.td > b/lib/Target/AMDGPU/R600Instructions.td > index d081824..09183e8 100644 > --- a/lib/Target/AMDGPU/R600Instructions.td > +++ b/lib/Target/AMDGPU/R600Instructions.td > @@ -1389,14 +1389,14 @@ class RegisterLoad <ValueType vt> : InstR600 <0x0, > (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan), > "RegisterLoad $dst, $addr", > [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr, > - (i32 timm:$chan)))], > + (i32 imm:$chan)))], > NullALU > >; > > class RegisterStore <ValueType vt> : InstR600 <0x0, > (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan), > "RegisterStore_i32 $val, $addr", > - [(REGISTER_STORE (vt 
R600_Reg32:$val), ADDRIndirect:$addr, (i32 > timm:$chan))], > + [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 > imm:$chan))], > NullALU > >; > > -- > 1.7.11.7 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Matthew
signature.asc
Description: This is a digitally signed message part
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev