load

Matthew W.S. Bell Sun, 11 Nov 2012 22:27:59 -0800

On Sun, 2012-11-11 at 22:22 +0100, Vincent Lejeune wrote:
> ---
>  lib/Target/AMDGPU/AMDILDevice.cpp      |  4 +-
>  lib/Target/AMDGPU/R600ISelLowering.cpp | 69 
> ++++++++++++++++++++++++----------
>  lib/Target/AMDGPU/R600Instructions.td  |  4 +-
>  3 files changed, 54 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp 
> b/lib/Target/AMDGPU/AMDILDevice.cpp
> index 3955828..b440aa6 100644
> --- a/lib/Target/AMDGPU/AMDILDevice.cpp
> +++ b/lib/Target/AMDGPU/AMDILDevice.cpp
> @@ -129,8 +129,8 @@ std::string
>  AMDGPUDevice::getDataLayout() const
>  {
>      return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
> -      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
> -      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
> +      "-i32:128:128-i64:128:128-f32:128:128-f64:128:128-f80:128:128"
> +      "-v16:16:16-v24:32:32-v32:128:128-v48:128:128-v64:128:128"
>        "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
>        "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
>        "-n8:16:32:64");
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
> b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 712dd3f..e6418b2 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -94,15 +94,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) 
> :
>    setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
>    setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
>    // Legalize loads and stores to the private address space.
> +  setOperationAction(ISD::LOAD, MVT::f32, Custom);
>    setOperationAction(ISD::LOAD, MVT::i32, Custom);
> +  setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
> +  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
>    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
>    setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
>    setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
>    setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
>    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
>    setOperationAction(ISD::STORE, MVT::i8, Custom);
> +  setOperationAction(ISD::STORE, MVT::f32, Custom);
>    setOperationAction(ISD::STORE, MVT::i32, Custom);
> +  setOperationAction(ISD::STORE, MVT::v2f32, Custom);
> +  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
>    setOperationAction(ISD::STORE, MVT::v4f32, Custom);
> +  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
>  
>    setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
>  
> @@ -522,6 +529,17 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
>    switch (N->getOpcode()) {
>    default: return;
>    case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), 
> DAG));


Does this case fall through into the new `case ISD::LOAD` below, and should it? There is no `return` after the FP_TO_UINT push_back.

> +  case ISD::LOAD: {
> +    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
> +    Results.push_back(SDValue(Node, 0));
> +    Results.push_back(SDValue(Node, 1));
> +    return;
> +  }
> +  case ISD::STORE:
> +    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
> +    Results.push_back(SDValue(Node, 0));
> +    Results.push_back(SDValue(Node, 1));
> +    return;
>    }
>  }
>  
> @@ -818,26 +836,32 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, 
> SelectionDAG &DAG) const
>      return SDValue();
>    }
>  
> +  // LLVM generates byte-addressing pointers, but we need to convert this to a
> +  // register index.  Each register holds 16 bytes (4 x 32), so in order to
> +  // get the register index, we need to divide the pointer by 16.
> +  Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> +                    DAG.getConstant(4, MVT::i32));
> +
>    if (VT.isVector()) {
> +    unsigned NumElemVT = VT.getVectorNumElements();
>      EVT ElemVT = VT.getVectorElementType();
>      SDValue Loads[4];
> -    // LLVM generates byte-addresing pointers, but we need to convert this 
> to a
> -    // register index.  Each register holds 16 bytes (4 x 32), so in order to
> -    // get the register index, we need to divide the pointer by 16.
> -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> -                      DAG.getConstant(4, MVT::i32));
>  
> -    for (unsigned i = 0; i < 4; ++i) {
> +    for (unsigned i = 0; i < NumElemVT; ++i) {
>        Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
>                               Chain, Ptr,
> -                             DAG.getTargetConstant(i, MVT::i32), // Channel
> +                             DAG.getConstant(i, MVT::i32), // Channel
>                               Op.getOperand(2));
>      }
> -    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
> +    for (unsigned i = NumElemVT; i < 4; ++i) {
> +      Loads[i] = DAG.getUNDEF(ElemVT);
> +    }
> +    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
> +    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
>    } else {
>      LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
>                                Chain, Ptr,
> -                              DAG.getTargetConstant(0, MVT::i32), // Channel
> +                              DAG.getConstant(0, MVT::i32), // Channel
>                                Op.getOperand(2));
>    }
>  
> @@ -863,32 +887,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, 
> SelectionDAG &DAG) const
>      return SDValue();
>    }
>  
> +  // LLVM generates byte-addressing pointers, but we need to convert this to a
> +  // register index.  Each register holds 16 bytes (4 x 32), so in order to
> +  // get the register index, we need to divide the pointer by 16.
> +  Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> +                    DAG.getConstant(4, MVT::i32));
> +
>    if (VT.isVector()) {
> +    unsigned NumElemVT = VT.getVectorNumElements();
>      EVT ElemVT = VT.getVectorElementType();
>      SDValue Stores[4];
>  
> -    // LLVM generates byte-addresing pointers, but we need to convert this 
> to a
> -    // register index.  Each register holds 16 bytes (4 x 32), so in order to
> -    // get the register index, we need to divide the pointer by 16.
> -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> -                      DAG.getConstant(4, MVT::i32));
> -
> -    for (unsigned i = 0; i < 4; ++i) {
> +    for (unsigned i = 0; i < NumElemVT; ++i) {
>        SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
>                                   Value, DAG.getConstant(i, MVT::i32));
>  
>        Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
>                                Chain, Elem, Ptr,
> -                              DAG.getTargetConstant(i, MVT::i32)); // Channel
> +                              DAG.getConstant(i, MVT::i32)); // Channel
>        MFI->IndirectChannels.set(i);
>      }
> -     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4);
> +     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 
> NumElemVT);
>     } else {
>      if (VT == MVT::i8) {
>        Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
>      }
> +    // We can get here with f32 elements from a v3f32.
> +    // Such elements have non-16-byte-aligned addresses, which we can use to pick the channel.
> +    SDValue Channel = DAG.getNode(ISD::AND, DL, MVT::i32, Op.getOperand(2),
> +        DAG.getConstant(15, MVT::i32));
> +    Channel = DAG.getNode(ISD::SRL, DL, MVT::i32, Channel,
> +            DAG.getConstant(2, MVT::i32));
>      Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, 
> Value, Ptr,
> -    DAG.getTargetConstant(0, MVT::i32)); // Channel 
> +        Channel); // Channel 
>      MFI->IndirectChannels.set(0);
>    }
>  
> diff --git a/lib/Target/AMDGPU/R600Instructions.td 
> b/lib/Target/AMDGPU/R600Instructions.td
> index d081824..09183e8 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -1389,14 +1389,14 @@ class RegisterLoad <ValueType vt> : InstR600 <0x0,
>    (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
>    "RegisterLoad $dst, $addr",
>    [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
> -                               (i32 timm:$chan)))],
> +                               (i32 imm:$chan)))],
>    NullALU
>  >;
>  
>  class RegisterStore <ValueType vt> : InstR600 <0x0,
>    (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
>    "RegisterStore_i32 $val, $addr",
> -  [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 
> timm:$chan))],
> +  [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 
> imm:$chan))],
>    NullALU
>  >;
>  
> -- 
> 1.7.11.7
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Matthew

signature.asc
Description: This is a digitally signed message part

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeon/llvm: support for f32, v2f32, v3f32 store/load

Reply via email to