Author: Jeffrey Byrnes
Date: 2022-10-17T15:29:54-07:00
New Revision: 37c65ebbcc0b7106fba7bb791a36d7ddabc60ece
URL: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece
DIFF: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece.diff

LOG: legalize IVE, v2i8, v4i8

Added: 

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 

################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 62ac1fcd95ce..37d907059687 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -577,6 +577,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
       break;
     }
 
+    if (VT.getScalarSizeInBits() == 8) {
+      break;
+    }
+
     assert(VT.getVectorElementType().bitsEq(MVT::i32));
     unsigned RegClassID =
         SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c32f92cd0da0..f25bcdd28d9c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -171,7 +171,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                       MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
                       MVT::v32i32},
                      Custom);
-
+
+  //setTruncStoreAction(MVT::i8, MVT::i32, Expand);
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
   setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -5729,6 +5730,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
 
   if (NumElts == 4 && EltSize == 16 && KIdx) {
+    //errs() << "special case for v4i16\n";
+    //errs() << "VecVT, Op1VT, EltVT: ";
+    errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " ";
+    errs() << EltVT.getEVTString() << "\n";
+
+
     SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
 
     SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
@@ -5755,6 +5762,46 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
   }
 
+  if (NumElts == 4 && EltSize == 8 && KIdx) {
+    errs() << "special case for v4i8\n";
+    errs() << "VecVT, Op1VT, EltVT: ";
+    errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " ";
+    errs() << EltVT.getEVTString() << "\n";
+
+
+    errs() << "First bitcast\n";
+    SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Vec);
+
+    SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+                                 DAG.getConstant(0, SL, MVT::i32));
+    SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+                                 DAG.getConstant(1, SL, MVT::i32));
+
+    errs() << "Second bitcast\n";
+    SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, LoHalf);
+    errs() << "Third bitcast\n";
+    SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, HiHalf);
+
+    unsigned Idx = KIdx->getZExtValue();
+    bool InsertLo = Idx < 2;
+    SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i8,
+                                  InsertLo ? LoVec : HiVec,
+                                  DAG.getNode(ISD::BITCAST, SL, MVT::i8, InsVal),
+                                  DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
+
+    errs() << "Fourth bitcast\n";
+    InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsHalf);
+
+    SDValue Concat = InsertLo ?
+                       DAG.getBuildVector(MVT::v2i16, SL, { InsHalf, HiHalf }) :
+                       DAG.getBuildVector(MVT::v2i16, SL, { LoHalf, InsHalf });
+
+    return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
+  }
+
+
+
+
   // Static indexing does not lower to stack access, and hence there is no need
   // for special custom lowering to avoid stack access.
   if (isa<ConstantSDNode>(Idx))
@@ -5885,11 +5932,12 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
     return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
   }
 
+/*
   if (ResultVT == MVT::i8) {
     SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt);
     return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
   }
-
+*/
   return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b0bf6aca56b5..f9129eaf3828 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2400,6 +2400,8 @@ def : GCNPat <
   (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
 >;
 
+
+
 def : GCNPat <
   (i1 (UniformUnaryFrag<trunc> i16:$a)),
   (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
 >;
@@ -2794,9 +2796,9 @@ def : GCNPat <
   (v2i8 (V_LSHLREV_B32_e64 (i8 8), SReg_32:$src1))
 >;
 
+/*
 def : GCNPat <
-  (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
-
+  (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
   (v4i8
@@ -2833,32 +2835,8 @@ def : GCNPat <
   )
 >;
 
-
-/*
-def : GCNPat <
-  (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
-  (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))))))
->;
-*/
-/*
-def : GCNPat <
-  (v2i8 (build_vector (i8:$src0), (i8:$src1))),
-  (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))
->;
-
-
-def : GCNPat <
-  (v2i8 (build_vector i8:$src0, (i8 undef))),
-  (COPY $src0)
->;
-
-def : GCNPat <
-  (v2i8 (DivergentBinFrag<build_vector> (i8 undef), (i8 SReg_32:$src1))),
-  (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)
->;
 */
-
 foreach Ty = [i16, f16] in {
   defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);
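Note on the new v4i8 INSERT_VECTOR_ELT path above: it bitcasts the packed vector to
v2i16, extracts the low and high 16-bit halves, performs the insert on one v2i8 half,
and then rebuilds the full vector from the two halves. As a rough illustration only
(not part of the commit), the same byte insertion can be modelled on a plain 32-bit
word; the helper name insertByteIntoV4i8 below is made up for this sketch.

#include <cstdint>

// Illustrative sketch: models the v4i8 insert on a plain 32-bit word,
// where byte i of `vec` holds element i of the v4i8.
uint32_t insertByteIntoV4i8(uint32_t vec, uint8_t val, unsigned idx) {
  uint16_t lo = vec & 0xFFFFu;          // elements 0 and 1 (the "LoHalf")
  uint16_t hi = vec >> 16;              // elements 2 and 3 (the "HiHalf")
  bool insertLo = idx < 2;              // same test as InsertLo in the patch
  unsigned sub = insertLo ? idx : idx - 2;
  uint16_t &half = insertLo ? lo : hi;  // pick the half that receives the element
  half &= ~(0xFFu << (8 * sub));        // clear the target byte in that half
  half |= uint16_t(val) << (8 * sub);   // write the new element
  return (uint32_t(hi) << 16) | lo;     // concatenate the halves again
}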