Author: Jeffrey Byrnes Date: 2022-10-13T14:13:27-07:00 New Revision: f431123ac5be268c4707d7f16878039c6051e71c
URL: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c DIFF: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c.diff LOG: resolved issues with ret v2i8 Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 528ee108408f..081d8d96c9e2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -958,6 +958,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + errs() << "\n\n\nDAG BEFORE\n"; + DAG.dump(); + errs() << "\n"; + LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. @@ -1310,10 +1314,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: + errs() << "from legalizeDAG.cpp\n"; LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { + errs() << "TLI.LowerOperation returned\n"; if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 95cd5371814e..5573acb5f6e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1131,6 +1131,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { + errs() << "Is terminator\n"; HandlePHINodesInSuccessorBlocks(I.getParent()); } @@ -1149,6 +1150,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { DAG, [&](SDNode *) { NodeInserted = true; }); } + errs() << "calling visit with opcode " << I.getOpcodeName() << "\n"; visit(I.getOpcode(), I); if (!I.isTerminator() && !HasTailCall && @@ -1936,6 +1938,7 @@ void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + errs() << "in visitRet\n"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); @@ -1955,6 +1958,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } if (!FuncInfo.CanLowerReturn) { + errs() << "!CanLowerReturn\n"; unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); @@ -1998,9 +2002,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { + errs() << "CanReturn && NumOpers !=0\n"; SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + errs() << "NumValues: " << NumValues << "\n"; if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -2027,9 +2033,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CC = F->getCallingConv(); + errs() << "calling getNumRegs for CallConv\n"; unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); SmallVector<SDValue, 4> Parts(NumParts); + errs() << "Calling getCopyToParts with NumParts: " << NumParts << "\n"; getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, CC, ExtendKind); @@ -2067,6 +2075,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } } + errs() << "Made it passed end of condition\n"; + // Push in swifterror virtual register as the last element of Outs. This makes // sure swifterror virtual register will be returned in the swifterror // physical register. @@ -2086,6 +2096,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT(TLI.getPointerTy(DL)))); } + errs() << "doing calling conv stuff\n"; bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction().getCallingConv(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 52b8eee7bcc7..35b099e8f1f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -371,6 +371,7 @@ static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + errs() << "SelectionDAGIsel ROMF\n"; // If we already selected that function, we do not need to run SDISel. if (mf.getProperties().hasProperty( MachineFunctionProperties::Property::Selected)) @@ -673,8 +674,12 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Lower the instructions. If a call is emitted as a tail call, cease emitting // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - if (!ElidedArgCopyInstrs.count(&*I)) + if (!ElidedArgCopyInstrs.count(&*I)) { + errs() << "SelectBB :SDB visit:"; + I->dump(); + errs() << "\n"; SDB->visit(*I); + } } // Make sure the root of the DAG is up-to-date. @@ -1342,6 +1347,7 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { } void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { + errs() << "Select ALL basic blocks\n"; FastISelFailed = false; // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 523788106db6..86683544a4c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -22,14 +22,14 @@ def CC_SI_Gfx : CallingConv<[ // 32 is reserved for the stack pointer // 33 is reserved for the frame pointer // 34 is reserved for the base pointer - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[ + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[ SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29 ]>>>, - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -43,7 +43,7 @@ def RetCC_SI_Gfx : CallingConv<[ CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -66,7 +66,7 @@ def RetCC_SI_Gfx : CallingConv<[ def CC_SI_SHADER : CallingConv<[ - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[ + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, @@ -76,7 +76,7 @@ def CC_SI_SHADER : CallingConv<[ ]>>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -99,7 +99,7 @@ def CC_SI_SHADER : CallingConv<[ def RetCC_SI_Shader : CallingConv<[ CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfType<[i32, i16, v4i8] , CCAssignToReg<[ + CCIfType<[i32, i16, v4i8, v2i8] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, @@ -183,19 +183,19 @@ def CC_AMDGPU_Func : CallingConv<[ CCIfByVal<CCPassByVal<4, 4>>, CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, i1], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8, i1], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>> + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, v2i8, i1], CCAssignToStack<4, 4>> ]>; // Calling convention for leaf functions def RetCC_AMDGPU_Func : CallingConv<[ CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 303cbe5657bc..62ac1fcd95ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -124,6 +124,7 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel( } bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + errs() << "amdgpuisel ROMF\n"; #ifdef EXPENSIVE_CHECKS DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9980e851f982..d814733580b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1141,6 +1141,7 @@ SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + errs() << "AMDGPUTL::LowerOp\n"; switch (Op.getOpcode()) { default: Op->print(errs(), &DAG); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f92bde72867a..f9638eda346d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -85,6 +85,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v4i8, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::i8, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); @@ -651,7 +652,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16, - MVT::v16f16, MVT::v16i16, MVT::v4i8}, + MVT::v16f16, MVT::v16i16, MVT::v4i8, MVT::v2i8}, Custom); for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16}) @@ -851,7 +852,7 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, EVT VT) const { if (CC == CallingConv::AMDGPU_KERNEL) return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); - + if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); EVT ScalarVT = VT.getScalarType(); @@ -859,7 +860,7 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, // FIXME: Should probably promote 8-bit vectors to i16. if (Size == 8 && Subtarget->has16BitInsts()) - return (NumElts + 1) / 4; + return (NumElts + 2) / 4; if (Size == 16 && Subtarget->has16BitInsts()) return (NumElts + 1) / 2; @@ -4670,6 +4671,10 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + errs() << "SITL::LowerOp on op\n"; + Op.dump(); + errs() << "\n"; + switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -4703,6 +4708,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: + errs() << "calling lowerEVE\n"; return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); @@ -5779,6 +5785,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDLoc SL(Op); + errs() << "in lowerEVE\n"; + EVT ResultVT = Op.getValueType(); SDValue Vec = Op.getOperand(0); @@ -5879,6 +5887,7 @@ static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) { SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { + errs() << "in SIISelLowering lowerVECTOR_SHUFFLE\n"; SDLoc SL(Op); EVT ResultVT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); @@ -5887,6 +5896,7 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, EVT EltVT; auto ScalarSize = ResultVT.getVectorElementType().getSizeInBits() ; if (ScalarSize == 8) { + // PackVT is PackVT = MVT::v2i8; } else { @@ -5958,6 +5968,8 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, SDLoc SL(Op); EVT VT = Op.getValueType(); + errs() << "in lowerBuild_Vector with VT: " << VT.getEVTString() << "\n"; + if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 || VT == MVT::v8f16) { EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), @@ -6013,7 +6025,8 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, EVT BCVT = (SrcVT) == MVT::f16 ? MVT::i16 : SrcVT; unsigned VecSize = VT.getSizeInBits(); // 16, 32 - EVT EltVT = SrcVT.getVectorElementType(); + EVT EltVT = SrcVT.isVector() ? SrcVT.getVectorElementType() : SrcVT.getScalarType(); + unsigned EltSize = EltVT.getSizeInBits(); SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits