Hi Vincent,

From now on, please cc llvm-comm...@cs.uiuc.edu when you submit a patch.
I'm cc'ing that list now.
This looks OK to me at first glance, but I would like to test it with
compute shaders before you merge it.

On Mon, Feb 18, 2013 at 05:27:30PM +0100, Vincent Lejeune wrote:
> From: Vadim Girlin <vadimgir...@gmail.com>
>
> This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
> it only tries to expose more parallelism for ALU instructions (this also
> makes the distribution of GPR channels more uniform and increases the
> chances of ALU instructions being packed together in a single VLIW group).
> It also tries to reduce clause switching by grouping instructions of the
> same kind (ALU/FETCH/CF) together.
>
> Vincent Lejeune:
> - Support for VLIW4 slot assignment
> - Recomputation of ScheduleDAG to get more parallelism opportunities
>
> Tom Stellard:
> - Fix assertion failure when trying to determine an instruction's slot
>   based on its destination register's class
> - Fix some compiler warnings
>
> Vincent Lejeune: [v2]
> - Remove recomputation of ScheduleDAG (will be provided in a later patch)
> - Improve estimation of an ALU clause size so that the heuristic does not
>   emit CF instructions at the wrong position.
> - Make the scheduling heuristic smarter using SUnit depth
> - Take constant read limitations into account
> ---
>  lib/Target/R600/AMDGPUTargetMachine.cpp  |  17 +-
>  lib/Target/R600/R600MachineScheduler.cpp | 483 +++++++++++++++++++++++++++++++
>  lib/Target/R600/R600MachineScheduler.h   | 121 ++++++++
>  test/CodeGen/R600/fdiv.v4f32.ll          |   6 +-
>  4 files changed, 623 insertions(+), 4 deletions(-)
>  create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
>  create mode 100644 lib/Target/R600/R600MachineScheduler.h
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 70b34b0..eb58853 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -17,6 +17,7 @@
>  #include "AMDGPU.h"
>  #include "R600ISelLowering.h"
>  #include "R600InstrInfo.h"
> +#include "R600MachineScheduler.h"
>  #include "SIISelLowering.h"
>  #include "SIInstrInfo.h"
>  #include "llvm/Analysis/Passes.h"
> @@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
>    RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
>  }
>
> +static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
> +  return new ScheduleDAGMI(C, new R600SchedStrategy());
> +}
> +
> +static MachineSchedRegistry
> +SchedCustomRegistry("r600", "Run R600's custom scheduler",
> +                    createR600MachineScheduler);
> +
>  AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
>      StringRef CPU, StringRef FS,
>      TargetOptions Options,
> @@ -70,7 +79,13 @@ namespace {
>  class AMDGPUPassConfig : public TargetPassConfig {
>  public:
>    AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
> -    : TargetPassConfig(TM, PM) {}
> +    : TargetPassConfig(TM, PM) {
> +    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> +    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> +      enablePass(&MachineSchedulerID);
> +      MachineSchedRegistry::setDefault(createR600MachineScheduler);
> +    }
> +  }
>
>    AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
>      return getTM<AMDGPUTargetMachine>();
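
For anyone else who wants to try this out: since the strategy is registered
under the "r600" name above, I assume it can also be forced by hand,
independent of the pass-config default, with the generic machine-scheduler
options -- something like the line below (flag names quoted from memory, so
double-check them before relying on this):

  llc -march=r600 -mcpu=redwood -enable-misched -misched=r600 -o - input.ll
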
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
> new file mode 100644
> index 0000000..efd9490
> --- /dev/null
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -0,0 +1,483 @@
> +//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*----===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief R600 Machine Scheduler interface
> +// TODO: Scheduling is optimised for the VLIW4 arch; modify it to support
> +// the TRANS slot.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define DEBUG_TYPE "misched"
> +
> +#include "R600MachineScheduler.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/LiveIntervalAnalysis.h"
> +#include "llvm/Pass.h"
> +#include "llvm/PassManager.h"
> +#include <set>
> +
> +using namespace llvm;
> +
> +void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
> +  DAG = dag;
> +  TII = static_cast<const R600InstrInfo*>(DAG->TII);
> +  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
> +  MRI = &DAG->MRI;
> +  Available[IDAlu]->clear();
> +  Available[IDFetch]->clear();
> +  Available[IDOther]->clear();
> +  CurInstKind = IDOther;
> +  CurEmitted = 0;
> +  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
> +  InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin
> +
> +  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
> +  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
> +    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
> +  } else {
> +    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
> +  }
> +}
> +
> +void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
> +  if (QSrc->empty())
> +    return;
> +  for (ReadyQueue::iterator I = QSrc->begin(),
> +       E = QSrc->end(); I != E; ++I) {
> +    (*I)->NodeQueueId &= ~QSrc->getID();
> +    QDst->push(*I);
> +  }
> +  QSrc->clear();
> +}
> +
> +SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
> +  SUnit *SU = 0;
> +  IsTopNode = true;
> +  NextInstKind = IDOther;
> +
> +  // Check whether we might want to switch the current clause type.
> +  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
> +      (CurEmitted > InstKindLimit[CurInstKind]) ||
> +      (Available[CurInstKind]->empty());
> +  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
> +      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
> +
> +  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> +      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
> +    // Try to pick an ALU instruction.
> +    SU = pickAlu();
> +    if (SU && CurEmitted > InstKindLimit[IDAlu])
> +      CurEmitted = 0;
> +    NextInstKind = IDAlu;
> +  }
> +
> +  if (!SU) {
> +    // Try to pick a FETCH instruction.
> +    SU = pickOther(IDFetch);
> +    if (SU)
> +      NextInstKind = IDFetch;
> +  }
> +
> +  // Try to pick any other instruction.
> +  if (!SU) {
> +    SU = pickOther(IDOther);
> +    if (SU)
> +      NextInstKind = IDOther;
> +  }
> +
> +  DEBUG(
> +    if (SU) {
> +      dbgs() << "picked node: ";
> +      SU->dump(DAG);
> +    } else {
> +      dbgs() << "NO NODE ";
> +      for (int i = 0; i < IDLast; ++i) {
> +        Available[i]->dump();
> +        Pending[i]->dump();
> +      }
> +      for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
> +        const SUnit &S = DAG->SUnits[i];
> +        if (!S.isScheduled)
> +          S.dump(DAG);
> +      }
> +    }
> +  );
> +
> +  return SU;
> +}
> +
> +void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
> +  DEBUG(dbgs() << "scheduled: ");
> +  DEBUG(SU->dump(DAG));
> +
> +  if (NextInstKind != CurInstKind) {
> +    DEBUG(dbgs() << "Instruction Type Switch\n");
> +    if (NextInstKind != IDAlu)
> +      OccupedSlotsMask = 0;
> +    CurEmitted = 0;
> +    CurInstKind = NextInstKind;
> +  }
> +
> +  if (CurInstKind == IDAlu) {
> +    switch (getAluKind(SU)) {
> +    case AluT_XYZW:
> +      CurEmitted += 4;
> +      break;
> +    case AluDiscarded:
> +      break;
> +    default: {
> +      ++CurEmitted;
> +      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
> +           E = SU->getInstr()->operands_end(); It != E; ++It) {
> +        MachineOperand &MO = *It;
> +        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
> +          ++CurEmitted;
> +      }
> +    }
> +    }
> +  } else {
> +    ++CurEmitted;
> +  }
> +
> +  DEBUG(dbgs() << CurEmitted << " instructions emitted in this clause\n");
> +
> +  if (CurInstKind != IDFetch) {
> +    MoveUnits(Pending[IDFetch], Available[IDFetch]);
> +  }
> +  MoveUnits(Pending[IDOther], Available[IDOther]);
> +}
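
The clause-size accounting above was the part I had to read twice, so let me
restate it to check my understanding: an XYZW-style op costs four slots, a
discarded copy costs nothing, and every ALU_LITERAL_X source operand
presumably costs an extra dword against the ~120-slot ALU clause budget. A
self-contained sketch, with a made-up OpSummary struct standing in for a
real MachineInstr:

  // Sketch only: OpSummary is hypothetical; the cost rule is my reading
  // of schedNode() above.
  struct OpSummary {
    bool FillsWholeGroup;  // AluT_XYZW: DOT4, CUBE, reductions, interp
    bool Discarded;        // AluDiscarded: copies RA will fold away
    unsigned NumLiterals;  // number of ALU_LITERAL_X source operands
  };

  static unsigned clauseSlots(const OpSummary &Op) {
    if (Op.Discarded)
      return 0;                // never reaches the hardware clause
    if (Op.FillsWholeGroup)
      return 4;                // occupies all four channels
    return 1 + Op.NumLiterals; // one slot plus one per literal
  }

If that reading is right, the 8-slot safety margin on InstKindLimit[IDAlu]
makes sense: an instruction picked just under the limit can still push the
clause over by its literal dwords.
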
> +
> +void R600SchedStrategy::releaseTopNode(SUnit *SU) {
> +  int IK = getInstKind(SU);
> +
> +  DEBUG(dbgs() << IK << " <= ");
> +  DEBUG(SU->dump(DAG));
> +
> +  Pending[IK]->push(SU);
> +}
> +
> +void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
> +}
> +
> +bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
> +                                          const TargetRegisterClass *RC) const {
> +  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
> +    return RC->contains(Reg);
> +  } else {
> +    return MRI->getRegClass(Reg) == RC;
> +  }
> +}
> +
> +R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
> +  MachineInstr *MI = SU->getInstr();
> +
> +  switch (MI->getOpcode()) {
> +  case AMDGPU::INTERP_PAIR_XY:
> +  case AMDGPU::INTERP_PAIR_ZW:
> +  case AMDGPU::INTERP_VEC_LOAD:
> +    return AluT_XYZW;
> +  case AMDGPU::COPY:
> +    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
> +      // %vregX = COPY Tn_X is likely to be discarded in favor of an
> +      // assignment of Tn_X to %vregX; don't consider it when scheduling.
> +      return AluDiscarded;
> +    } else if (MI->getOperand(1).isUndef()) {
> +      // MI will become a KILL; don't consider it when scheduling.
> +      return AluDiscarded;
> +    }
> +  default:
> +    break;
> +  }
> +
> +  // Does the instruction take a whole instruction group?
> +  if (TII->isVector(*MI) ||
> +      TII->isCubeOp(MI->getOpcode()) ||
> +      TII->isReductionOp(MI->getOpcode()))
> +    return AluT_XYZW;
> +
> +  // Is the result already assigned to a channel?
> +  unsigned DestSubReg = MI->getOperand(0).getSubReg();
> +  switch (DestSubReg) {
> +  case AMDGPU::sub0:
> +    return AluT_X;
> +  case AMDGPU::sub1:
> +    return AluT_Y;
> +  case AMDGPU::sub2:
> +    return AluT_Z;
> +  case AMDGPU::sub3:
> +    return AluT_W;
> +  default:
> +    break;
> +  }
> +
> +  // Is the result already a member of an X/Y/Z/W class?
> +  unsigned DestReg = MI->getOperand(0).getReg();
> +  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
> +      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
> +    return AluT_X;
> +  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
> +    return AluT_Y;
> +  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
> +    return AluT_Z;
> +  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
> +    return AluT_W;
> +  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
> +    return AluT_XYZW;
> +
> +  return AluAny;
> +}
> +
> +int R600SchedStrategy::getInstKind(SUnit *SU) {
> +  int Opcode = SU->getInstr()->getOpcode();
> +
> +  if (TII->isALUInstr(Opcode)) {
> +    return IDAlu;
> +  }
> +
> +  switch (Opcode) {
> +  case AMDGPU::COPY:
> +  case AMDGPU::CONST_COPY:
> +  case AMDGPU::INTERP_PAIR_XY:
> +  case AMDGPU::INTERP_PAIR_ZW:
> +  case AMDGPU::INTERP_VEC_LOAD:
> +  case AMDGPU::DOT4_eg_pseudo:
> +  case AMDGPU::DOT4_r600_pseudo:
> +    return IDAlu;
> +  case AMDGPU::TEX_VTX_CONSTBUF:
> +  case AMDGPU::TEX_VTX_TEXBUF:
> +  case AMDGPU::TEX_LD:
> +  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
> +  case AMDGPU::TEX_GET_GRADIENTS_H:
> +  case AMDGPU::TEX_GET_GRADIENTS_V:
> +  case AMDGPU::TEX_SET_GRADIENTS_H:
> +  case AMDGPU::TEX_SET_GRADIENTS_V:
> +  case AMDGPU::TEX_SAMPLE:
> +  case AMDGPU::TEX_SAMPLE_C:
> +  case AMDGPU::TEX_SAMPLE_L:
> +  case AMDGPU::TEX_SAMPLE_C_L:
> +  case AMDGPU::TEX_SAMPLE_LB:
> +  case AMDGPU::TEX_SAMPLE_C_LB:
> +  case AMDGPU::TEX_SAMPLE_G:
> +  case AMDGPU::TEX_SAMPLE_C_G:
> +  case AMDGPU::TXD:
> +  case AMDGPU::TXD_SHADOW:
> +    return IDFetch;
> +  default:
> +    DEBUG(
> +      dbgs() << "other inst: ";
> +      SU->dump(DAG);
> +    );
> +    return IDOther;
> +  }
> +}
> +
> +class ConstPairs {
> +private:
> +  unsigned XYPair;
> +  unsigned ZWPair;
> +public:
> +  // Both pair slots must start out as 0 ("no constant read"); otherwise
> +  // the "if (!XYPair)" tests below read uninitialized memory.
> +  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
> +    for (unsigned i = 0; i < 3; i++) {
> +      unsigned ReadConstChan = ReadConst[i] & 3;
> +      unsigned ReadConstIndex = ReadConst[i] & (~3);
> +      if (ReadConstChan < 2) {
> +        if (!XYPair) {
> +          XYPair = ReadConstIndex;
> +        }
> +      } else {
> +        if (!ZWPair) {
> +          ZWPair = ReadConstIndex;
> +        }
> +      }
> +    }
> +  }
> +
> +  // Two instructions are group-compatible if, for each pair slot, at most
> +  // one distinct constant index is requested.
> +  bool isCompatibleWith(const ConstPairs& CP) const {
> +    return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
> +           (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
> +  }
> +};
> +
> +static
> +const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
> +  unsigned ReadConsts[3] = {0, 0, 0};
> +  R600Operands::Ops OpTable[3][2] = {
> +    {R600Operands::SRC0, R600Operands::SRC0_SEL},
> +    {R600Operands::SRC1, R600Operands::SRC1_SEL},
> +    {R600Operands::SRC2, R600Operands::SRC2_SEL},
> +  };
> +
> +  if (!TII->isALUInstr(MI.getOpcode()))
> +    return ConstPairs(ReadConsts);
> +
> +  for (unsigned i = 0; i < 3; i++) {
> +    int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
> +    if (SrcIdx < 0)
> +      break;
> +    if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
> +      ReadConsts[i] = MI.getOperand(
> +          TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
> +  }
> +  return ConstPairs(ReadConsts);
> +}
> +
> +bool R600SchedStrategy::isBundleable(const MachineInstr& MI) {
> +  const ConstPairs &MIPair = getPairs(TII, MI);
> +  for (unsigned i = 0; i < 4; i++) {
> +    if (!InstructionsGroupCandidate[i])
> +      continue;
> +    const ConstPairs &IGPair = getPairs(TII,
> +        *InstructionsGroupCandidate[i]->getInstr());
> +    if (!IGPair.isCompatibleWith(MIPair))
> +      return false;
> +  }
> +  return true;
> +}
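
The constant-read restriction took me a minute to decode, so here is the
standalone restatement I used to convince myself. The encoding -- low two
bits of the SEL immediate select the channel, the upper bits select a
constant line that is fetched in XY/ZW pairs (the kcache, I assume) -- is my
inference from the & 3 / & ~3 arithmetic, so correct me if the hardware
works differently:

  // Toy mirror of ConstPairs/getPairs, compilable on its own.
  #include <cstdio>

  struct Pairs { unsigned XY, ZW; };

  static Pairs classify(const unsigned Sels[3]) {
    Pairs P = {0, 0};
    for (int i = 0; i < 3; ++i) {
      unsigned Chan = Sels[i] & 3, Index = Sels[i] & ~3u;
      if (!Index) continue;                     // 0 == no constant read
      if (Chan < 2) { if (!P.XY) P.XY = Index; }
      else          { if (!P.ZW) P.ZW = Index; }
    }
    return P;
  }

  static bool compatible(Pairs A, Pairs B) {
    return (!A.XY || !B.XY || A.XY == B.XY) &&
           (!A.ZW || !B.ZW || A.ZW == B.ZW);
  }

  int main() {
    unsigned I0[3] = {4, 0, 0}; // reads const line 4, channel X
    unsigned I1[3] = {5, 0, 0}; // line 4, channel Y: same XY pair
    unsigned I2[3] = {8, 0, 0}; // line 8, channel X: different XY pair
    std::printf("%d %d\n", compatible(classify(I0), classify(I1)),  // 1
                           compatible(classify(I0), classify(I2))); // 0
  }

So two ALU instructions only land in the same group when their constant
reads fit one XY and one ZW pair, which matches the "constant read
limitations" bullet in the commit message.
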
> +
> +SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
> +  if (Q.empty())
> +    return NULL;
> +  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
> +       E = Q.end(); It != E; ++It) {
> +    SUnit *SU = *It;
> +    if (isBundleable(*SU->getInstr())) {
> +      Q.erase(It);
> +      return SU;
> +    }
> +  }
> +  return NULL;
> +}
> +
> +void R600SchedStrategy::LoadAlu() {
> +  ReadyQueue *QSrc = Pending[IDAlu];
> +  for (ReadyQueue::iterator I = QSrc->begin(),
> +       E = QSrc->end(); I != E; ++I) {
> +    (*I)->NodeQueueId &= ~QSrc->getID();
> +    AluKind AK = getAluKind(*I);
> +    AvailableAlus[AK].insert(*I);
> +  }
> +  QSrc->clear();
> +}
> +
> +void R600SchedStrategy::PrepareNextSlot() {
> +  DEBUG(dbgs() << "New Slot\n");
> +  OccupedSlotsMask = 0;
> +  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
> +  LoadAlu();
> +}
> +
> +void R600SchedStrategy::AssignSlot(MachineInstr *MI, unsigned Slot) {
> +  unsigned DestReg = MI->getOperand(0).getReg();
> +  // The register pressure tracker crashes if an operand is both defined and
> +  // used in the same instruction and we try to constrain its regclass, so
> +  // bail out in that case.
> +  for (MachineInstr::mop_iterator It = MI->operands_begin(),
> +       E = MI->operands_end(); It != E; ++It) {
> +    MachineOperand &MO = *It;
> +    if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg)
> +      return;
> +  }
> +  // Constrain the regclass of DestReg so that it gets assigned to Slot.
> +  switch (Slot) {
> +  case 0:
> +    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
> +    break;
> +  case 1:
> +    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
> +    break;
> +  case 2:
> +    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
> +    break;
> +  case 3:
> +    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
> +    break;
> +  }
> +}
> +
> +SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
> +  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
> +  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
> +  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
> +  if (!UnslotedSU) {
> +    return SlotedSU;
> +  } else if (!SlotedSU) {
> +    AssignSlot(UnslotedSU->getInstr(), Slot);
> +    return UnslotedSU;
> +  } else {
> +    // Both are available; pick whichever the comparator orders first
> +    // (i.e. the deeper one) and put the other back.
> +    if (CompareSUnit()(SlotedSU, UnslotedSU)) {
> +      AvailableAlus[AluAny].insert(UnslotedSU);
> +      return SlotedSU;
> +    } else {
> +      AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
> +      AssignSlot(UnslotedSU->getInstr(), Slot);
> +      return UnslotedSU;
> +    }
> +  }
> +}
> +
> +bool R600SchedStrategy::isAvailablesAluEmpty() const {
> +  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
> +      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
> +      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
> +      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
> +}
> +
> +SUnit* R600SchedStrategy::pickAlu() {
> +  while (!isAvailablesAluEmpty()) {
> +    if (!OccupedSlotsMask) {
> +      // Flush physical reg copies (RA will discard them).
> +      if (!AvailableAlus[AluDiscarded].empty()) {
> +        return PopInst(AvailableAlus[AluDiscarded]);
> +      }
> +      // If there is a T_XYZW ALU instruction available, use it.
> +      if (!AvailableAlus[AluT_XYZW].empty()) {
> +        OccupedSlotsMask = 15;
> +        return PopInst(AvailableAlus[AluT_XYZW]);
> +      }
> +    }
> +    for (unsigned Chan = 0; Chan < 4; ++Chan) {
> +      bool isOccupied = OccupedSlotsMask & (1 << Chan);
> +      if (!isOccupied) {
> +        SUnit *SU = AttemptFillSlot(Chan);
> +        if (SU) {
> +          OccupedSlotsMask |= (1 << Chan);
> +          InstructionsGroupCandidate[Chan] = SU;
> +          return SU;
> +        }
> +      }
> +    }
> +    PrepareNextSlot();
> +  }
> +  return NULL;
> +}
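
To double-check the packing loop, I wrote a toy model of the channel-mask
bookkeeping (all names are mine, and it deliberately ignores dependencies
and slot preferences):

  #include <cstdio>

  // Toy model of pickAlu()'s VLIW4 group packing: one bit per channel,
  // mask 15 == full group, reset when a new group starts.
  int main() {
    int OccupiedMask = 0;
    for (int Inst = 0; Inst < 6; ++Inst) {
      for (int Chan = 0; Chan < 4; ++Chan) {
        if (!(OccupiedMask & (1 << Chan))) {
          OccupiedMask |= (1 << Chan);
          std::printf("inst %d -> channel %d (mask %d)\n",
                      Inst, Chan, OccupiedMask);
          break;
        }
      }
      if (OccupiedMask == 15) {
        std::printf("group full, starting a new group\n");
        OccupiedMask = 0; // the PrepareNextSlot() equivalent
      }
    }
  }

The real code does the same walk but returns after placing one instruction
per pickNode() call, and a whole-group (AluT_XYZW) pick just sets the mask
to 15 up front.
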
> +
> +SUnit* R600SchedStrategy::pickOther(int QID) {
> +  SUnit *SU = 0;
> +  ReadyQueue *AQ = Available[QID];
> +
> +  if (AQ->empty()) {
> +    MoveUnits(Pending[QID], AQ);
> +  }
> +  if (!AQ->empty()) {
> +    SU = *AQ->begin();
> +    AQ->remove(AQ->begin());
> +  }
> +  return SU;
> +}
> +
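
One more aside for reviewers who have not looked at the new MI scheduler
infrastructure yet: as far as I understand it, ScheduleDAGMI drives a
MachineSchedStrategy through initialize(), then alternating
pickNode()/schedNode() calls until pickNode() returns null, with
releaseTopNode()/releaseBottomNode() fired as SUnits become ready. A toy
model of that contract (not LLVM code, just the call order):

  #include <cstdio>
  #include <vector>

  // Toy stand-ins, NOT LLVM types; only the callback order matters here.
  struct ToyStrategy {
    std::vector<int> Ready;
    void initialize() { std::printf("initialize\n"); }
    void releaseTopNode(int SU) { Ready.push_back(SU); }
    bool pickNode(int &SU) {
      if (Ready.empty())
        return false;
      SU = Ready.back();
      return true;
    }
    void schedNode(int SU) { std::printf("sched %d\n", SU); Ready.pop_back(); }
  };

  int main() {
    ToyStrategy S;
    S.initialize();
    for (int i = 0; i < 3; ++i)
      S.releaseTopNode(i);   // dependence-free nodes become ready
    int SU;
    while (S.pickNode(SU))   // the driver: pick, place, then notify
      S.schedNode(SU);
  }

That is also why pickNode() can afford to mutate the queues here: the DAG
calls schedNode() right after, and all clause/group state stays inside the
strategy.
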
> diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
> new file mode 100644
> index 0000000..d74ff1e
> --- /dev/null
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -0,0 +1,121 @@
> +//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief R600 Machine Scheduler interface
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef R600MACHINESCHEDULER_H_
> +#define R600MACHINESCHEDULER_H_
> +
> +#include "R600InstrInfo.h"
> +#include "llvm/CodeGen/MachineScheduler.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/ADT/PriorityQueue.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +
> +class CompareSUnit {
> +public:
> +  // Order SUnits so that the deepest one in the DAG comes first.
> +  bool operator()(const SUnit *S1, const SUnit *S2) {
> +    return S1->getDepth() > S2->getDepth();
> +  }
> +};
> +
> +class R600SchedStrategy : public MachineSchedStrategy {
> +
> +  const ScheduleDAGMI *DAG;
> +  const R600InstrInfo *TII;
> +  const R600RegisterInfo *TRI;
> +  MachineRegisterInfo *MRI;
> +
> +  enum InstQueue {
> +    QAlu = 1,
> +    QFetch = 2,
> +    QOther = 4
> +  };
> +
> +  enum InstKind {
> +    IDAlu,
> +    IDFetch,
> +    IDOther,
> +    IDLast
> +  };
> +
> +  enum AluKind {
> +    AluAny,
> +    AluT_X,
> +    AluT_Y,
> +    AluT_Z,
> +    AluT_W,
> +    AluT_XYZW,
> +    AluDiscarded, // LLVM instructions that are going to be eliminated
> +    AluLast
> +  };
> +
> +  ReadyQueue *Available[IDLast], *Pending[IDLast];
> +  std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
> +
> +  InstKind CurInstKind;
> +  int CurEmitted;
> +  InstKind NextInstKind;
> +
> +  int InstKindLimit[IDLast];
> +
> +  int OccupedSlotsMask;
> +
> +public:
> +  R600SchedStrategy() :
> +      DAG(0), TII(0), TRI(0), MRI(0) {
> +    Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
> +    Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
> +    Available[IDOther] = new ReadyQueue(QOther, "AOther");
> +    Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
> +    Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
> +    Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
> +  }
> +
> +  virtual ~R600SchedStrategy() {
> +    for (unsigned I = 0; I < IDLast; ++I) {
> +      delete Available[I];
> +      delete Pending[I];
> +    }
> +  }
> +
> +  virtual void initialize(ScheduleDAGMI *dag);
> +  virtual SUnit *pickNode(bool &IsTopNode);
> +  virtual void schedNode(SUnit *SU, bool IsTopNode);
> +  virtual void releaseTopNode(SUnit *SU);
> +  virtual void releaseBottomNode(SUnit *SU);
> +
> +private:
> +  SUnit *InstructionsGroupCandidate[4];
> +
> +  int getInstKind(SUnit *SU);
> +  bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
> +  AluKind getAluKind(SUnit *SU) const;
> +  void LoadAlu();
> +  bool isAvailablesAluEmpty() const;
> +  SUnit *AttemptFillSlot(unsigned Slot);
> +  void PrepareNextSlot();
> +  SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
> +
> +  void AssignSlot(MachineInstr *MI, unsigned Slot);
> +  SUnit *pickAlu();
> +  SUnit *pickOther(int QID);
> +  bool isBundleable(const MachineInstr& MI);
> +  void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
> +};
> +
> +} // namespace llvm
> +
> +#endif /* R600MACHINESCHEDULER_H_ */
> diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
> index 459fd11..79e677f 100644
> --- a/test/CodeGen/R600/fdiv.v4f32.ll
> +++ b/test/CodeGen/R600/fdiv.v4f32.ll
> @@ -1,13 +1,13 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
>  ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>  ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>  ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>  ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>  ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>
>  define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
> --
> 1.8.1.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev