On Sun, Apr 07, 2013 at 09:43:43PM +0200, Vincent Lejeune wrote:
Reviewed-by: Tom Stellard <thomas.stell...@amd.com>
> --- > lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 ++-- > lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 +++++++-- > lib/Target/R600/R600Instructions.td | 198 > +++++++++++++++------ > 3 files changed, 240 insertions(+), 72 deletions(-) > > diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > index 927bcbd..469a8ad 100644 > --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > @@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst > &MI, raw_ostream &OS, > Emit(Inst, OS); > break; > } > - case AMDGPU::CF_TC: > - case AMDGPU::CF_VC: > - case AMDGPU::CF_CALL_FS: > + case AMDGPU::CF_TC_EG: > + case AMDGPU::CF_VC_EG: > + case AMDGPU::CF_CALL_FS_EG: > + case AMDGPU::CF_TC_R600: > + case AMDGPU::CF_VC_R600: > + case AMDGPU::CF_CALL_FS_R600: > return; > - case AMDGPU::WHILE_LOOP: > - case AMDGPU::END_LOOP: > - case AMDGPU::LOOP_BREAK: > - case AMDGPU::CF_CONTINUE: > - case AMDGPU::CF_JUMP: > - case AMDGPU::CF_ELSE: > - case AMDGPU::POP: { > + case AMDGPU::WHILE_LOOP_EG: > + case AMDGPU::END_LOOP_EG: > + case AMDGPU::LOOP_BREAK_EG: > + case AMDGPU::CF_CONTINUE_EG: > + case AMDGPU::CF_JUMP_EG: > + case AMDGPU::CF_ELSE_EG: > + case AMDGPU::POP_EG: > + case AMDGPU::WHILE_LOOP_R600: > + case AMDGPU::END_LOOP_R600: > + case AMDGPU::LOOP_BREAK_R600: > + case AMDGPU::CF_CONTINUE_R600: > + case AMDGPU::CF_JUMP_R600: > + case AMDGPU::CF_ELSE_R600: > + case AMDGPU::POP_R600: { > uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); > EmitByte(INSTR_NATIVE, OS); > Emit(Inst, OS); > diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp > b/lib/Target/R600/R600ControlFlowFinalizer.cpp > index 3a6c7ea..cfaa36e 100644 > --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp > +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp > @@ -30,9 +30,22 @@ namespace llvm { > class R600ControlFlowFinalizer : public MachineFunctionPass 
{ > > private: > + enum ControlFlowInstruction { > + CF_TC, > + CF_CALL_FS, > + CF_WHILE_LOOP, > + CF_END_LOOP, > + CF_LOOP_BREAK, > + CF_LOOP_CONTINUE, > + CF_JUMP, > + CF_ELSE, > + CF_POP > + }; > + > static char ID; > const R600InstrInfo *TII; > unsigned MaxFetchInst; > + const AMDGPUSubtarget &ST; > > bool isFetch(const MachineInstr *MI) const { > switch (MI->getOpcode()) { > @@ -70,6 +83,52 @@ private: > } > } > > + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { > + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { > + switch (CFI) { > + case CF_TC: > + return TII->get(AMDGPU::CF_TC_R600); > + case CF_CALL_FS: > + return TII->get(AMDGPU::CF_CALL_FS_R600); > + case CF_WHILE_LOOP: > + return TII->get(AMDGPU::WHILE_LOOP_R600); > + case CF_END_LOOP: > + return TII->get(AMDGPU::END_LOOP_R600); > + case CF_LOOP_BREAK: > + return TII->get(AMDGPU::LOOP_BREAK_R600); > + case CF_LOOP_CONTINUE: > + return TII->get(AMDGPU::CF_CONTINUE_R600); > + case CF_JUMP: > + return TII->get(AMDGPU::CF_JUMP_R600); > + case CF_ELSE: > + return TII->get(AMDGPU::CF_ELSE_R600); > + case CF_POP: > + return TII->get(AMDGPU::POP_R600); > + } > + } else { > + switch (CFI) { > + case CF_TC: > + return TII->get(AMDGPU::CF_TC_EG); > + case CF_CALL_FS: > + return TII->get(AMDGPU::CF_CALL_FS_EG); > + case CF_WHILE_LOOP: > + return TII->get(AMDGPU::WHILE_LOOP_EG); > + case CF_END_LOOP: > + return TII->get(AMDGPU::END_LOOP_EG); > + case CF_LOOP_BREAK: > + return TII->get(AMDGPU::LOOP_BREAK_EG); > + case CF_LOOP_CONTINUE: > + return TII->get(AMDGPU::CF_CONTINUE_EG); > + case CF_JUMP: > + return TII->get(AMDGPU::CF_JUMP_EG); > + case CF_ELSE: > + return TII->get(AMDGPU::CF_ELSE_EG); > + case CF_POP: > + return TII->get(AMDGPU::POP_EG); > + } > + } > + } > + > MachineBasicBlock::iterator > MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, > unsigned CfAddress) const { > @@ -85,7 +144,7 @@ private: > break; > } > BuildMI(MBB, ClauseHead, 
MBB.findDebugLoc(ClauseHead), > - TII->get(AMDGPU::CF_TC)) > + getHWInstrDesc(CF_TC)) > .addImm(CfAddress) // ADDR > .addImm(AluInstCount); // COUNT > return I; > @@ -104,7 +163,8 @@ private: > > public: > R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), > - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { > + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), > + ST(tm.getSubtarget<AMDGPUSubtarget>()) { > const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); > if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) > MaxFetchInst = 8; > @@ -124,7 +184,7 @@ public: > R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); > if (MFI->ShaderType == 1) { > BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), > - TII->get(AMDGPU::CF_CALL_FS)); > + getHWInstrDesc(CF_CALL_FS)); > CfCount++; > } > for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); > @@ -154,7 +214,7 @@ public: > CurrentStack++; > MaxStack = std::max(MaxStack, CurrentStack); > MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), > - TII->get(AMDGPU::WHILE_LOOP)) > + getHWInstrDesc(CF_WHILE_LOOP)) > .addImm(2); > std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, > std::set<MachineInstr *>()); > @@ -170,7 +230,7 @@ public: > LoopStack.back(); > LoopStack.pop_back(); > CounterPropagateAddr(Pair.second, CfCount); > - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) > + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) > .addImm(Pair.first + 1); > MI->eraseFromParent(); > CfCount++; > @@ -178,7 +238,7 @@ public: > } > case AMDGPU::IF_PREDICATE_SET: { > MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), > - TII->get(AMDGPU::CF_JUMP)) > + getHWInstrDesc(CF_JUMP)) > .addImm(0) > .addImm(0); > IfThenElseStack.push_back(MIb); > @@ -192,7 +252,7 @@ public: > IfThenElseStack.pop_back(); > CounterPropagateAddr(JumpInst, CfCount); > MachineInstr *MIb = BuildMI(MBB, MI, 
MBB.findDebugLoc(MI), > - TII->get(AMDGPU::CF_ELSE)) > + getHWInstrDesc(CF_ELSE)) > .addImm(0) > .addImm(1); > DEBUG(dbgs() << CfCount << ":"; MIb->dump();); > @@ -207,7 +267,7 @@ public: > IfThenElseStack.pop_back(); > CounterPropagateAddr(IfOrElseInst, CfCount + 1); > MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), > - TII->get(AMDGPU::POP)) > + getHWInstrDesc(CF_POP)) > .addImm(CfCount + 1) > .addImm(1); > DEBUG(dbgs() << CfCount << ":"; MIb->dump();); > @@ -218,13 +278,13 @@ public: > case AMDGPU::PREDICATED_BREAK: { > CurrentStack--; > CfCount += 3; > - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) > + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) > .addImm(CfCount) > .addImm(1); > MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), > - TII->get(AMDGPU::LOOP_BREAK)) > + getHWInstrDesc(CF_LOOP_BREAK)) > .addImm(0); > - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) > + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP)) > .addImm(CfCount) > .addImm(1); > LoopStack.back().second.insert(MIb); > @@ -233,7 +293,7 @@ public: > } > case AMDGPU::CONTINUE: { > MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), > - TII->get(AMDGPU::CF_CONTINUE)) > + getHWInstrDesc(CF_LOOP_CONTINUE)) > .addImm(0); > LoopStack.back().second.insert(MIb); > MI->eraseFromParent(); > diff --git a/lib/Target/R600/R600Instructions.td > b/lib/Target/R600/R600Instructions.td > index 663b41a..b4c45e1 100644 > --- a/lib/Target/R600/R600Instructions.td > +++ b/lib/Target/R600/R600Instructions.td > @@ -823,97 +823,103 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, > i32imm:$COUNT), > let Inst{63-32} = Word1; > } > > -class CF_WORD0 { > +class CF_WORD0_R600 { > field bits<32> Word0; > > - bits<24> ADDR; > - bits<3> JUMPTABLE_SEL; > + bits<32> ADDR; > > - let Word0{23-0} = ADDR; > - let Word0{26-24} = JUMPTABLE_SEL; > + let Word0 = ADDR; > } > > -class CF_WORD1 { > +class CF_WORD1_R600 { > field bits<32> Word1; > 
> bits<3> POP_COUNT; > bits<5> CF_CONST; > bits<2> COND; > - bits<6> COUNT; > + bits<3> COUNT; > + bits<6> CALL_COUNT; > + bits<1> COUNT_3; > + bits<1> END_OF_PROGRAM; > bits<1> VALID_PIXEL_MODE; > - bits<8> CF_INST; > + bits<7> CF_INST; > + bits<1> WHOLE_QUAD_MODE; > bits<1> BARRIER; > > let Word1{2-0} = POP_COUNT; > let Word1{7-3} = CF_CONST; > let Word1{9-8} = COND; > - let Word1{15-10} = COUNT; > - let Word1{20} = VALID_PIXEL_MODE; > - let Word1{29-22} = CF_INST; > + let Word1{12-10} = COUNT; > + let Word1{18-13} = CALL_COUNT; > + let Word1{19} = COUNT_3; > + let Word1{21} = END_OF_PROGRAM; > + let Word1{22} = VALID_PIXEL_MODE; > + let Word1{29-23} = CF_INST; > + let Word1{30} = WHOLE_QUAD_MODE; > let Word1{31} = BARRIER; > } > > -class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst > <(outs), > -ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { > +class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst > <(outs), > +ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { > field bits<64> Inst; > > let CF_INST = inst; > let BARRIER = 1; > - let JUMPTABLE_SEL = 0; > let CF_CONST = 0; > let VALID_PIXEL_MODE = 0; > let COND = 0; > + let CALL_COUNT = 0; > + let COUNT_3 = 0; > + let END_OF_PROGRAM = 0; > + let WHOLE_QUAD_MODE = 0; > > let Inst{31-0} = Word0; > let Inst{63-32} = Word1; > } > > -def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), > -"TEX $COUNT @$ADDR"> { > - let POP_COUNT = 0; > -} > - > -def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), > -"VTX $COUNT @$ADDR"> { > - let POP_COUNT = 0; > -} > +class CF_WORD0_EG { > + field bits<32> Word0; > > -def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { > - let POP_COUNT = 0; > - let COUNT = 0; > -} > + bits<24> ADDR; > + bits<3> JUMPTABLE_SEL; > > -def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { > - let POP_COUNT = 0; > - let COUNT = 0; > + let Word0{23-0} = ADDR; > + let Word0{26-24} = JUMPTABLE_SEL; > } > > -def 
LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { > - let POP_COUNT = 0; > - let COUNT = 0; > -} > +class CF_WORD1_EG { > + field bits<32> Word1; > > -def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { > - let POP_COUNT = 0; > - let COUNT = 0; > -} > + bits<3> POP_COUNT; > + bits<5> CF_CONST; > + bits<2> COND; > + bits<6> COUNT; > + bits<1> VALID_PIXEL_MODE; > + bits<8> CF_INST; > + bits<1> BARRIER; > > -def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP > @$ADDR POP:$POP_COUNT"> { > - let COUNT = 0; > + let Word1{2-0} = POP_COUNT; > + let Word1{7-3} = CF_CONST; > + let Word1{9-8} = COND; > + let Word1{15-10} = COUNT; > + let Word1{20} = VALID_PIXEL_MODE; > + let Word1{29-22} = CF_INST; > + let Word1{31} = BARRIER; > } > > -def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE > @$ADDR POP:$POP_COUNT"> { > - let COUNT = 0; > -} > +class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst > <(outs), > +ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { > + field bits<64> Inst; > > -def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { > - let ADDR = 0; > - let COUNT = 0; > - let POP_COUNT = 0; > -} > + let CF_INST = inst; > + let BARRIER = 1; > + let JUMPTABLE_SEL = 0; > + let CF_CONST = 0; > + let VALID_PIXEL_MODE = 0; > + let COND = 0; > > -def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR > POP:$POP_COUNT"> { > - let COUNT = 0; > + let Inst{31-0} = Word0; > + let Inst{63-32} = Word1; > } > > def CF_ALU : ALU_CLAUSE<8, "ALU">; > @@ -1433,6 +1439,52 @@ let Predicates = [isR600] in { > let Word1{31} = 1; // BARRIER > } > defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; > + > + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), > + "TEX $COUNT @$ADDR"> { > + let POP_COUNT = 0; > + } > + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), > + "VTX $COUNT @$ADDR"> { > + let POP_COUNT = 0; > + } > 
+ def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), > + "LOOP_START_DX10 @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP > @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), > + "LOOP_BREAK @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), > + "CONTINUE @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, > i32imm:$POP_COUNT), > + "JUMP @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, > i32imm:$POP_COUNT), > + "ELSE @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { > + let ADDR = 0; > + let COUNT = 0; > + let POP_COUNT = 0; > + } > + def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), > + "POP @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + > } > > // Helper pattern for normalizing inputs to triginomic instructions for R700+ > @@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in { > } > defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; > > + def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), > + "TEX $COUNT @$ADDR"> { > + let POP_COUNT = 0; > + } > + def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), > + "VTX $COUNT @$ADDR"> { > + let POP_COUNT = 0; > + } > + def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), > + "LOOP_START_DX10 @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), > + "LOOP_BREAK @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def 
CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), > + "CONTINUE @$ADDR"> { > + let POP_COUNT = 0; > + let COUNT = 0; > + } > + def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), > + "JUMP @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), > + "ELSE @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { > + let ADDR = 0; > + let COUNT = 0; > + let POP_COUNT = 0; > + } > + def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), > + "POP @$ADDR POP:$POP_COUNT"> { > + let COUNT = 0; > + } > + > + > > //===----------------------------------------------------------------------===// > // Memory read/write instructions > > //===----------------------------------------------------------------------===// > -- > 1.8.1.4 > > _______________________________________________ > llvm-commits mailing list > llvm-comm...@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev