On Sun, Oct 07, 2012 at 09:11:15PM +0200, Vincent Lejeune wrote: > --- > src/gallium/drivers/radeon/AMDGPUISelLowering.cpp | 1 + > src/gallium/drivers/radeon/AMDGPUISelLowering.h | 1 + > .../radeon/MCTargetDesc/R600MCCodeEmitter.cpp | 11 +- > src/gallium/drivers/radeon/R600ISelLowering.cpp | 68 ++++++++++++ > src/gallium/drivers/radeon/R600Instructions.td | 114 > +++++++++++++++++++++ > .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 6 ++ > src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td | 6 ++ > .../drivers/radeon/R600MachineFunctionInfo.cpp | 4 +- > .../drivers/radeon/R600MachineFunctionInfo.h | 1 + > 9 files changed, 210 insertions(+), 2 deletions(-) >
A few coding style issues I think I missed in the first review, that I've noted below. With those changes: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > index 04dadc3..5cb4d87 100644 > --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > @@ -348,5 +348,6 @@ const char* > AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const > NODE_NAME_CASE(URECIP) > NODE_NAME_CASE(INTERP) > NODE_NAME_CASE(INTERP_P0) > + NODE_NAME_CASE(EXPORT) > } > } > diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h > b/src/gallium/drivers/radeon/AMDGPUISelLowering.h > index 2d8ed82..58d2287 100644 > --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h > +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h > @@ -121,6 +121,7 @@ enum > URECIP, > INTERP, > INTERP_P0, > + EXPORT, > LAST_AMDGPU_ISD_NUMBER > }; > > diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp > b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp > index a11f482..b42ee3c 100644 > --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp > +++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp > @@ -108,7 +108,8 @@ enum InstrTypes { > INSTR_TEX, > INSTR_FC, > INSTR_NATIVE, > - INSTR_VTX > + INSTR_VTX, > + INSTR_EXPORT > }; > > enum FCInstr { > @@ -183,6 +184,14 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst > &MI, raw_ostream &OS, > Emit(InstWord2, OS); > break; > } > + case AMDGPU::EG_Export: > + case AMDGPU::R600_Export: > + { > + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); > + EmitByte(INSTR_EXPORT, OS); > + Emit(Inst, OS); > + break; > + } > > default: > EmitALUInstr(MI, Fixups, OS); > diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp > b/src/gallium/drivers/radeon/R600ISelLowering.cpp > index 5dd2f53..128d14a 100644 > --- 
a/src/gallium/drivers/radeon/R600ISelLowering.cpp > +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp > @@ -264,6 +264,26 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > > return BB; > } > + case AMDGPU::EG_Export: > + case AMDGPU::R600_Export: > + { > + bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; > + if (!EOP) > + return BB; > + unsigned cf_inst = (MI->getOpcode() == AMDGPU::EG_Export)? 84 : 40; cf_inst needs to be capitalized and written without an underscore: cf_inst -> CfInst > + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) > + .addOperand(MI->getOperand(0)) > + .addOperand(MI->getOperand(1)) > + .addOperand(MI->getOperand(2)) > + .addOperand(MI->getOperand(3)) > + .addOperand(MI->getOperand(4)) > + .addOperand(MI->getOperand(5)) > + .addOperand(MI->getOperand(6)) > + .addImm(cf_inst) > + .addImm(1); > + break; > + } > + > } > > MI->eraseFromParent(); > @@ -300,6 +320,54 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const > } > return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, > Op.getOperand(2)); > } > + case AMDGPUIntrinsic::R600_store_pixel_color: { > + MachineFunction &MF = DAG.getMachineFunction(); > + R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); > + int64_t RegIndex = > cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); > + unsigned Slot = RegIndex / 4; > + > + SDNode **outputsMap = MFI->outputs; > + > + if (!outputsMap[Slot]) { > + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, > + Op.getDebugLoc(), MVT::v4f32, > + DAG.getUNDEF(MVT::v4f32), > + Op.getOperand(2), > + DAG.getConstant(RegIndex % 4, MVT::i32)); > + > + const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32), > + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32), > + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32), > + DAG.getConstant(3, MVT::i32)}; > + > + SDValue Res = DAG.getNode( > + AMDGPUISD::EXPORT, > + Op.getDebugLoc(), > 
+ MVT::Other, > + Ops, 8); > + outputsMap[Slot] = Res.getNode(); > + return Res; > + } > + > + SDNode *ExportInstruction = (SDNode *) outputsMap[Slot] ; > + SDValue PreviousVector = ExportInstruction->getOperand(1); > + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, > + Op.getDebugLoc(), MVT::v4f32, > + PreviousVector, > + Op.getOperand(2), > + DAG.getConstant(RegIndex % 4, MVT::i32)); > + > + const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, > DAG.getConstant(0, MVT::i32), > + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32), > + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32), > + DAG.getConstant(3, MVT::i32)}; > + > + DAG.UpdateNodeOperands(ExportInstruction, > + Ops, 8); > + > + return Chain; > + } > + > // default for switch(IntrinsicID) > default: break; > } > diff --git a/src/gallium/drivers/radeon/R600Instructions.td > b/src/gallium/drivers/radeon/R600Instructions.td > index 1689a2f..a5b4635 100644 > --- a/src/gallium/drivers/radeon/R600Instructions.td > +++ b/src/gallium/drivers/radeon/R600Instructions.td > @@ -74,7 +74,33 @@ class R600_ALU { > > def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), > (ops PRED_SEL_OFF)>; > + > Extra whitespace here. 
> +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; > + > +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, > + [SDNPHasChain, SDNPSideEffect]>; > + > +multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { > + def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), > + (ExportInst > + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), > + 0, 61, 0, 7, 7, 7, cf_inst, 0) > + >; > + > + def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), > + (ExportInst > + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), > + 0, 61, 7, 0, 7, 7, cf_inst, 0) > + >; > + > + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), > + (i32 imm:$type), (i32 imm:$arraybase), > + (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w)), > + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, > + imm:$sw_x, imm:$sw_y, imm:$sw_z, imm:$sw_w, cf_inst, 0) > + >; > +} > > class R600_1OP <bits<11> inst, string opName, list<dag> pattern, > InstrItinClass itin = AnyALU> : > @@ -866,6 +892,50 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 > log_clamped, InstR600 exp_ie > > let Predicates = [isR600] in { > > + let isTerminator = 1, usesCustomInserter = 1 in { > + > + def R600_Export : > + InstR600ISA< > + (outs), > + (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase, > + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, > + i32imm:$eop), > + !strconcat("EXPORT", " $src"), > + []>{ > + bits<13> arraybase; > + bits<2> type; > + bits<7> src; > + > + bits<3> sw_x; > + bits<3> sw_y; > + bits<3> sw_z; > + bits<3> sw_w; > + > + bits<1> eop; > + bits<8> inst; > + > + let Inst{12-0} = arraybase; > + let Inst{14-13} = type; > + let Inst{21-15} = src; > + let Inst{22} = 0; // RW_REL > + let Inst{29-23} = 0; // INDEX_GPR > + let Inst{31-30} = 3; // ELEM_SIZE > + let Inst{34-32} = sw_x; > + let Inst{37-35} = sw_y; > + let Inst{40-38} = sw_z; > + let Inst{43-41} = sw_w; > + let Inst{52-49} = 1; // BURST_COUNT > + 
let Inst{53} = 1; // VALID_PIXEL_MODE > + let Inst{54} = eop; > + let Inst{62-55} = inst; > + let Inst{63} = 1; // BARRIER > + } > + > + } // End isTerminator = 1, usesCustomInserter = 1 > + > + defm : ExportPattern<R600_Export, 39>; > + > + > def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; > def MULADD_r600 : MULADD_Common<0x10>; > def CNDE_r600 : CNDE_Common<0x18>; > @@ -945,6 +1015,50 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; > > let Predicates = [isEGorCayman] in { > > + let isTerminator = 1, usesCustomInserter = 1 in { > + > + def EG_Export : > + InstR600ISA< > + (outs), > + (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase, > + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, > + i32imm:$eop), > + !strconcat("EXPORT", " $src"), > + []>{ > + bits<13> arraybase; > + bits<2> type; > + bits<7> src; > + > + bits<3> sw_x; > + bits<3> sw_y; > + bits<3> sw_z; > + bits<3> sw_w; > + > + bits<1> eop; > + bits<8> inst; > + > + let Inst{12-0} = arraybase; > + let Inst{14-13} = type; > + let Inst{21-15} = src; > + let Inst{22} = 0; // RW_REL > + let Inst{29-23} = 0; // INDEX_GPR > + let Inst{31-30} = 3; // ELEM_SIZE > + let Inst{34-32} = sw_x; > + let Inst{37-35} = sw_y; > + let Inst{40-38} = sw_z; > + let Inst{43-41} = sw_w; > + let Inst{51-48} = 1; // BURST_COUNT > + let Inst{52} = 1; // VALID_PIXEL_MODE > + let Inst{53} = eop; > + let Inst{61-54} = inst; > + let Inst{62} = 0; // MARK > + let Inst{63} = 1; // BARRIER > + } > + > + } // End isTerminator = 1, usesCustomInserter = 1 > + > + defm : ExportPattern<EG_Export, 83>; > + > // BFE_UINT - bit_extract, an optimization for mask and shift > // Src0 = Input > // Src1 = Offset > diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > index 3b62f0a..e6241d2 100644 > --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > @@ -23,6 +23,12 @@ let TargetPrefix = 
"R600", isTarget = 1 in { > Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > def int_R600_load_input_face : > Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_store_pixel_color : > + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; > + def int_R600_store_pixel_depth : > + Intrinsic<[], [llvm_float_ty], []>; > + def int_R600_store_pixel_stencil : > + Intrinsic<[], [llvm_float_ty], []>; > } > > let TargetPrefix = "r600", isTarget = 1 in { > diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > index 00877ca..664d76b 100644 > --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > @@ -23,4 +23,10 @@ let TargetPrefix = "R600", isTarget = 1 in { > Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > def int_R600_load_input_face : > Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_store_pixel_color : > + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; > + def int_R600_store_pixel_depth : > + Intrinsic<[], [llvm_float_ty], []>; > + def int_R600_store_pixel_stencil : > + Intrinsic<[], [llvm_float_ty], []>; > } > diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > index a31848e..1314468 100644 > --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > @@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const > MachineFunction &MF) > : MachineFunctionInfo(), > HasLinearInterpolation(false), > HasPerspectiveInterpolation(false) > - { } > + { > + memset(outputs, 0, sizeof(outputs)); > + } > > unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const > { > diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > index 68211b2..1fa5b23 100644 > --- 
a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > @@ -25,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { > public: > R600MachineFunctionInfo(const MachineFunction &MF); > std::vector<unsigned> ReservedRegs; > + class SDNode *outputs[16]; You don't need to use the class keyword here. Also, this variable name should be capitalized: Outputs > bool HasLinearInterpolation; > bool HasPerspectiveInterpolation; > > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev