I think the clipvertex emulation via clipdistance is rather the driver's job and the compiler shouldn't be aware of it; the driver should simply emit all the corresponding DOTs and exports.
As for the kcache regs, in your patch it looks like you are going to hardcode some logic to access another const buffer where the clip planes are stored, but I don't think we need kcache regs until we implement kcache allocation for ALU clauses in the backend, and this implies that we need an instruction scheduler that knows about clauses etc. IMO first we might want to rework the handling of constants to add proper support for multiple constant buffers and probably get rid of the Cxxx regs (I think we'll have to store kc_bank and the const index in the instructions instead of using Cxxx regs). Perhaps the LOAD_CONST intrinsic will take a kcache bank (constant buffer) index, a constant index/address, and an optional base value for relative addressing (or we can use separate intrinsics for direct/indirect addressing). Then, in the case of direct addressing, LOAD_CONST will be folded into the ALU instruction. With proper kcache line allocation for ALU clauses, const indices will be mapped to the kcache regs (that's when we'll need them), but for now we can rely on the driver's existing kcache allocation logic and simply pass src_sel = 512 + const_index and kc_bank to the driver. And in the case of relative addressing, LOAD_CONST will be translated into FETCH (in some cases we can try to do it directly in the ALU instructions as well, which should be more efficient, but it's not always possible; there are limitations). With support for multiple const buffers, clipvertex emulation should be pretty simple - we'll just need to make the driver emit LOAD_CONSTs from the clipplane const buffer, DOTs and exports.
Vadim On Sun, 2012-12-16 at 21:01 +0100, Vincent Lejeune wrote: > --- > lib/Target/AMDGPU/R600ISelLowering.cpp | 21 ++++++++++++++++++++- > lib/Target/AMDGPU/R600Instructions.td | 12 ++++++++++++ > lib/Target/AMDGPU/R600Intrinsics.td | 2 ++ > lib/Target/AMDGPU/R600RegisterInfo.td | 21 ++++++++++++++++++++- > 4 files changed, 54 insertions(+), 2 deletions(-) > > diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp > b/lib/Target/AMDGPU/R600ISelLowering.cpp > index 3a4283c..6c594cc 100644 > --- a/lib/Target/AMDGPU/R600ISelLowering.cpp > +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp > @@ -328,7 +328,26 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > > return BB; > } > - > + case AMDGPU::ClipVertexAdjust: { > + unsigned Temp[4]; > + for (unsigned i = 0; i < 4; i++) { > + unsigned KcacheReg = AMDGPU::R600_KCache128RegClass.getRegister(i + 4 > * MI->getOperand(2).getImm()); > + Temp[i] = MRI.createVirtualRegister(&AMDGPU::R600_Reg32RegClass); > + BuildMI(*BB, I, MI->getDebugLoc(), > TII->get(AMDGPU::DOT4_r600_pseudo), Temp[i]) > + .addOperand(MI->getOperand(1)) > + .addReg(KcacheReg); > + } > + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), > MI->getOperand(0).getReg()) > + .addReg(Temp[0]) > + .addImm(TII->getRegisterInfo().getSubRegFromChannel(0)) > + .addReg(Temp[1]) > + .addImm(TII->getRegisterInfo().getSubRegFromChannel(1)) > + .addReg(Temp[2]) > + .addImm(TII->getRegisterInfo().getSubRegFromChannel(2)) > + .addReg(Temp[3]) > + .addImm(TII->getRegisterInfo().getSubRegFromChannel(3)); > + break; > + } > case AMDGPU::EG_ExportSwz: > case AMDGPU::R600_ExportSwz: { > bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 
1 : 0; > diff --git a/lib/Target/AMDGPU/R600Instructions.td > b/lib/Target/AMDGPU/R600Instructions.td > index d89b03b..c3ffe97 100644 > --- a/lib/Target/AMDGPU/R600Instructions.td > +++ b/lib/Target/AMDGPU/R600Instructions.td > @@ -598,6 +598,18 @@ class ExportBufInst : InstR600ISA<( > let Inst{63-32} = Word1; > } > > +let usesCustomInserter = 1 in { > + > +def ClipVertexAdjust : AMDGPUInst <(outs R600_Reg128:$dst), > + (ins R600_Reg128:$src0, i32imm:$src1), > + "DOT4 $dst $src0", > + [(set R600_Reg128:$dst, > + (int_R600_clipvertex R600_Reg128:$src0, imm:$src1))] > +> { > + field bits<64> Inst; > +} > +} // End usesCustomInserter = 1 > + > let Predicates = [isR600toCayman] in { > > > //===----------------------------------------------------------------------===// > diff --git a/lib/Target/AMDGPU/R600Intrinsics.td > b/lib/Target/AMDGPU/R600Intrinsics.td > index 3825bc4..0186f9d 100644 > --- a/lib/Target/AMDGPU/R600Intrinsics.td > +++ b/lib/Target/AMDGPU/R600Intrinsics.td > @@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in { > Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > def int_R600_load_input_linear : > Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_clipvertex : > + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; > def int_R600_store_stream_output : > Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; > def int_R600_store_pixel_color : > diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td > b/lib/Target/AMDGPU/R600RegisterInfo.td > index 3b21825..67449d8 100644 > --- a/lib/Target/AMDGPU/R600RegisterInfo.td > +++ b/lib/Target/AMDGPU/R600RegisterInfo.td > @@ -50,6 +50,19 @@ foreach Index = 448-464 in { > def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; > } > > +foreach Index = 160-168 in { > + foreach Chan = [ "X", "Y", "Z", "W" ] in { > + // 32-bit Temporary Registers > + def K#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; > + } > + def K#Index#_XYZW : 
R600Reg_128 <"K"#Index#".XYZW", > + [!cast<Register>("K"#Index#"_X"), > + !cast<Register>("K"#Index#"_Y"), > + !cast<Register>("K"#Index#"_Z"), > + !cast<Register>("K"#Index#"_W")], > + Index>; > +} > + > // Special Registers > > def ZERO : R600Reg<"0.0", 248>; > @@ -117,7 +130,13 @@ def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, > (add > def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add > PREDICATE_BIT)>; > > +def R600_KCache128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, > + (add (sequence "K%u_XYZW", 160, 168))> { > + let isAllocatable = 0; > +} > + > def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, > - (add (sequence "T%u_XYZW", 0, 127))> { > + (add (sequence "T%u_XYZW", 0, 127), > + R600_KCache128)> { > let CopyCost = -1; > } _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev