[Mesa-dev] [Bug 55998] Pretty huge slowdown in mesa 9.0
https://bugs.freedesktop.org/show_bug.cgi?id=55998 m...@manki.in changed: What|Removed |Added CC||m...@manki.in -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] radeon/llvm: interp instructions emit native outputs
--- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 30 +++ lib/Target/AMDGPU/R600Instructions.td | 25 +- lib/Target/AMDGPU/R600RegisterInfo.td | 10 + 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index fabdb4d..f9fd65d 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -95,8 +95,9 @@ bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) for (unsigned i = 0; i < 8; i++) { unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( 2 * IJIndexBase + ((i + 1) % 2)); -unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( -4 * MI.getOperand(2).getImm()); +unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( +MI.getOperand(2).getImm()); + unsigned Sel; switch (i % 4) { @@ -109,16 +110,11 @@ bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) unsigned Res = TRI.getSubReg(DstReg, Sel); -const MCInstrDesc &Opcode = (i < 4)? 
-TII->get(AMDGPU::INTERP_ZW): -TII->get(AMDGPU::INTERP_XY); +unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY; -MachineInstr *NewMI = BuildMI(*(MI.getParent()), -I, MI.getParent()->findDebugLoc(I), -Opcode, Res) -.addReg(IJIndex) -.addReg(ReadReg) -.addImm(0); +MachineBasicBlock &MBB = *(MI.getParent()); +MachineInstr *NewMI = +TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg); if (!(i> 1 && i < 6)) { TII->addFlag(NewMI, 0, MO_FLAG_MASK); @@ -143,8 +139,8 @@ bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) unsigned DstReg = MI.getOperand(0).getReg(); for (unsigned i = 0; i < 4; i++) { -unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( -4 * MI.getOperand(1).getImm() + i); +unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( +MI.getOperand(1).getImm()); unsigned Sel; switch (i % 4) { @@ -157,11 +153,9 @@ bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) unsigned Res = TRI.getSubReg(DstReg, Sel); -MachineInstr *NewMI = BuildMI(*(MI.getParent()), -I, MI.getParent()->findDebugLoc(I), -TII->get(AMDGPU::INTERP_LOAD_P0), Res) -.addReg(ReadReg) -.addImm(0); +MachineBasicBlock &MBB = *(MI.getParent()); +MachineInstr *NewMI = TII->buildDefaultInstruction( +MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); if (i % 4 != 3) TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 7cc74e8..b97b094 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -444,32 +444,17 @@ def input_constant : AMDGPUShaderInst < -def INTERP_XY : InstR600 <0xD6, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), - "INTERP_XY dst", - [], AnyALU> +def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { - let FlagOperandIdx = 3; + let bank_swizzle = 5; } -def INTERP_ZW : InstR600 <0xD7, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, 
i32imm:$flags), - "INTERP_ZW dst", - [], AnyALU> +def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> { - let FlagOperandIdx = 3; + let bank_swizzle = 5; } -def INTERP_LOAD_P0 : InstR600 <0xE0, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src, i32imm:$flags), - "INTERP_LOAD_P0 dst", - [], AnyALU> -{ - let FlagOperandIdx = 2; -} +def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; let Predicates = [isR600toCayman] in { diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td index c682f2b..d3d6d25 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/lib/Target/AMDGPU/R600RegisterInfo.td @@ -41,6 +41,12 @@ foreach Index = 0-127 in { Index>; } +// Array Base Register holding input in FS +foreach Index = 448-464 in { + def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; +} + + // Special Registers def ZERO : R600Reg<"0.0", 248>; @@ -56,6 +62,9 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; +def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "ArrayBase%u", 448, 464))>; + def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add (interleave (interleave (sequence "C%u_X", 0, 127), @@ -83,6 +92,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
[Mesa-dev] [PATCH 2/6] radeon/llvm: Add super reg to reserved reg list
--- lib/Target/AMDGPU/R600ISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index a7cb010..094d920 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -169,6 +169,9 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); MFI->ReservedRegs.push_back(ReservedReg); + unsigned SuperReg = + AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4); + MFI->ReservedRegs.push_back(SuperReg); break; } -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] radeon/llvm: Remove input.face and input.position intrinsics
--- lib/Target/AMDGPU/R600ISelLowering.cpp | 35 -- lib/Target/AMDGPU/R600ISelLowering.h | 1 - lib/Target/AMDGPU/R600Intrinsics.td| 4 3 files changed, 40 deletions(-) diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 094d920..76cabae 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -357,20 +357,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); } -case AMDGPUIntrinsic::R600_load_input_position: { - unsigned slot = cast(Op.getOperand(1))->getZExtValue(); - unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); - SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - RegIndex, MVT::f32); - if ((slot % 4) == 3) { -return DAG.getNode(ISD::FDIV, -DL, VT, -DAG.getConstantFP(1.0f, MVT::f32), -Reg); - } else { -return Reg; - } -} case r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); @@ -424,30 +410,9 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); - case ISD::INTRINSIC_WO_CHAIN: -{ - unsigned IntrinsicID = - cast(N->getOperand(0))->getZExtValue(); - if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) { -Results.push_back(LowerInputFace(N, DAG)); - } else { -return; - } -} } } -SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const -{ - unsigned slot = cast(Op->getOperand(1))->getZExtValue(); - unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); - SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - RegIndex, MVT::f32); - return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, - Reg, DAG.getConstantFP(0.0f, MVT::f32), - DAG.getCondCode(ISD::SETUGT)); -} - SDValue 
R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index 8bd4859..fd32f1b 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -59,7 +59,6 @@ private: SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td index 9c81310..d661366 100644 --- a/lib/Target/AMDGPU/R600Intrinsics.td +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -19,8 +19,4 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; def int_R600_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_position : -Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_face : -Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; } -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] radeon/llvm: add support for vector setCC
--- lib/Target/AMDGPU/R600ISelLowering.cpp | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 76cabae..d0711b8 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -719,10 +719,8 @@ SDValue R600TargetLowering::LowerFormalArguments( } EVT R600TargetLowering::getSetCCResultType(EVT VT) const { - if (VT.isVector()) { -return VT; - } - return MVT::i32; + if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); } //===--===// -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] radeon/llvm: turn select into select_cc
--- lib/Target/AMDGPU/R600ISelLowering.cpp | 16 lib/Target/AMDGPU/R600ISelLowering.h | 1 + 2 files changed, 17 insertions(+) diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index d0711b8..38d68f4 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -64,6 +64,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -295,6 +298,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::INTRINSIC_VOID: { @@ -638,6 +642,18 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const DAG.getCondCode(ISD::SETNE)); } +SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const +{ + return DAG.getNode(ISD::SELECT_CC, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + DAG.getConstant(0, MVT::i32), + Op.getOperand(1), + Op.getOperand(2), + DAG.getCondCode(ISD::SETNE)); +} + SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index fd32f1b..d1dfe9f 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -58,6 +58,7 @@ private: SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue 
Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] radeon/llvm: Cayman uses vector instructions for SIN/COS/RECIP_CLAMPED/RECIPSQRT_IEEE
--- lib/Target/AMDGPU/R600Instructions.td | 30 -- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index b97b094..472538e 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -938,6 +938,15 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; +def SIN_eg : SIN_Common<0x8D>; +def COS_eg : COS_Common<0x8E>; + +def : SIN_PAT ; +def : COS_PAT ; +def : Pat<(fsqrt R600_Reg32:$src), + (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; } // End Predicates = [isEG] //===--===// @@ -982,18 +991,11 @@ let Predicates = [isEGorCayman] in { def CNDGE_eg : CNDGE_Common<0x1B>; def MUL_LIT_eg : MUL_LIT_Common<0x1F>; def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; - def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; - def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; - def SIN_eg : SIN_Common<0x8D>; - def COS_eg : COS_Common<0x8E>; defm DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common; - def : SIN_PAT ; - def : COS_PAT ; - def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; } @@ -1022,9 +1024,6 @@ let Predicates = [isEGorCayman] in { def : Pat<(fp_to_uint R600_Reg32:$src0), (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; - def : Pat<(fsqrt R600_Reg32:$src), -(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; - //===--===// // Memory read/write instructions //===--===// @@ -1278,8 +1277,15 @@ def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; def LOG_IEEE_ : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; 
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; +def SIN_cm : SIN_Common<0x8D>; +def COS_cm : COS_Common<0x8E>; } // End isVector = 1 +def : SIN_PAT ; +def : COS_PAT ; + defm DIV_cm : DIV_Common; // RECIP_UINT emulation for Cayman @@ -1289,6 +1295,10 @@ def : Pat < (MOV_IMM_I32 0x4f80))) >; + +def : Pat<(fsqrt R600_Reg32:$src), + (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; + } // End isCayman //===--===// -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Mesa-9.0: generate_builtins.py hangs and uses 100% CPU when generating from *.glsl files
Hi, I am trying to (cross-)compile Mesa-9.0. builtin_compiler is compiled for the host in a separate host build before I build Mesa for the target. When I build Mesa for the target, the build process hangs at: python2 ./builtins/tools/generate_builtins.py /home/stephan/projects/openelec-master/build.OpenELEC-Intel.x86_64-devel/toolchain/bin/builtin_compiler > builtin_function.cpp || rm -f builtin_function.cpp with no output, using 100% CPU. I get the same hang when I run this command manually. With Mesa-8.0.4 this worked; the problem was introduced by this commit: http://cgit.freedesktop.org/mesa/mesa/commit/src/glsl/builtins/tools/generate_builtins.py?h=9.0&id=7de1331662816d31fb9bed423b1e5372284a260e If I comment out the line: read_glsl_files(fs) the above command works, so something in the read_glsl_files(fs) function seems to be wrong. If I run the above command with python -vvv, it hangs after: ... # trying /home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIOmodule.so # trying /home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIO.py # trying /home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIO.pyo # trying /usr/lib64/python2.7/StringIO.so # trying /usr/lib64/python2.7/StringIOmodule.so # trying /usr/lib64/python2.7/StringIO.py # /usr/lib64/python2.7/StringIO.pyo matches /usr/lib64/python2.7/StringIO.py import StringIO # precompiled from /usr/lib64/python2.7/StringIO.pyo until I interrupt it with Ctrl+C: ^CTraceback (most recent call last): File "./builtins/tools/generate_builtins.py", line 220, in write_function_definitions() File "./builtins/tools/generate_builtins.py", line 68, in write_function_definitions fs = get_builtin_definitions() File "./builtins/tools/generate_builtins.py", line 48, in get_builtin_definitions read_glsl_files(fs) File "./builtins/tools/generate_builtins.py", line 35, in read_glsl_files (output, returncode) = 
run_compiler([filename]) File "./builtins/tools/generate_builtins.py", line 76, in run_compiler output = p.communicate()[0] File "/usr/lib64/python2.7/subprocess.py", line 746, in communicate stdout = _eintr_retry_call(self.stdout.read) File "/usr/lib64/python2.7/subprocess.py", line 478, in _eintr_retry_call return func(*args) KeyboardInterrupt # clear __builtin__._ # clear sys.path ... What information do you need to track down the problem? Thanks for your help, Stephan ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/vs: Fix debug dumping of VS push constants.
While copying the values into the batch space, we advance the param pointer. The debug code then tries to iterate over all the uploaded values, starting at param...which is now the end of the uploaded data, rather than the start. This patch saves a pointer to the start of push constant space before it gets altered and switches the debug code to use that. Tested by uncommenting the code and examining the output of glsl-vs-clamp-1.shader_test. Previously all values appeared to be zero. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_vs_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index b15bc3d..0e80c27 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -63,6 +63,8 @@ gen6_upload_vs_push_constants(struct brw_context *brw) 4 * sizeof(float), 32, &brw->vs.push_const_offset); + float *save_param = param; + if (brw->vs.prog_data->uses_new_param_layout) { for (i = 0; i < brw->vs.prog_data->nr_params; i++) { *param = *brw->vs.prog_data->param[i]; @@ -102,7 +104,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw) if (0) { printf("VS constant buffer:\n"); for (i = 0; i < params_uploaded; i++) { - float *buf = param + i * 4; + float *buf = save_param + i * 4; printf("%d: %f %f %f %f\n", i, buf[0], buf[1], buf[2], buf[3]); } -- 1.7.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH V2] i965: add ARB_vertex_type_2_10_10_10_rev support
This series adds support for ARB_vertex_type_2_10_10_10_rev on i965. Notable changes from V1: * Extra SURFACEFORMAT flags are no longer needed, so the first patch was dropped. * Spurious extra debug patches removed. * All 2_10_10_10 attributes are uploaded as UINT, and the vertex shader does the rest. Remaining things to do: * Port 5/6 to the new VS backend so it works with GLSL too. * Tidy up the normalization. In its current state, this series passes piglit draw-vertices-2101010 on at least gen6. -- Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] i965: implement get_size for 2_10_10_10 formats
Signed-off-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 4 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index f5f65ca..8ffcc57 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -290,6 +290,10 @@ static GLuint get_size( GLenum type ) case GL_UNSIGNED_SHORT: return sizeof(GLushort); case GL_UNSIGNED_BYTE: return sizeof(GLubyte); case GL_FIXED: return sizeof(GLuint); + /* packed formats: always have 4 components, and element size is + * 4 bytes, so pretend each component is 1 byte. */ + case GL_INT_2_10_10_10_REV: return sizeof(GLbyte); + case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLubyte); default: assert(0); return 0; } } -- 1.7.12.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] i965: support 2_10_10_10 formats in get_surface_type.
Always use R10G10B10A2_UINT; Most of the other formats we'd like don't actually work on the hardware. Will emit w/a for scaling, sign recovery and BGRA swizzle in the VS. Signed-off-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 8ffcc57..c6db024 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -252,10 +252,26 @@ static GLuint get_surface_type( GLenum type, GLuint size, else { return ubyte_types_norm[size]; } + /* See GL_ARB_vertex_type_2_10_10_10_rev */ + /* W/A: the hardware doesn't really support the formats we'd + * like to use here, so upload everything as UINT and fix + * it in the shader */ + case GL_INT_2_10_10_10_REV: + case GL_UNSIGNED_INT_2_10_10_10_REV: + assert(size == 4); + return BRW_SURFACEFORMAT_R10G10B10A2_UINT; default: assert(0); return 0; - } + } } else { + /* See GL_ARB_vertex_type_2_10_10_10_rev */ + /* W/A: the hardware doesn't really support the formats we'd + * like to use here, so upload everything as UINT and fix + * it in the shader */ + if (type == GL_INT_2_10_10_10_REV || type == GL_UNSIGNED_INT_2_10_10_10_REV) { + assert(size == 4); +return BRW_SURFACEFORMAT_R10G10B10A2_UINT; + } assert(format == GL_RGBA); /* sanity check */ switch (type) { case GL_DOUBLE: return double_types[size]; -- 1.7.12.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] i965: Generalize GL_FIXED VS w/a support
Next few patches build on this to add other workarounds for packed formats. Signed-off-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 +++--- src/mesa/drivers/dri/i965/brw_vs.c | 9 + src/mesa/drivers/dri/i965/brw_vs.h | 14 +++--- src/mesa/drivers/dri/i965/brw_vs_emit.c| 20 +++- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f54c49e..dba0a82 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -800,13 +800,13 @@ vec4_visitor::visit(ir_variable *ir) * come in as floating point conversions of the integer values. */ for (int i = ir->location; i < ir->location + type_size(ir->type); i++) { -if (!c->key.gl_fixed_input_size[i]) - continue; - -dst_reg dst = *reg; - dst.type = brw_type_for_base_type(ir->type); -dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1; -emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + uint8_t wa_flags = c->key.gl_attrib_wa_flags[i]; + if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) { +dst_reg dst = *reg; +dst.type = brw_type_for_base_type(ir->type); +dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENTS)) - 1; +emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + } } break; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4e95074..c31092d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -381,9 +381,9 @@ brw_vs_debug_recompile(struct brw_context *brw, } for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) { - found |= key_debug("GL_FIXED rescaling", - old_key->gl_fixed_input_size[i], - key->gl_fixed_input_size[i]); + found |= key_debug("Vertex attrib w/a flags", + old_key->gl_attrib_wa_flags[i], + key->gl_attrib_wa_flags[i]); } found |= key_debug("user clip flags", @@ -465,9 +465,10 @@ static void brw_upload_vs_prog(struct brw_context *brw) /* 
BRW_NEW_VERTICES */ for (i = 0; i < VERT_ATTRIB_MAX; i++) { + /* TODO: flag w/a for packed vertex formats here too */ if (vp->program.Base.InputsRead & BITFIELD64_BIT(i) && brw->vb.inputs[i].glarray->Type == GL_FIXED) { -key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size; +key.gl_attrib_wa_flags[i] = brw->vb.inputs[i].glarray->Size; } } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index adeff7f..9da4cb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -39,13 +39,21 @@ #include "brw_program.h" #include "program/program.h" +/* fixup bits for gl_packed_input_flags, + * to enable various VS workarounds */ +#define BRW_ATTRIB_WA_COMPONENTS7 /* mask for GL_FIXED scale channel count */ +#define BRW_ATTRIB_WA_NORMALIZE 8 /* normalize in shader */ +#define BRW_ATTRIB_WA_BGRA 16 /* swap r/b channels in shader */ +#define BRW_ATTRIB_WA_SIGN 32 /* interpret as signed in shader */ +#define BRW_ATTRIB_WA_SCALE 64 /* interpret as scaled in shader */ struct brw_vs_prog_key { GLuint program_string_id; - /** -* Number of channels of the vertex attribute that need GL_FIXED rescaling + + /* +* Per-attribute workaround flags */ - uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX]; + uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX]; /** * True if at least one clip flag is enabled, regardless of whether the diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 02239b4..a4742c7 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1582,22 +1582,24 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) } static void -brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) +brw_vs_apply_attrib_wa(struct brw_vs_compile *c) { struct brw_compile *p = &c->func; int i; for (i = 0; i < VERT_ATTRIB_MAX; i++) { + uint8_t wa_flags = c->key.gl_attrib_wa_flags[i]; if (!(c->prog_data.inputs_read & BITFIELD64_BIT(i))) 
-continue; + continue; - if (c->key.gl_fixed_input_size[i] != 0) { -struct brw_reg reg = c->regs[PROGRAM_INPUT][i]; - -brw_MUL(p, -brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), -reg, brw_imm_f(1.0 / 65536.0)); + if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) { + struct brw_reg reg = c->regs[PROGRAM_INPUT][i]; + brw_MUL(p, +brw_writemask(reg, (1 << (wa
[Mesa-dev] [PATCH 4/6] i965: set attribute w/a bits for packed formats
Flag the need for various workarounds to be applied by the vertex shader. Signed-off-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vs.c | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index c31092d..1cf6394 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -465,10 +465,32 @@ static void brw_upload_vs_prog(struct brw_context *brw) /* BRW_NEW_VERTICES */ for (i = 0; i < VERT_ATTRIB_MAX; i++) { - /* TODO: flag w/a for packed vertex formats here too */ - if (vp->program.Base.InputsRead & BITFIELD64_BIT(i) && - brw->vb.inputs[i].glarray->Type == GL_FIXED) { -key.gl_attrib_wa_flags[i] = brw->vb.inputs[i].glarray->Size; + if (vp->program.Base.InputsRead & BITFIELD64_BIT(i)) { + uint8_t wa_flags = 0; + + switch (brw->vb.inputs[i].glarray->Type) { + + case GL_FIXED: +wa_flags = brw->vb.inputs[i].glarray->Size; +break; + + case GL_INT_2_10_10_10_REV: +wa_flags |= BRW_ATTRIB_WA_SIGN; +/* fallthough */ + + case GL_UNSIGNED_INT_2_10_10_10_REV: +if (brw->vb.inputs[i].glarray->Format == GL_BGRA) + wa_flags |= BRW_ATTRIB_WA_BGRA; + +if (brw->vb.inputs[i].glarray->Normalized) + wa_flags |= BRW_ATTRIB_WA_NORMALIZE; +else if (!brw->vb.inputs[i].glarray->Integer) + wa_flags |= BRW_ATTRIB_WA_SCALE; + +break; + } + + key.gl_attrib_wa_flags[i] = wa_flags; } } -- 1.7.12.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] i965: emit w/a for packed attribute formats in VS
Implements BGRA swizzle, sign recovery, and normalization as required by ARB_vertex_type_2_10_10_10_rev. This patch only adds the support to the "old" VS backend; this is what is tested by the piglit tests. Port to the new VS backend is still to come. Normalization is sloppy, and will be revisited for final version. Signed-off-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 49 +++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a4742c7..bfb617a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1581,26 +1581,71 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) } } +/* apply various attribute format workarounds */ static void brw_vs_apply_attrib_wa(struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + struct brw_reg shift_tmp; + struct brw_reg shift_tmp_ud = retype(shift_tmp, BRW_REGISTER_TYPE_UD); int i; + int any_sign_recovery = 0; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + if (c->prog_data.inputs_read & BITFIELD64_BIT(i)) + if (c->key.gl_attrib_wa_flags[i] & BRW_ATTRIB_WA_SIGN) +any_sign_recovery = 1; + + /* set up the shift value for sign recovery if any attribs needed it */ + if (any_sign_recovery) { + shift_tmp = get_tmp(c); + brw_MOV(p, brw_writemask(shift_tmp_ud, WRITEMASK_XYZ), brw_imm_ud(22)); + brw_MOV(p, brw_writemask(shift_tmp_ud, WRITEMASK_W), brw_imm_ud(30)); + } for (i = 0; i < VERT_ATTRIB_MAX; i++) { uint8_t wa_flags = c->key.gl_attrib_wa_flags[i]; + struct brw_reg reg = c->regs[PROGRAM_INPUT][i]; + struct brw_reg reg_d = retype(reg, BRW_REGISTER_TYPE_D); + struct brw_reg reg_ud = retype(reg, BRW_REGISTER_TYPE_UD); + if (!(c->prog_data.inputs_read & BITFIELD64_BIT(i))) continue; if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) { - struct brw_reg reg = c->regs[PROGRAM_INPUT][i]; brw_MUL(p, brw_writemask(reg, (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENTS)) - 1), 
reg, brw_imm_f(1.0 / 65536.0)); } - /* TODO: emit other packed vertex attrib w/a shader code here. */ + if (wa_flags & BRW_ATTRIB_WA_SIGN) { + brw_SHL(p, reg_ud, reg_ud, shift_tmp_ud); + brw_ASR(p, reg_d, reg_d, shift_tmp_ud); + } + + if (wa_flags & BRW_ATTRIB_WA_BGRA) { + brw_MOV(p, reg_ud, brw_swizzle(reg_ud, 2,1,0,3)); + } + + if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) { + /* normalize according to GL 3.2 spec eqn 2.2, 2.3? this is sloppy. */ + brw_MOV(p, reg, (wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud); + brw_MUL(p, brw_writemask(reg, WRITEMASK_XYZ), reg, + (wa_flags & BRW_ATTRIB_WA_SIGN) ? brw_imm_f(1.0 / 512.0) + : brw_imm_f(1.0 / 1024.0)); + if (~wa_flags & BRW_ATTRIB_WA_SIGN) +brw_MUL(p, brw_writemask(reg, WRITEMASK_W), reg, + brw_imm_f(1.0 / 3.0)); + } + + if (wa_flags & BRW_ATTRIB_WA_SCALE) { + /* just convert from int to float */ + brw_MOV(p, reg, (wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud); + } } + + if (any_sign_recovery) + release_tmp(c, shift_tmp_ud); } /* Emit the vertex program instructions here. -- 1.7.12.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965: enable ARB_vertex_type_2_10_10_10_rev
Enabled on Gen4+. Signed-off-by: Chris Forbes --- docs/GL3.txt | 2 +- src/mesa/drivers/dri/intel/intel_extensions.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 876165f..6e66e55 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -82,7 +82,7 @@ GL_ARB_texture_rgb10_a2ui DONE (i965, r600) GL_ARB_texture_swizzleDONE (same as EXT version) (i965, r300, r600, swrast) GL_ARB_timer_queryDONE (i965) GL_ARB_instanced_arrays DONE (i965, r300, r600) -GL_ARB_vertex_type_2_10_10_10_rev DONE (r600) +GL_ARB_vertex_type_2_10_10_10_rev DONE (i965, r600) GL 4.0: diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index ec99c3e..c025d32 100755 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -89,6 +89,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true; ctx->Extensions.ARB_texture_rgb10_a2ui = true; + if (intel->gen >= 4) + ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true; + if (intel->gen >= 6) if (ctx->API == API_OPENGL_CORE) { ctx->Const.GLSLVersion = 140; -- 1.7.12.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev