[Mesa-dev] [Bug 55998] Pretty huge slowdown in mesa 9.0

2012-10-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=55998

m...@manki.in changed:

   What|Removed |Added

 CC||m...@manki.in

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] radeon/llvm: interp instructions emits native outputs

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 30 +++
 lib/Target/AMDGPU/R600Instructions.td | 25 +-
 lib/Target/AMDGPU/R600RegisterInfo.td | 10 +
 3 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp 
b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index fabdb4d..f9fd65d 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -95,8 +95,9 @@ bool 
R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
   for (unsigned i = 0; i < 8; i++) {
 unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
 2 * IJIndexBase + ((i + 1) % 2));
-unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
-4 * MI.getOperand(2).getImm());
+unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+MI.getOperand(2).getImm());
+
 
 unsigned Sel;
 switch (i % 4) {
@@ -109,16 +110,11 @@ bool 
R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
 
 unsigned Res = TRI.getSubReg(DstReg, Sel);
 
-const MCInstrDesc &Opcode = (i < 4)?
-TII->get(AMDGPU::INTERP_ZW):
-TII->get(AMDGPU::INTERP_XY);
+unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
 
-MachineInstr *NewMI = BuildMI(*(MI.getParent()),
-I, MI.getParent()->findDebugLoc(I),
-Opcode, Res)
-.addReg(IJIndex)
-.addReg(ReadReg)
-.addImm(0);
+MachineBasicBlock &MBB = *(MI.getParent());
+MachineInstr *NewMI =
+TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
 
 if (!(i> 1 && i < 6)) {
   TII->addFlag(NewMI, 0, MO_FLAG_MASK);
@@ -143,8 +139,8 @@ bool 
R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
   unsigned DstReg = MI.getOperand(0).getReg();
 
   for (unsigned i = 0; i < 4; i++) {
-unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
-4 * MI.getOperand(1).getImm() + i);
+unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+MI.getOperand(1).getImm());
 
 unsigned Sel;
 switch (i % 4) {
@@ -157,11 +153,9 @@ bool 
R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
 
 unsigned Res = TRI.getSubReg(DstReg, Sel);
 
-MachineInstr *NewMI = BuildMI(*(MI.getParent()),
-I, MI.getParent()->findDebugLoc(I),
-TII->get(AMDGPU::INTERP_LOAD_P0), Res)
-.addReg(ReadReg)
-.addImm(0);
+MachineBasicBlock &MBB = *(MI.getParent());
+MachineInstr *NewMI = TII->buildDefaultInstruction(
+MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
 
 if (i % 4 !=  3)
   TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
diff --git a/lib/Target/AMDGPU/R600Instructions.td 
b/lib/Target/AMDGPU/R600Instructions.td
index 7cc74e8..b97b094 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -444,32 +444,17 @@ def input_constant :  AMDGPUShaderInst <
 
 
 
-def INTERP_XY : InstR600 <0xD6,
-  (outs R600_Reg32:$dst),
-  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
-  "INTERP_XY dst",
-  [], AnyALU>
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []>
 {
-  let FlagOperandIdx = 3;
+  let bank_swizzle = 5;
 }
 
-def INTERP_ZW : InstR600 <0xD7,
-  (outs R600_Reg32:$dst),
-  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
-  "INTERP_ZW dst",
-  [], AnyALU>
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []>
 {
-  let FlagOperandIdx = 3;
+  let bank_swizzle = 5;
 }
 
-def INTERP_LOAD_P0 : InstR600 <0xE0,
-  (outs R600_Reg32:$dst),
-  (ins R600_Reg32:$src, i32imm:$flags),
-  "INTERP_LOAD_P0 dst",
-  [], AnyALU>
-{
-  let FlagOperandIdx = 2;
-}
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
 
 let Predicates = [isR600toCayman] in { 
 
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td 
b/lib/Target/AMDGPU/R600RegisterInfo.td
index c682f2b..d3d6d25 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -41,6 +41,12 @@ foreach Index = 0-127 in {
Index>;
 }
 
+// Array Base Register holding input in FS
+foreach Index = 448-464 in {
+  def ArrayBase#Index :  R600Reg<"ARRAY_BASE", Index>;
+}
+
+
 // Special Registers
 
 def ZERO : R600Reg<"0.0", 248>;
@@ -56,6 +62,9 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
 def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
 
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+  (add (sequence "ArrayBase%u", 448, 464))>;
+
 def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
   (add (interleave
   (interleave (sequence "C%u_X", 0, 127),
@@ -83,6 +92,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add

[Mesa-dev] [PATCH 2/6] radeon/llvm: Add super reg to reserved reg list

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp
index a7cb010..094d920 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -169,6 +169,9 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
   unsigned ReservedReg =
   
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
   MFI->ReservedRegs.push_back(ReservedReg);
+  unsigned SuperReg =
+  AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
+  MFI->ReservedRegs.push_back(SuperReg);
   break;
 }
 
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radeon/llvm: Remove input.face and input.position intrinsics

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 35 --
 lib/Target/AMDGPU/R600ISelLowering.h   |  1 -
 lib/Target/AMDGPU/R600Intrinsics.td|  4 
 3 files changed, 40 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 094d920..76cabae 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -357,20 +357,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG &DAG) const
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
   DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
 }
-case AMDGPUIntrinsic::R600_load_input_position: {
-  unsigned slot = cast(Op.getOperand(1))->getZExtValue();
-  unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
-  SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-   RegIndex, MVT::f32);
-  if ((slot % 4) == 3) {
-return DAG.getNode(ISD::FDIV,
-DL, VT,
-DAG.getConstantFP(1.0f, MVT::f32),
-Reg);
-  } else {
-return Reg;
-  }
-}
 
 case r600_read_ngroups_x:
   return LowerImplicitParameter(DAG, VT, DL, 0);
@@ -424,30 +410,9 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default: return;
   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), 
DAG));
-  case ISD::INTRINSIC_WO_CHAIN:
-{
-  unsigned IntrinsicID =
-  cast(N->getOperand(0))->getZExtValue();
-  if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
-Results.push_back(LowerInputFace(N, DAG));
-  } else {
-return;
-  }
-}
   }
 }
 
-SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
-{
-  unsigned slot = cast(Op->getOperand(1))->getZExtValue();
-  unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
-  SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-  RegIndex, MVT::f32);
-  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
-  Reg, DAG.getConstantFP(0.0f, MVT::f32),
-  DAG.getCondCode(ISD::SETUGT));
-}
-
 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
 {
   return DAG.getNode(
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h 
b/lib/Target/AMDGPU/R600ISelLowering.h
index 8bd4859..fd32f1b 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -59,7 +59,6 @@ private:
 
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
   
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td 
b/lib/Target/AMDGPU/R600Intrinsics.td
index 9c81310..d661366 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,8 +19,4 @@ let TargetPrefix = "R600", isTarget = 1 in {
 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
   def int_R600_load_input_linear :
 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
-  def int_R600_load_input_position :
-Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
-  def int_R600_load_input_face :
-Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
 }
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] radeon/llvm: add support for vector setCC

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 76cabae..d0711b8 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -719,10 +719,8 @@ SDValue R600TargetLowering::LowerFormalArguments(
 }
 
 EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
-  if (VT.isVector()) {
-return VT;
-  }
-  return MVT::i32;
+   if (!VT.isVector()) return MVT::i32;
+   return VT.changeVectorElementTypeToInteger();
 }
 
 
//===--===//
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] radeon/llvm: turn select into select_cc

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 16 
 lib/Target/AMDGPU/R600ISelLowering.h   |  1 +
 2 files changed, 17 insertions(+)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp
index d0711b8..38d68f4 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -64,6 +64,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SETCC, MVT::f32, Custom);
   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
 
+  setOperationAction(ISD::SELECT, MVT::i32, Custom);
+  setOperationAction(ISD::SELECT, MVT::f32, Custom);
+
   setTargetDAGCombine(ISD::FP_ROUND);
 
   setSchedulingPreference(Sched::VLIW);
@@ -295,6 +298,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG &DAG) const
   case ISD::BR_CC: return LowerBR_CC(Op, DAG);
   case ISD::ROTL: return LowerROTL(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::SETCC: return LowerSETCC(Op, DAG);
   case ISD::FPOW: return LowerFPOW(Op, DAG);
   case ISD::INTRINSIC_VOID: {
@@ -638,6 +642,18 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, 
SelectionDAG &DAG) const
   DAG.getCondCode(ISD::SETNE));
 }
 
+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+  return DAG.getNode(ISD::SELECT_CC,
+  Op.getDebugLoc(),
+  Op.getValueType(),
+  Op.getOperand(0),
+  DAG.getConstant(0, MVT::i32),
+  Op.getOperand(1),
+  Op.getOperand(2),
+  DAG.getCondCode(ISD::SETNE));
+}
+
 SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
 {
   SDValue Cond;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h 
b/lib/Target/AMDGPU/R600ISelLowering.h
index fd32f1b..d1dfe9f 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -58,6 +58,7 @@ private:
   SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeon/llvm: Cayman uses vector instructions for SIN/COS/RECIP_CLAMPED/RECIPSQRT_IEEE

2012-10-21 Thread Vincent Lejeune
---
 lib/Target/AMDGPU/R600Instructions.td | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/lib/Target/AMDGPU/R600Instructions.td 
b/lib/Target/AMDGPU/R600Instructions.td
index b97b094..472538e 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -938,6 +938,15 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : SIN_PAT ;
+def : COS_PAT ;
+def : Pat<(fsqrt R600_Reg32:$src),
+  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
 } // End Predicates = [isEG]
 
 
//===--===//
@@ -982,18 +991,11 @@ let Predicates = [isEGorCayman] in {
   def CNDGE_eg : CNDGE_Common<0x1B>;
   def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
   def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
-  def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
-  def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-  def SIN_eg : SIN_Common<0x8D>;
-  def COS_eg : COS_Common<0x8E>;
   defm DOT4_eg : DOT4_Common<0xBE>;
   defm CUBE_eg : CUBE_Common<0xC0>;
 
   def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common;
 
-  def : SIN_PAT ;
-  def : COS_PAT ;
-
   def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
 let Pattern = [];
   }
@@ -1022,9 +1024,6 @@ let Predicates = [isEGorCayman] in {
   def : Pat<(fp_to_uint R600_Reg32:$src0),
 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
 
-  def : Pat<(fsqrt R600_Reg32:$src),
-(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
-
 
//===--===//
 // Memory read/write instructions
 
//===--===//
@@ -1278,8 +1277,15 @@ def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
 def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
 def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
 def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
 } // End isVector = 1
 
+def : SIN_PAT ;
+def : COS_PAT ;
+
 defm DIV_cm : DIV_Common;
 
 // RECIP_UINT emulation for Cayman
@@ -1289,6 +1295,10 @@ def : Pat <
 (MOV_IMM_I32 0x4f80)))
 >;
 
+
+def : Pat<(fsqrt R600_Reg32:$src),
+  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+
 } // End isCayman
 
 
//===--===//
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Mesa-9.0: generate_builtins.py hangs and uses 100% CPU when generating from *.glsl files

2012-10-21 Thread Stephan Raue

Hi,

I am trying to (cross-)compile Mesa-9.0. builtin_compiler is compiled for the host 
in a separate host build before I build Mesa for the target.


When I build Mesa for the target, the build process hangs at:
python2  ./builtins/tools/generate_builtins.py 
/home/stephan/projects/openelec-master/build.OpenELEC-Intel.x86_64-devel/toolchain/bin/builtin_compiler 
> builtin_function.cpp || rm -f builtin_function.cpp

with no output and uses 100% CPU.

I get the same issue when running this command manually. With Mesa-8.0.4
this was working, and the problem was introduced by this commit:

http://cgit.freedesktop.org/mesa/mesa/commit/src/glsl/builtins/tools/generate_builtins.py?h=9.0&id=7de1331662816d31fb9bed423b1e5372284a260e

if i comment out the line:

read_glsl_files(fs)

the above command works. so something with the read_glsl_files(fs) 
function seems wrong here.


if i run the above command with python -vvv it hangs after:

...
# trying 
/home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIOmodule.so
# trying 
/home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIO.py
# trying 
/home/stephan/projects/openelec-master/sources/Mesa/Mesa-9.0/src/glsl/builtins/tools/StringIO.pyo

# trying /usr/lib64/python2.7/StringIO.so
# trying /usr/lib64/python2.7/StringIOmodule.so
# trying /usr/lib64/python2.7/StringIO.py
# /usr/lib64/python2.7/StringIO.pyo matches /usr/lib64/python2.7/StringIO.py
import StringIO # precompiled from /usr/lib64/python2.7/StringIO.pyo

until i break with ctrl+c:

^CTraceback (most recent call last):
  File "./builtins/tools/generate_builtins.py", line 220, in 
write_function_definitions()
  File "./builtins/tools/generate_builtins.py", line 68, in 
write_function_definitions

fs = get_builtin_definitions()
  File "./builtins/tools/generate_builtins.py", line 48, in 
get_builtin_definitions

read_glsl_files(fs)
  File "./builtins/tools/generate_builtins.py", line 35, in read_glsl_files
(output, returncode) = run_compiler([filename])
  File "./builtins/tools/generate_builtins.py", line 76, in run_compiler
output = p.communicate()[0]
  File "/usr/lib64/python2.7/subprocess.py", line 746, in communicate
stdout = _eintr_retry_call(self.stdout.read)
  File "/usr/lib64/python2.7/subprocess.py", line 478, in _eintr_retry_call
return func(*args)
KeyboardInterrupt
# clear __builtin__._
# clear sys.path
...

What information do you need to track down the problem?

thanks for your help

Stephan


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vs: Fix debug dumping of VS push constants.

2012-10-21 Thread Kenneth Graunke
While copying the values into the batch space, we advance the param
pointer.  The debug code then tries to iterate over all the uploaded
values, starting at param...which is now the end of the uploaded data,
rather than the start.

This patch saves a pointer to the start of push constant space before
it gets altered and switches the debug code to use that.

Tested by uncommenting the code and examining the output of
glsl-vs-clamp-1.shader_test.  Previously all values appeared to be zero.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c 
b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index b15bc3d..0e80c27 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -63,6 +63,8 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
  4 * sizeof(float),
  32, &brw->vs.push_const_offset);
 
+  float *save_param = param;
+
   if (brw->vs.prog_data->uses_new_param_layout) {
 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
*param = *brw->vs.prog_data->param[i];
@@ -102,7 +104,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
   if (0) {
 printf("VS constant buffer:\n");
 for (i = 0; i < params_uploaded; i++) {
-   float *buf = param + i * 4;
+   float *buf = save_param + i * 4;
printf("%d: %f %f %f %f\n",
   i, buf[0], buf[1], buf[2], buf[3]);
 }
-- 
1.7.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH V2] i965: add ARB_vertex_type_2_10_10_10_rev support

2012-10-21 Thread Chris Forbes
This series adds support for ARB_vertex_type_2_10_10_10_rev on i965.
Notable changes from V1:

* Extra SURFACEFORMAT flags are no longer needed, so the first patch
was dropped.
* Spurious extra debug patches removed.
* All 2_10_10_10 attributes are uploaded as UINT, and the vertex
shader does the rest.

Remaining things to do:

* Port 5/6 to the new VS backend so it works with GLSL too.
* Tidy up the normalization.

In its current state, this series passes piglit draw-vertices-2101010
on at least gen6.

-- Chris

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] i965: implement get_size for 2_10_10_10 formats

2012-10-21 Thread Chris Forbes
Signed-off-by: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index f5f65ca..8ffcc57 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -290,6 +290,10 @@ static GLuint get_size( GLenum type )
case GL_UNSIGNED_SHORT: return sizeof(GLushort);
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
case GL_FIXED: return sizeof(GLuint);
+   /* packed formats: always have 4 components, and element size is
+   * 4 bytes, so pretend each component is 1 byte. */
+   case GL_INT_2_10_10_10_REV: return sizeof(GLbyte);
+   case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLubyte);
default: assert(0); return 0;
}
 }
-- 
1.7.12.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] i965: support 2_10_10_10 formats in get_surface_type.

2012-10-21 Thread Chris Forbes
Always use R10G10B10A2_UINT; Most of the other formats we'd like
don't actually work on the hardware. Will emit w/a for scaling,
sign recovery and BGRA swizzle in the VS.

Signed-off-by: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 8ffcc57..c6db024 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -252,10 +252,26 @@ static GLuint get_surface_type( GLenum type, GLuint size,
  else {
 return ubyte_types_norm[size];
  }
+  /* See GL_ARB_vertex_type_2_10_10_10_rev */
+  /* W/A: the hardware doesn't really support the formats we'd
+   * like to use here, so upload everything as UINT and fix
+   * it in the shader */
+  case GL_INT_2_10_10_10_REV:
+  case GL_UNSIGNED_INT_2_10_10_10_REV:
+ assert(size == 4);
+ return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
   default: assert(0); return 0;
-  }  
+  }
}
else {
+  /* See GL_ARB_vertex_type_2_10_10_10_rev */
+  /* W/A: the hardware doesn't really support the formats we'd
+   * like to use here, so upload everything as UINT and fix
+   * it in the shader */
+  if (type == GL_INT_2_10_10_10_REV || type == 
GL_UNSIGNED_INT_2_10_10_10_REV) {
+ assert(size == 4);
+return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
+  }
   assert(format == GL_RGBA); /* sanity check */
   switch (type) {
   case GL_DOUBLE: return double_types[size];
-- 
1.7.12.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] i965: Generalize GL_FIXED VS w/a support

2012-10-21 Thread Chris Forbes
Next few patches build on this to add other workarounds
for packed formats.

Signed-off-by: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 +++---
 src/mesa/drivers/dri/i965/brw_vs.c |  9 +
 src/mesa/drivers/dri/i965/brw_vs.h | 14 +++---
 src/mesa/drivers/dri/i965/brw_vs_emit.c| 20 +++-
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f54c49e..dba0a82 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -800,13 +800,13 @@ vec4_visitor::visit(ir_variable *ir)
* come in as floating point conversions of the integer values.
*/
   for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
-if (!c->key.gl_fixed_input_size[i])
-   continue;
-
-dst_reg dst = *reg;
- dst.type = brw_type_for_base_type(ir->type);
-dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
-emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
+ uint8_t wa_flags = c->key.gl_attrib_wa_flags[i];
+ if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) {
+dst_reg dst = *reg;
+dst.type = brw_type_for_base_type(ir->type);
+dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENTS)) - 1;
+emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
+ }
   }
   break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index 4e95074..c31092d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -381,9 +381,9 @@ brw_vs_debug_recompile(struct brw_context *brw,
}
 
for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
-  found |= key_debug("GL_FIXED rescaling",
- old_key->gl_fixed_input_size[i],
- key->gl_fixed_input_size[i]);
+  found |= key_debug("Vertex attrib w/a flags",
+ old_key->gl_attrib_wa_flags[i],
+ key->gl_attrib_wa_flags[i]);
}
 
found |= key_debug("user clip flags",
@@ -465,9 +465,10 @@ static void brw_upload_vs_prog(struct brw_context *brw)
 
/* BRW_NEW_VERTICES */
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+  /* TODO: flag w/a for packed vertex formats here too */
   if (vp->program.Base.InputsRead & BITFIELD64_BIT(i) &&
  brw->vb.inputs[i].glarray->Type == GL_FIXED) {
-key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size;
+key.gl_attrib_wa_flags[i] = brw->vb.inputs[i].glarray->Size;
   }
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h 
b/src/mesa/drivers/dri/i965/brw_vs.h
index adeff7f..9da4cb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -39,13 +39,21 @@
 #include "brw_program.h"
 #include "program/program.h"
 
+/* fixup bits for gl_attrib_wa_flags,
+ * to enable various VS workarounds */
+#define BRW_ATTRIB_WA_COMPONENTS7  /* mask for GL_FIXED scale channel 
count */
+#define BRW_ATTRIB_WA_NORMALIZE 8  /* normalize in shader */
+#define BRW_ATTRIB_WA_BGRA  16 /* swap r/b channels in shader */
+#define BRW_ATTRIB_WA_SIGN  32 /* interpret as signed in shader */
+#define BRW_ATTRIB_WA_SCALE 64 /* interpret as scaled in shader */
 
 struct brw_vs_prog_key {
GLuint program_string_id;
-   /**
-* Number of channels of the vertex attribute that need GL_FIXED rescaling
+
+   /*
+* Per-attribute workaround flags
 */
-   uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX];
+   uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
 
/**
 * True if at least one clip flag is enabled, regardless of whether the
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c 
b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 02239b4..a4742c7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1582,22 +1582,24 @@ accumulator_contains(struct brw_vs_compile *c, struct 
brw_reg val)
 }
 
 static void
-brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
+brw_vs_apply_attrib_wa(struct brw_vs_compile *c)
 {
struct brw_compile *p = &c->func;
int i;
 
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+  uint8_t wa_flags = c->key.gl_attrib_wa_flags[i];
   if (!(c->prog_data.inputs_read & BITFIELD64_BIT(i)))
-continue;
+ continue;
 
-  if (c->key.gl_fixed_input_size[i] != 0) {
-struct brw_reg reg = c->regs[PROGRAM_INPUT][i];
-
-brw_MUL(p,
-brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
-reg, brw_imm_f(1.0 / 65536.0));
+  if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) {
+ struct brw_reg reg = c->regs[PROGRAM_INPUT][i];
+ brw_MUL(p,
+brw_writemask(reg, (1 << (wa

[Mesa-dev] [PATCH 4/6] i965: set attribute w/a bits for packed formats

2012-10-21 Thread Chris Forbes
Flag the need for various workarounds to be applied by
the vertex shader.

Signed-off-by: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_vs.c | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index c31092d..1cf6394 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -465,10 +465,32 @@ static void brw_upload_vs_prog(struct brw_context *brw)
 
/* BRW_NEW_VERTICES */
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-  /* TODO: flag w/a for packed vertex formats here too */
-  if (vp->program.Base.InputsRead & BITFIELD64_BIT(i) &&
- brw->vb.inputs[i].glarray->Type == GL_FIXED) {
-key.gl_attrib_wa_flags[i] = brw->vb.inputs[i].glarray->Size;
+  if (vp->program.Base.InputsRead & BITFIELD64_BIT(i)) {
+ uint8_t wa_flags = 0;
+
+ switch (brw->vb.inputs[i].glarray->Type) {
+
+ case GL_FIXED:
+wa_flags = brw->vb.inputs[i].glarray->Size;
+break;
+
+ case GL_INT_2_10_10_10_REV:
+wa_flags |= BRW_ATTRIB_WA_SIGN;
+/* fallthrough */
+
+ case GL_UNSIGNED_INT_2_10_10_10_REV:
+if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
+   wa_flags |= BRW_ATTRIB_WA_BGRA;
+
+if (brw->vb.inputs[i].glarray->Normalized)
+   wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
+else if (!brw->vb.inputs[i].glarray->Integer)
+   wa_flags |= BRW_ATTRIB_WA_SCALE;
+
+break;
+ }
+
+ key.gl_attrib_wa_flags[i] = wa_flags;
   }
}
 
-- 
1.7.12.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] i965: emit w/a for packed attribute formats in VS

2012-10-21 Thread Chris Forbes
Implements BGRA swizzle, sign recovery, and normalization
as required by ARB_vertex_type_2_10_10_10_rev.

This patch only adds the support to the "old" VS backend;
this is what is tested by the piglit tests. Port to the new
VS backend is still to come.

Normalization is sloppy, and will be revisited for final version.

Signed-off-by: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 49 +++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c 
b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a4742c7..bfb617a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1581,26 +1581,71 @@ accumulator_contains(struct brw_vs_compile *c, struct 
brw_reg val)
}
 }
 
+/* apply various attribute format workarounds */
 static void
 brw_vs_apply_attrib_wa(struct brw_vs_compile *c)
 {
struct brw_compile *p = &c->func;
+   struct brw_reg shift_tmp;
+   struct brw_reg shift_tmp_ud = retype(shift_tmp, BRW_REGISTER_TYPE_UD);
int i;
+   int any_sign_recovery = 0;
+
+   for (i = 0; i < VERT_ATTRIB_MAX; i++)
+  if (c->prog_data.inputs_read & BITFIELD64_BIT(i))
+ if (c->key.gl_attrib_wa_flags[i] & BRW_ATTRIB_WA_SIGN)
+any_sign_recovery = 1;
+
+   /* set up the shift value for sign recovery if any attribs needed it */
+   if (any_sign_recovery) {
+  shift_tmp = get_tmp(c);
+  brw_MOV(p, brw_writemask(shift_tmp_ud, WRITEMASK_XYZ), brw_imm_ud(22));
+  brw_MOV(p, brw_writemask(shift_tmp_ud, WRITEMASK_W), brw_imm_ud(30));
+   }
 
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
   uint8_t wa_flags = c->key.gl_attrib_wa_flags[i];
+  struct brw_reg reg = c->regs[PROGRAM_INPUT][i];
+  struct brw_reg reg_d = retype(reg, BRW_REGISTER_TYPE_D);
+  struct brw_reg reg_ud = retype(reg, BRW_REGISTER_TYPE_UD);
+
   if (!(c->prog_data.inputs_read & BITFIELD64_BIT(i)))
  continue;
 
   if (wa_flags & BRW_ATTRIB_WA_COMPONENTS) {
- struct brw_reg reg = c->regs[PROGRAM_INPUT][i];
  brw_MUL(p,
 brw_writemask(reg, (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENTS)) - 
1),
 reg, brw_imm_f(1.0 / 65536.0));
   }
 
-  /* TODO: emit other packed vertex attrib w/a shader code here. */
+  if (wa_flags & BRW_ATTRIB_WA_SIGN) {
+ brw_SHL(p, reg_ud, reg_ud, shift_tmp_ud);
+ brw_ASR(p, reg_d, reg_d, shift_tmp_ud);
+  }
+
+  if (wa_flags & BRW_ATTRIB_WA_BGRA) {
+ brw_MOV(p, reg_ud, brw_swizzle(reg_ud, 2,1,0,3));
+  }
+
+  if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
+ /* normalize according to GL 3.2 spec eqn 2.2, 2.3? this is sloppy. */
+ brw_MOV(p, reg, (wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud);
+ brw_MUL(p, brw_writemask(reg, WRITEMASK_XYZ), reg,
+   (wa_flags & BRW_ATTRIB_WA_SIGN) ? brw_imm_f(1.0 / 512.0)
+  : brw_imm_f(1.0 / 1024.0));
+ if (~wa_flags & BRW_ATTRIB_WA_SIGN)
+brw_MUL(p, brw_writemask(reg, WRITEMASK_W), reg,
+   brw_imm_f(1.0 / 3.0));
+  }
+
+  if (wa_flags & BRW_ATTRIB_WA_SCALE) {
+ /* just convert from int to float */
+ brw_MOV(p, reg, (wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud);
+  }
}
+
+   if (any_sign_recovery)
+  release_tmp(c, shift_tmp_ud);
 }
 
 /* Emit the vertex program instructions here.
-- 
1.7.12.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] i965: enable ARB_vertex_type_2_10_10_10_rev

2012-10-21 Thread Chris Forbes
Enabled on Gen4+.

Signed-off-by: Chris Forbes 
---
 docs/GL3.txt  | 2 +-
 src/mesa/drivers/dri/intel/intel_extensions.c | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 876165f..6e66e55 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -82,7 +82,7 @@ GL_ARB_texture_rgb10_a2ui DONE 
(i965, r600)
 GL_ARB_texture_swizzleDONE (same as EXT 
version) (i965, r300, r600, swrast)
 GL_ARB_timer_queryDONE (i965)
 GL_ARB_instanced_arrays   DONE (i965, r300, r600)
-GL_ARB_vertex_type_2_10_10_10_rev DONE (r600)
+GL_ARB_vertex_type_2_10_10_10_rev DONE (i965, r600)
 
 
 GL 4.0:
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c 
b/src/mesa/drivers/dri/intel/intel_extensions.c
index ec99c3e..c025d32 100755
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -89,6 +89,9 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true;
ctx->Extensions.ARB_texture_rgb10_a2ui = true;
 
+   if (intel->gen >= 4)
+   ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
+
if (intel->gen >= 6)
   if (ctx->API == API_OPENGL_CORE) {
  ctx->Const.GLSLVersion = 140;
-- 
1.7.12.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev