From: Ian Romanick <ian.d.roman...@intel.com>

SEQ and SNE are not native i915 instructions, so they each generate at
least 3 instructions.  If both operands are uniforms or constants, we
get 5 instructions like:

                U[1] = MOV CONST[1]
                U[0].xyz = SGE CONST[0].xxxx, U[1]
                U[1] = MOV CONST[1].-x-y-z-w
                R[0].xyz = SGE CONST[0].-x-x-x-x, U[1]
                R[0].xyz = MUL R[0], U[0]

This code is stupid.  Instead of having the individual calls to
i915_emit_arith generate the moves to utemps, do it in the caller.  This
results in code like:

                U[1] = MOV CONST[1]
                U[0].xyz = SGE CONST[0].xxxx, U[1]
                R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w
                R[0].xyz = MUL R[0], U[0]

This allows fs-temp-array-mat2-index-col-wr and
fs-temp-array-mat2-index-row-wr to fit in hardware limits (instead of
falling back to software rasterization).

NOTE: Without pending patches to the piglit tests, these tests will now
fail.  This is an unrelated, pre-existing issue.

Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
---
 src/mesa/drivers/dri/i915/i915_fragprog.c | 46 +++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c 
b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 930c2b8..c304d22 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -817,23 +817,34 @@ upload_program(struct i915_fragment_program *p)
         flags = get_result_flags(inst);
         dst = get_result_vector(p, inst);
 
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
+             && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
+            unsigned tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            src1, 0, 0);
+
+            src1 = tmp;
+         }
+
         /* tmp = src1 >= src2 */
         i915_emit_arith(p,
                         A0_SGE,
                         tmp,
                         flags, 0,
-                        src_vector(p, &inst->SrcReg[0], program),
-                        src_vector(p, &inst->SrcReg[1], program),
+                        src0,
+                        src1,
                         0);
         /* dst = src1 <= src2 */
         i915_emit_arith(p,
                         A0_SGE,
                         dst,
                         flags, 0,
-                        negate(src_vector(p, &inst->SrcReg[0], program),
-                               1, 1, 1, 1),
-                        negate(src_vector(p, &inst->SrcReg[1], program),
-                               1, 1, 1, 1),
+                        negate(src0, 1, 1, 1, 1),
+                        negate(src1, 1, 1, 1, 1),
                         0);
         /* dst = tmp && dst */
         i915_emit_arith(p,
@@ -966,23 +977,34 @@ upload_program(struct i915_fragment_program *p)
         flags = get_result_flags(inst);
         dst = get_result_vector(p, inst);
 
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
+             && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
+            unsigned tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            src1, 0, 0);
+
+            src1 = tmp;
+         }
+
         /* tmp = src1 < src2 */
         i915_emit_arith(p,
                         A0_SLT,
                         tmp,
                         flags, 0,
-                        src_vector(p, &inst->SrcReg[0], program),
-                        src_vector(p, &inst->SrcReg[1], program),
+                        src0,
+                        src1,
                         0);
         /* dst = src1 > src2 */
         i915_emit_arith(p,
                         A0_SLT,
                         dst,
                         flags, 0,
-                        negate(src_vector(p, &inst->SrcReg[0], program),
-                               1, 1, 1, 1),
-                        negate(src_vector(p, &inst->SrcReg[1], program),
-                               1, 1, 1, 1),
+                        negate(src0, 1, 1, 1, 1),
+                        negate(src1, 1, 1, 1, 1),
                         0);
         /* dst = tmp || dst */
         i915_emit_arith(p,
-- 
1.8.1.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to