On 03/19/2015 12:06 AM, Ian Romanick wrote:
On 03/18/2015 01:18 AM, Tapani Pälli wrote:
Hi;

Test looks good and passes fine on HSW;

Reviewed-by: Tapani Pälli <[email protected]>


With INTEL_DEVID_OVERRIDE (0x0046 or 0x2A42), though, it does not seem to
hit the mentioned optimization, since the comparison operation is
ir_binop_gequal. This is on an HSW desktop machine.
Hm... that is odd.  I ran this test as:

INTEL_DEVID_OVERRIDE=0x0046 INTEL_DEBUG=fs bin/shader_runner 
tests/spec/glsl-1.10/execution/fs-step.shader_test -auto

At the commit in my local tree before the optimization is added, I get:

Native code for unnamed fragment shader 3
SIMD8 shader: 14 instructions. 0 loops. Compacted 224 to 208 bytes (7%)
    START B0
mul(8)          g6<1>F          g2<0,1,0>F      g2.1<0,1,0>F    { align1 
compacted };
cmp.ge.f0(8)    g4<1>F          g6<8,8,1>F      0F              { align1 };
and(8)          g4<1>D          g4<8,8,1>D      1D              { align1 };
and(8)          g4<1>D          -g4<8,8,1>D     0x3f800000UD    { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.2<0,1,0>F    { align1 };
add(8)          m3<1>F          g5<8,8,1>F      g2.6<0,1,0>F    { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.3<0,1,0>F    { align1 
compacted };
add(8)          m4<1>F          g5<8,8,1>F      g2.7<0,1,0>F    { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.4<0,1,0>F    { align1 
compacted };
add(8)          m5<1>F          g5<8,8,1>F      g3<0,1,0>F      { align1 };
mul(8)          g4<1>F          g4<8,8,1>F      g2.5<0,1,0>F    { align1 };
add(8)          m6<1>F          g4<8,8,1>F      g3.1<0,1,0>F    { align1 };
mov(8)          m2<1>F          g1<8,8,1>F                      { align1 nomask 
};
nop                                                             ;
send(8) 1       null            g0<8,8,1>UW
                             write RT write SIMD8 LastRT Surface = 0 mlen 6 
rlen 0 { align1 EOT };
    END B0

At the next commit I get:

Native code for unnamed fragment shader 3
SIMD8 shader: 13 instructions. 0 loops. Compacted 208 to 192 bytes (8%)
    START B0
mul.ge.f0(8)    null            g2<0,1,0>F      g2.1<0,1,0>F    { align1 
compacted };
mov(8)          g4<1>F          1F                              { align1 };
(+f0) sel(8)    g4<1>F          g4<8,8,1>F      0F              { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.2<0,1,0>F    { align1 };
add(8)          m3<1>F          g5<8,8,1>F      g2.6<0,1,0>F    { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.3<0,1,0>F    { align1 
compacted };
add(8)          m4<1>F          g5<8,8,1>F      g2.7<0,1,0>F    { align1 };
mul(8)          g5<1>F          g4<8,8,1>F      g2.4<0,1,0>F    { align1 
compacted };
add(8)          m5<1>F          g5<8,8,1>F      g3<0,1,0>F      { align1 };
mul(8)          g4<1>F          g4<8,8,1>F      g2.5<0,1,0>F    { align1 };
add(8)          m6<1>F          g4<8,8,1>F      g3.1<0,1,0>F    { align1 };
mov(8)          m2<1>F          g1<8,8,1>F                      { align1 nomask 
};
nop                                                             ;
send(8) 1       null            g0<8,8,1>UW
                             write RT write SIMD8 LastRT Surface = 0 mlen 6 
rlen 0 { align1 EOT };
    END B0

How does that compare with what you get?

Argh, sorry, I've been too fixated on the equal|nequal case, so I missed the actual treatment of sel below. I get the same result as you, and now I understand how this works.

On 03/17/2015 11:51 PM, Ian Romanick wrote:
From: Ian Romanick <[email protected]>

This is a general step() test, but it is designed to tickle an
optimization path in the GEN4 and GEN5 code generation in the i965
driver.  This optimization tries to generate different code for
expressions like 'float(expr cmp 0)'.

Signed-off-by: Ian Romanick <[email protected]>
Cc: Tapani Palli <[email protected]>
---
   tests/spec/glsl-1.10/execution/fs-step.shader_test | 35
++++++++++++++++++++++
   1 file changed, 35 insertions(+)
   create mode 100644 tests/spec/glsl-1.10/execution/fs-step.shader_test

diff --git a/tests/spec/glsl-1.10/execution/fs-step.shader_test b/tests/spec/glsl-1.10/execution/fs-step.shader_test
new file mode 100644
index 0000000..2ea0725
--- /dev/null
+++ b/tests/spec/glsl-1.10/execution/fs-step.shader_test
@@ -0,0 +1,35 @@
+[require]
+GLSL >= 1.10
+
+[vertex shader passthrough]
+
+[fragment shader]
+uniform float a;
+uniform float b;
+uniform vec4 color0;
+uniform vec4 color1;
+
+void main()
+{
+    /* This is a general step() test, but it is designed to tickle an
+     * optimization path in the GEN4 and GEN5 code generation in the i965
+     * driver.  This optimization tries to generate different code for
+     * expressions like 'float(expr cmp 0)'.
+     */
+    gl_FragColor = step(0.0, a * b) * color0 + color1;
+}
+
+[test]
+uniform float a -1
+uniform float b 1
+uniform vec4 color0 1.0 -1.0 0.0 0.0
+uniform vec4 color1 0.0 1.0 0.0 1.0
+draw rect -1 -1 1 2
+relative probe rgba (0.25, 0.5) (0.0, 1.0, 0.0, 1.0)
+
+uniform float a 1
+uniform float b 1
+uniform vec4 color0 -1.0 1.0 0.0 0.0
+uniform vec4 color1 1.0 0.0 0.0 1.0
+draw rect 0 -1 1 2
+relative probe rgba (0.75, 0.5) (0.0, 1.0, 0.0, 1.0)

_______________________________________________
Piglit mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/piglit

Reply via email to