On 03/18/2015 01:18 AM, Tapani Pälli wrote:
> Hi;
> Test looks good and passes fine on HSW;
> Reviewed-by: Tapani Pälli <[email protected]>
> With INTEL_DEVID_OVERRIDE (0x0046 or 0x2A42), though, it does not seem to
> hit the mentioned optimization, since the comparison operation is
> ir_binop_gequal. This is on an HSW desktop machine.
Hm... that is odd. I ran this test as:
INTEL_DEVID_OVERRIDE=0x0046 INTEL_DEBUG=fs bin/shader_runner
tests/spec/glsl-1.10/execution/fs-step.shader_test -auto
At the commit in my local tree before the optimization is added, I get:
Native code for unnamed fragment shader 3
SIMD8 shader: 14 instructions. 0 loops. Compacted 224 to 208 bytes (7%)
START B0
mul(8) g6<1>F g2<0,1,0>F g2.1<0,1,0>F { align1 compacted };
cmp.ge.f0(8) g4<1>F g6<8,8,1>F 0F { align1 };
and(8) g4<1>D g4<8,8,1>D 1D { align1 };
and(8) g4<1>D -g4<8,8,1>D 0x3f800000UD { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.2<0,1,0>F { align1 };
add(8) m3<1>F g5<8,8,1>F g2.6<0,1,0>F { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.3<0,1,0>F { align1 compacted };
add(8) m4<1>F g5<8,8,1>F g2.7<0,1,0>F { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.4<0,1,0>F { align1 compacted };
add(8) m5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
mul(8) g4<1>F g4<8,8,1>F g2.5<0,1,0>F { align1 };
add(8) m6<1>F g4<8,8,1>F g3.1<0,1,0>F { align1 };
mov(8) m2<1>F g1<8,8,1>F { align1 nomask };
nop ;
send(8) 1 null g0<8,8,1>UW
write RT write SIMD8 LastRT Surface = 0 mlen 6 rlen 0 { align1 EOT };
END B0
At the next commit I get:
Native code for unnamed fragment shader 3
SIMD8 shader: 13 instructions. 0 loops. Compacted 208 to 192 bytes (8%)
START B0
mul.ge.f0(8) null g2<0,1,0>F g2.1<0,1,0>F { align1 compacted };
mov(8) g4<1>F 1F { align1 };
(+f0) sel(8) g4<1>F g4<8,8,1>F 0F { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.2<0,1,0>F { align1 };
add(8) m3<1>F g5<8,8,1>F g2.6<0,1,0>F { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.3<0,1,0>F { align1 compacted };
add(8) m4<1>F g5<8,8,1>F g2.7<0,1,0>F { align1 };
mul(8) g5<1>F g4<8,8,1>F g2.4<0,1,0>F { align1 compacted };
add(8) m5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
mul(8) g4<1>F g4<8,8,1>F g2.5<0,1,0>F { align1 };
add(8) m6<1>F g4<8,8,1>F g3.1<0,1,0>F { align1 };
mov(8) m2<1>F g1<8,8,1>F { align1 nomask };
nop ;
send(8) 1 null g0<8,8,1>UW
write RT write SIMD8 LastRT Surface = 0 mlen 6 rlen 0 { align1 EOT };
END B0
How does that compare with what you get?