When computing the output swizzle, we need to consider the number of components in the add operation. Until now we were using the write mask of the multiplication for this instead, which is not correct. --- src/glsl/nir/nir_opt_peephole_ffma.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c index b430eac..798506b 100644 --- a/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def) } static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +get_mul_for_src(nir_alu_src *src, int num_components, + uint8_t swizzle[4], bool *negate, bool *abs) { assert(src->src.is_ssa && !src->abs && !src->negate); @@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) switch (alu->op) { case nir_op_imov: case nir_op_fmov: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); break; case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = !*negate; break; case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = false; *abs = true; break; @@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) if (!alu) return NULL; - for (unsigned i = 0; i < 4; i++) { - if (!(alu->dest.write_mask & (1 << i))) - break; - + for (unsigned i = 0; i < num_components; i++) swizzle[i] = swizzle[src->swizzle[i]]; - } return alu; } @@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state) negate = false; abs = false; - mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + mul = get_mul_for_src(&add->src[add_mul_src], + add->dest.dest.ssa.num_components, + swizzle, &negate, &abs); if (mul != NULL) break; -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev