It will soon be useful to have the write-mask computation in try_coalesce() as a separate step from the reswizzling. --- src/compiler/nir/nir_lower_vec_to_movs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c index 8b24376..610a362 100644 --- a/src/compiler/nir/nir_lower_vec_to_movs.c +++ b/src/compiler/nir/nir_lower_vec_to_movs.c @@ -171,12 +171,6 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx) return 0; } - /* Stash off all of the ALU instruction's swizzles. */ - uint8_t swizzles[4][4]; - for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++) - for (unsigned i = 0; i < 4; i++) - swizzles[j][i] = src_alu->src[j].swizzle[i]; - unsigned write_mask = 0; for (unsigned i = start_idx; i < 4; i++) { if (!(vec->dest.write_mask & (1 << i))) @@ -186,10 +180,22 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx) vec->src[i].src.ssa != &src_alu->dest.dest.ssa) continue; + write_mask |= 1 << i; + } + + /* Stash off all of the ALU instruction's swizzles. */ + uint8_t swizzles[4][4]; + for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++) + for (unsigned i = 0; i < 4; i++) + swizzles[j][i] = src_alu->src[j].swizzle[i]; + + for (unsigned i = start_idx; i < 4; i++) { + if (!(write_mask & (1 << i))) + continue; + /* At this point, the give vec source matchese up with the ALU * instruction so we can re-swizzle that component to match. */ - write_mask |= 1 << i; if (has_replicated_dest(src_alu)) { /* Since the destination is a single replicated value, we don't need * to do any reswizzling -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev