On 09/10/2015 02:50 AM, Jason Ekstrand wrote: > Previously, we did this thing with keeping track of a separate start_idx > which was different from the iteration variable. I think this was a relic > of the way that GLSL IR implements writemasks. In NIR, if a given bit in > the writemask is unset then that channel is just "unused", not missing. In > particular, a vec4 operation with a writemask of 0xd will use sources 0, 2, > and 3 and leave source 1 alone. We can simplify things a good deal (and > make them correct) by removing this "compacting" step. >
Indeed, much clearer now. Reviewed-by: Eduardo Lima Mitev <el...@igalia.com> > Cc: Eric Anholt <e...@anholt.net> > --- > src/glsl/nir/nir_lower_vec_to_movs.c | 33 +++++++++++++-------------------- > 1 file changed, 13 insertions(+), 20 deletions(-) > > diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c > b/src/glsl/nir/nir_lower_vec_to_movs.c > index 7d31e36..ed8ec9b 100644 > --- a/src/glsl/nir/nir_lower_vec_to_movs.c > +++ b/src/glsl/nir/nir_lower_vec_to_movs.c > @@ -58,29 +58,25 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src) > * which ones have been processed. > */ > static unsigned > -insert_mov(nir_alu_instr *vec, unsigned start_channel, > - unsigned start_src_idx, nir_shader *shader) > +insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) > { > - unsigned src_idx = start_src_idx; > - assert(src_idx < nir_op_infos[vec->op].num_inputs); > + assert(start_idx < nir_op_infos[vec->op].num_inputs); > > nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov); > - nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov); > + nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov); > nir_alu_dest_copy(&mov->dest, &vec->dest, mov); > > - mov->dest.write_mask = (1u << start_channel); > - mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0]; > - src_idx++; > + mov->dest.write_mask = (1u << start_idx); > + mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0]; > > - for (unsigned i = start_channel + 1; i < 4; i++) { > + for (unsigned i = start_idx + 1; i < 4; i++) { > if (!(vec->dest.write_mask & (1 << i))) > continue; > > - if (nir_srcs_equal(vec->src[src_idx].src, > vec->src[start_src_idx].src)) { > + if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src)) { > mov->dest.write_mask |= (1 << i); > - mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0]; > + mov->src[0].swizzle[i] = vec->src[i].swizzle[0]; > } > - src_idx++; > } > > nir_instr_insert_before(&vec->instr, &mov->instr); > @@ -129,26 +125,23 @@ lower_vec_to_movs_block(nir_block *block, void > *void_state) > * destination reg, in case other values we're populating in the dest > * might overwrite them. > */ > - for (unsigned i = 0, src_idx = 0; i < 4; i++) { > + for (unsigned i = 0; i < 4; i++) { > if (!(vec->dest.write_mask & (1 << i))) > continue; > > - if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) { > - finished_write_mask |= insert_mov(vec, i, src_idx, > state->shader); > + if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) { > + finished_write_mask |= insert_mov(vec, i, state->shader); > break; > } > - src_idx++; > } > > /* Now, emit MOVs for all the other src channels. */ > - for (unsigned i = 0, src_idx = 0; i < 4; i++) { > + for (unsigned i = 0; i < 4; i++) { > if (!(vec->dest.write_mask & (1 << i))) > continue; > > if (!(finished_write_mask & (1 << i))) > - finished_write_mask |= insert_mov(vec, i, src_idx, > state->shader); > - > - src_idx++; > + finished_write_mask |= insert_mov(vec, i, state->shader); > } > > nir_instr_remove(&vec->instr); > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev