On Wed, Dec 2, 2015 at 4:15 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > Tessellation control shaders need to be careful when writing outputs. > Because multiple threads can concurrently write the same output > variables, we need to only write the exact components we were told. > > Traditionally, for sub-vector writes, we've read the whole vector, > updated the temporary, and written the whole vector back. This breaks > down with concurrent access. > > This patch prepares the way for a solution by adding a writemask field > to store_var intrinsics, as well as the other store intrinsics. It then > updates all produces to emit a writemask of "all channels enabled". It > updates nir_lower_io to copy the writemask to output store intrinsics. > > Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks > by doing a read-modify-write cycle (which is safe, because local > variables are specific to a single thread). > > This should have no functional change, since no one actually emits > partial writemasks yet. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 + > .../drivers/freedreno/ir3/ir3_compiler_nir.c | 6 +++ > src/glsl/nir/glsl_to_nir.cpp | 2 + > src/glsl/nir/nir_builder.h | 4 +- > src/glsl/nir/nir_intrinsics.h | 14 +++---- > src/glsl/nir/nir_lower_gs_intrinsics.c | 3 +- > src/glsl/nir/nir_lower_io.c | 2 + > src/glsl/nir/nir_lower_locals_to_regs.c | 2 +- > src/glsl/nir/nir_lower_var_copies.c | 1 + > src/glsl/nir/nir_lower_vars_to_ssa.c | 46 > ++++++++++++++++------ > src/glsl/nir/nir_validate.c | 1 + > src/mesa/program/prog_to_nir.c | 2 + > 12 files changed, 63 insertions(+), 21 deletions(-) > > Technically, I don't think I need the ir3 changes here - it should work > without any changes. I think I was just overzealously preparing things > to handle writemasks before I realized there shouldn't be any. > > But, it might be worth doing anyway. Not sure. > > Jason and I have already talked about the fact that we're conflicting. > I like what he's doing, and he suggested this; we just need to figure out > what lands first. I decided to send these out anyway for people to look > at and start reviewing, since there's a lot to do there. We'll sort it > out in v2. > > diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c > b/src/gallium/auxiliary/nir/tgsi_to_nir.c > index 86c2ffa..f5547c8 100644 > --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c > +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c > @@ -1894,6 +1894,7 @@ ttn_emit_instruction(struct ttn_compile *c) > &tgsi_dst->Indirect : NULL; > > store->num_components = 4; > + store->const_index[0] = 0xf; > store->variables[0] = ttn_array_deref(c, store, var, offset, indirect); > store->src[0] = nir_src_for_reg(dest.dest.reg.reg); > > diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > index 8617704..3beed0e 100644 > --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > @@ -1314,6 +1314,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, > nir_intrinsic_instr *intr) > case nir_deref_array_type_direct: > /* direct access does not require anything special: */ > for (int i = 0; i < intr->num_components; i++) { > + if (!(intr->const_index[0] & (1 << i))) > + continue; > + > unsigned n = darr->base_offset * 4 + i; > compile_assert(ctx, n < arr->length); > arr->arr[n] = src[i]; > @@ -1326,6 +1329,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, > nir_intrinsic_instr *intr) > struct ir3_instruction *addr = > get_addr(ctx, get_src(ctx, > &darr->indirect)[0]); > for (int i = 0; i < intr->num_components; i++) { > + if (!(intr->const_index[0] & (1 << i))) > + continue; > + > struct ir3_instruction *store; > unsigned n = darr->base_offset * 4 + i; > compile_assert(ctx, n < arr->length); > diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp > index 45d045c..5c84b0d 100644 > --- a/src/glsl/nir/glsl_to_nir.cpp > +++ b/src/glsl/nir/glsl_to_nir.cpp > @@ -982,6 +982,7 @@ nir_visitor::visit(ir_call *ir) > nir_intrinsic_instr *store_instr = > nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); > store_instr->num_components = > ir->return_deref->type->vector_elements; > + store_instr->const_index[0] = (1 << store_instr->num_components) - > 1; > > store_instr->variables[0] = > evaluate_deref(&store_instr->instr, ir->return_deref); > @@ -1080,6 +1081,7 @@ nir_visitor::visit(ir_assignment *ir) > nir_intrinsic_instr *store = > nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); > store->num_components = ir->lhs->type->vector_elements; > + store->const_index[0] = (1 << store->num_components) - 1; > nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); > store->variables[0] = nir_deref_as_var(store_deref); > store->src[0] = nir_src_for_ssa(src); > diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h > index b909f483..6478aad 100644 > --- a/src/glsl/nir/nir_builder.h > +++ b/src/glsl/nir/nir_builder.h > @@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var) > } > > static inline void > -nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value) > +nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, > + unsigned writemask) > { > const unsigned num_components = glsl_get_vector_elements(var->type); > > nir_intrinsic_instr *store = > nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); > store->num_components = num_components; > + store->const_index[0] = writemask; > store->variables[0] = nir_deref_var_create(store, var); > store->src[0] = nir_src_for_ssa(value); > nir_builder_instr_insert(build, &store->instr); > diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h > index b2565c5..013f2ac 100644 > --- a/src/glsl/nir/nir_intrinsics.h > +++ b/src/glsl/nir/nir_intrinsics.h > @@ -43,7 +43,7 @@ > > > INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) > -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) > +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0) > INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) > > /* > @@ -266,16 +266,16 @@ LOAD(per_vertex_output, 1, 1, > NIR_INTRINSIC_CAN_ELIMINATE) > * block index and an extra index with the writemask to use. > */ > > -#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ > +#define STORE(name, extra_srcs, extra_srcs_size, flags) \ > INTRINSIC(store_##name, 1 + extra_srcs, \ > ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ > - false, 0, 0, 1 + extra_indices, flags) \ > + false, 0, 0, 2, flags) \ > INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ > ARR(0, 1, extra_srcs_size, extra_srcs_size), \ > - false, 0, 0, 1 + extra_indices, flags) > + false, 0, 0, 2, flags) > > -STORE(output, 0, 0, 0, 0) > -STORE(per_vertex_output, 1, 1, 0, 0) > -STORE(ssbo, 1, 1, 1, 0) > +STORE(output, 0, 0, 0) > +STORE(per_vertex_output, 1, 1, 0) > +STORE(ssbo, 1, 1, 0) > > LAST_INTRINSIC(store_ssbo_indirect) > diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c > b/src/glsl/nir/nir_lower_gs_intrinsics.c > index e0d0678..7cde839 100644 > --- a/src/glsl/nir/nir_lower_gs_intrinsics.c > +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c > @@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct > state *state) > > /* Increment the vertex count by 1 */ > nir_store_var(b, state->vertex_count_var, > - nir_iadd(b, count, nir_imm_int(b, 1))); > + nir_iadd(b, count, nir_imm_int(b, 1)), > + 1); /* .x */
I'd rather 0x1 since it is a bitfield. > > nir_instr_remove(&intrin->instr); > > diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c > index 5683e69..7bbc333 100644 > --- a/src/glsl/nir/nir_lower_io.c > +++ b/src/glsl/nir/nir_lower_io.c > @@ -279,6 +279,8 @@ nir_lower_io_block(nir_block *block, void *void_state) > store_op); > store->num_components = intrin->num_components; > store->const_index[0] = offset; > + /* Copy the writemask */ > + store->const_index[1] = intrin->const_index[0]; > > nir_src_copy(&store->src[0], &intrin->src[0], store); > > diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c > b/src/glsl/nir/nir_lower_locals_to_regs.c > index 17b53ca..3e21ac0 100644 > --- a/src/glsl/nir/nir_lower_locals_to_regs.c > +++ b/src/glsl/nir/nir_lower_locals_to_regs.c > @@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void > *void_state) > > nir_alu_instr *mov = nir_alu_instr_create(state->shader, > nir_op_imov); > nir_src_copy(&mov->src[0].src, &intrin->src[0], mov); > - mov->dest.write_mask = (1 << intrin->num_components) - 1; > + mov->dest.write_mask = intrin->const_index[0]; > mov->dest.dest.is_ssa = false; > mov->dest.dest.reg.reg = reg_src.reg.reg; > mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; > diff --git a/src/glsl/nir/nir_lower_var_copies.c > b/src/glsl/nir/nir_lower_var_copies.c > index 98c107a..a9017de 100644 > --- a/src/glsl/nir/nir_lower_var_copies.c > +++ b/src/glsl/nir/nir_lower_var_copies.c > @@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, > nir_intrinsic_instr *store = > nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); > store->num_components = num_components; > + store->const_index[0] = (1 << num_components) - 1; > store->variables[0] = nir_deref_as_var(nir_copy_deref(store, > &dest_head->deref)); > > store->src[0].is_ssa = true; > diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c > b/src/glsl/nir/nir_lower_vars_to_ssa.c > index e670dbd..3ec0e1d 100644 > --- a/src/glsl/nir/nir_lower_vars_to_ssa.c > +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c > @@ -26,6 +26,7 @@ > */ > > #include "nir.h" > +#include "nir_builder.h" > #include "nir_vla.h" > > > @@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred, > static bool > rename_variables_block(nir_block *block, struct lower_variables_state *state) > { > + nir_builder b; > + nir_builder_init(&b, state->impl); > + > nir_foreach_instr_safe(block, instr) { > if (instr->type == nir_instr_type_phi) { > nir_phi_instr *phi = nir_instr_as_phi(instr); > @@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct > lower_variables_state *state) > > assert(intrin->src[0].is_ssa); > > - nir_alu_instr *mov = nir_alu_instr_create(state->shader, > - nir_op_imov); > - mov->src[0].src.is_ssa = true; > - mov->src[0].src.ssa = intrin->src[0].ssa; > - for (unsigned i = intrin->num_components; i < 4; i++) > - mov->src[0].swizzle[i] = 0; > + nir_ssa_def *new_def; > + b.cursor = nir_before_instr(&intrin->instr); > > - mov->dest.write_mask = (1 << intrin->num_components) - 1; > - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, > - intrin->num_components, NULL); > + if (intrin->const_index[0] == (1 << intrin->num_components) - 1) > { > + /* Whole variable store - just copy the source. Note that > + * intrin->num_components and > intrin->src[0].ssa->num_components > + * may differ. > + */ > + unsigned swiz[4]; > + for (unsigned i = 0; i < 4; i++) > + swiz[i] = i < intrin->num_components ? i : 0; > + > + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, > + intrin->num_components, false); > + } else { > + nir_ssa_def *old_def = get_ssa_def_for_block(node, block, > state); > + /* For writemasked store_var intrinsics, we combine the newly > + * written values with the existing contents of unwritten > + * channels, creating a new SSA value for the whole vector. > + */ > + nir_ssa_def *srcs[4]; > + for (unsigned i = 0; i < intrin->num_components; i++) { > + if (intrin->const_index[0] & (1 << i)) { > + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); > + } else { > + srcs[i] = nir_channel(&b, old_def, i); > + } > + } > + new_def = nir_vec(&b, srcs, intrin->num_components); > + } We've never used the builder in vars_to_ssa before and it does generate a lot of movs... I guess we do run copy-prop afterwards anyway, so it's probably ok. > > - nir_instr_insert_before(&intrin->instr, &mov->instr); > + assert(new_def->num_components == intrin->num_components); > > - def_stack_push(node, &mov->dest.dest.ssa, state); > + def_stack_push(node, new_def, state); > > /* We'll wait to remove the instruction until the next pass > * where we pop the node we just pushed back off the stack. > diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c > index 06879d6..da92055 100644 > --- a/src/glsl/nir/nir_validate.c > +++ b/src/glsl/nir/nir_validate.c > @@ -417,6 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, > validate_state *state) > assert(instr->variables[0]->var->data.mode != nir_var_shader_in && > instr->variables[0]->var->data.mode != nir_var_uniform && > instr->variables[0]->var->data.mode != nir_var_shader_storage); > + assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) > == 0); For this patch, could we please have assert(instr->const_index[0] == (1 << instr->num_components) - 1); to make sure we set it everywhere. As is, the condition you gave above will pass if they leave const_index[0] as 0. We can loosten the restriction to the one above when we actually start using it. By and large, this looks pretty good to me. > break; > } > case nir_intrinsic_copy_var: > diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c > index 539e3c0..48f47b8 100644 > --- a/src/mesa/program/prog_to_nir.c > +++ b/src/mesa/program/prog_to_nir.c > @@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c) > nir_intrinsic_instr *store = > nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); > store->num_components = glsl_get_vector_elements(var->type); > + store->const_index[0] = (1 << store->num_components) - 1; > store->variables[0] = > nir_deref_var_create(store, c->output_vars[var->data.location]); > > @@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c) > nir_intrinsic_instr *store = > nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); > store->num_components = 4; > + store->const_index[0] = WRITEMASK_XYZW; > store->variables[0] = nir_deref_var_create(store, fullvar); > store->src[0] = nir_src_for_ssa(f001); > nir_builder_instr_insert(b, &store->instr); > -- > 2.6.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev