On Tue, Nov 10, 2015 at 1:21 AM, Kenneth Graunke <kenn...@whitecape.org> wrote: > Normally, we rely on nir_lower_outputs_to_temporaries to create shadow > variables for outputs, buffering the results and writing them all out > at the end of the program. However, this is infeasible for tessellation > control shader outputs. > > Tessellation control shaders can generate multiple output vertices, and > write per-vertex outputs. These are arrays indexed by the vertex > number; each thread only writes one element, but can read any other > element - including those being concurrently written by other threads. > The barrier() intrinsic synchronizes between threads. > > Even if we tried to shadow every output element (which is of dubious > value), we'd have to read updated values in at barrier() time, which > means we need to allow output reads. > > Most stages should continue using nir_lower_outputs_to_temporaries(), > but in theory drivers could choose not to if they really wanted. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/glsl/nir/nir_intrinsics.h | 2 ++ > src/glsl/nir/nir_lower_io.c | 28 ++++++++++++++++++++-------- > src/glsl/nir/nir_print.c | 2 ++ > src/glsl/nir/nir_validate.c | 1 - > 4 files changed, 24 insertions(+), 9 deletions(-) > > diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h > index 26ac7ce..b8d7d6c 100644 > --- a/src/glsl/nir/nir_intrinsics.h > +++ b/src/glsl/nir/nir_intrinsics.h > @@ -255,6 +255,8 @@ LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | > NIR_INTRINSIC_CAN_REORDER) > LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) > LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | > NIR_INTRINSIC_CAN_REORDER) > LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) > +LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) > +LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) > > /* > * Stores work the same way as loads, except now the first register input is > diff --git 
a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c > index e460160..e81524c 100644 > --- a/src/glsl/nir/nir_lower_io.c > +++ b/src/glsl/nir/nir_lower_io.c > @@ -158,6 +158,15 @@ load_op(struct lower_io_state *state, > nir_intrinsic_load_input; > } > break; > + case nir_var_shader_out: > + if (per_vertex) { > + op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect : > + nir_intrinsic_load_per_vertex_output; > + } else { > + op = has_indirect ? nir_intrinsic_load_output_indirect : > + nir_intrinsic_load_output; > + } > + break; > case nir_var_uniform: > op = has_indirect ? nir_intrinsic_load_uniform_indirect : > nir_intrinsic_load_uniform; > @@ -188,14 +197,18 @@ nir_lower_io_block(nir_block *block, void *void_state) > if (state->mode != -1 && state->mode != mode) > continue; > > + if (mode != nir_var_shader_in && > + mode != nir_var_shader_out && > + mode != nir_var_uniform) > + continue; > + > switch (intrin->intrinsic) { > case nir_intrinsic_load_var: { > - if (mode != nir_var_shader_in && mode != nir_var_uniform) > - continue; > - > - bool per_vertex = stage_uses_per_vertex_inputs(state) && > - mode == nir_var_shader_in && > - !intrin->variables[0]->var->data.patch; > + bool per_vertex = !intrin->variables[0]->var->data.patch && > + ((mode == nir_var_shader_in && > + stage_uses_per_vertex_inputs(state)) || > + (mode == nir_var_shader_out && > + stage_uses_per_vertex_outputs(state)));
With the helper functions I suggested, this whole condition would reduce to is_per_vertex_input() || is_per_vertex_output(). > > nir_ssa_def *indirect; > nir_ssa_def *vertex_index; > @@ -239,8 +252,7 @@ nir_lower_io_block(nir_block *block, void *void_state) > } > > case nir_intrinsic_store_var: { > - if (intrin->variables[0]->var->data.mode != nir_var_shader_out) > - continue; > + assert(mode == nir_var_shader_out); > > nir_ssa_def *indirect; > nir_ssa_def *vertex_index; > diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c > index 23fcafe..f7f5fdf 100644 > --- a/src/glsl/nir/nir_print.c > +++ b/src/glsl/nir/nir_print.c > @@ -448,6 +448,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, > print_state *state) > case nir_intrinsic_load_per_vertex_input_indirect: > var_list = &state->shader->inputs; > break; > + case nir_intrinsic_load_output: > + case nir_intrinsic_load_output_indirect: > case nir_intrinsic_store_output: > case nir_intrinsic_store_output_indirect: > case nir_intrinsic_store_per_vertex_output: > diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c > index a42e830..841bace 100644 > --- a/src/glsl/nir/nir_validate.c > +++ b/src/glsl/nir/nir_validate.c > @@ -422,7 +422,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, > validate_state *state) > assert(instr->variables[0]->var->data.mode != nir_var_shader_in && > instr->variables[0]->var->data.mode != nir_var_uniform && > instr->variables[0]->var->data.mode != nir_var_shader_storage); > - assert(instr->variables[1]->var->data.mode != nir_var_shader_out); > break; > default: > break; > -- > 2.6.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev