I only scanned through then briefly, but they look good to me. Assuming Jenkins agrees,
Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> Let's actually push it this time. :-) On Fri, Oct 2, 2015 at 2:52 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > Geometry and tessellation shaders process multiple vertices; their > inputs are arrays indexed by the vertex number. While GLSL makes > this look like a normal array, it can be very different behind the > scenes. > > On Intel hardware, all inputs for a particular vertex are stored > together - as if they were grouped into a single struct. This means > that consecutive elements of these top-level arrays are not contiguous. > In fact, they may sometimes be in completely disjoint memory segments. > > NIR's existing load_input intrinsics are awkward for this case, as they > distill everything down to a single offset. We'd much rather keep the > vertex ID separate, but build up an offset as normal beyond that. > > This patch introduces new nir_intrinsic_load_per_vertex_input > intrinsics to handle this case. They work like ordinary load_input > intrinsics, but have an extra source (src[0]) which represents the > outermost array index. > > v2: Rebase on earlier refactors. > v3: Use ssa defs instead of nir_srcs, rebase on earlier refactors. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/glsl/nir/nir_intrinsics.h | 1 + > src/glsl/nir/nir_lower_io.c | 55 ++++++++++++++++++++++--- > src/glsl/nir/nir_print.c | 2 + > src/mesa/drivers/dri/i965/brw_nir.c | 13 +++++- > src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 58 > +++++++++++---------------- > 5 files changed, 86 insertions(+), 43 deletions(-) > > diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h > index ac4c2ba..263d8c1 100644 > --- a/src/glsl/nir/nir_intrinsics.h > +++ b/src/glsl/nir/nir_intrinsics.h > @@ -228,6 +228,7 @@ SYSTEM_VALUE(num_work_groups, 3, 0) > LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) > LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) > LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) > +LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | > NIR_INTRINSIC_CAN_REORDER) > LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) > > /* > diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c > index 51a953a..a6bae94 100644 > --- a/src/glsl/nir/nir_lower_io.c > +++ b/src/glsl/nir/nir_lower_io.c > @@ -63,8 +63,20 @@ nir_assign_var_locations(struct exec_list *var_list, > unsigned *size, > *size = location; > } > > +/** > + * Returns true if we're processing a stage whose inputs are arrays indexed > + * by a vertex number (such as geometry shader inputs). > + */ > +static bool > +stage_uses_per_vertex_inputs(struct lower_io_state *state) > +{ > + gl_shader_stage stage = state->builder.shader->stage; > + return stage == MESA_SHADER_GEOMETRY; > +} > + > static unsigned > get_io_offset(nir_deref_var *deref, nir_instr *instr, > + nir_ssa_def **vertex_index, > nir_ssa_def **out_indirect, > struct lower_io_state *state) > { > @@ -75,6 +87,22 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, > b->cursor = nir_before_instr(instr); > > nir_deref *tail = &deref->deref; > + > + /* For per-vertex input arrays (i.e. geometry shader inputs), keep the > + * outermost array index separate. Process the rest normally. > + */ > + if (vertex_index != NULL) { > + tail = tail->child; > + assert(tail->deref_type == nir_deref_type_array); > + nir_deref_array *deref_array = nir_deref_as_array(tail); > + > + nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset); > + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { > + vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, > 1)); > + } > + *vertex_index = vtx; > + } > + > while (tail->child != NULL) { > const struct glsl_type *parent_type = tail->type; > tail = tail->child; > @@ -107,13 +135,19 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, > } > > static nir_intrinsic_op > -load_op(nir_variable_mode mode, bool has_indirect) > +load_op(struct lower_io_state *state, > + nir_variable_mode mode, bool per_vertex, bool has_indirect) > { > nir_intrinsic_op op; > switch (mode) { > case nir_var_shader_in: > - op = has_indirect ? nir_intrinsic_load_input_indirect : > - nir_intrinsic_load_input; > + if (per_vertex) { > + op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect : > + nir_intrinsic_load_per_vertex_input; > + } else { > + op = has_indirect ? nir_intrinsic_load_input_indirect : > + nir_intrinsic_load_input; > + } > break; > case nir_var_uniform: > op = has_indirect ? nir_intrinsic_load_uniform_indirect : > @@ -150,14 +184,20 @@ nir_lower_io_block(nir_block *block, void *void_state) > if (mode != nir_var_shader_in && mode != nir_var_uniform) > continue; > > + bool per_vertex = stage_uses_per_vertex_inputs(state) && > + mode == nir_var_shader_in; > + > nir_ssa_def *indirect; > + nir_ssa_def *vertex_index; > > unsigned offset = get_io_offset(intrin->variables[0], > &intrin->instr, > + per_vertex ? &vertex_index : NULL, > &indirect, state); > > nir_intrinsic_instr *load = > nir_intrinsic_instr_create(state->mem_ctx, > - load_op(mode, indirect)); > + load_op(state, mode, per_vertex, > + indirect)); > load->num_components = intrin->num_components; > > unsigned location = intrin->variables[0]->var->data.driver_location; > @@ -168,8 +208,11 @@ nir_lower_io_block(nir_block *block, void *void_state) > load->const_index[0] = location + offset; > } > > + if (per_vertex) > + load->src[0] = nir_src_for_ssa(vertex_index); > + > if (indirect) > - load->src[0] = nir_src_for_ssa(indirect); > + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); > > if (intrin->dest.is_ssa) { > nir_ssa_dest_init(&load->instr, &load->dest, > @@ -192,7 +235,7 @@ nir_lower_io_block(nir_block *block, void *void_state) > nir_ssa_def *indirect; > > unsigned offset = get_io_offset(intrin->variables[0], > &intrin->instr, > - &indirect, state); > + NULL, &indirect, state); > offset += intrin->variables[0]->var->data.driver_location; > > nir_intrinsic_op store_op; > diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c > index a19aa8b..f40c5df 100644 > --- a/src/glsl/nir/nir_print.c > +++ b/src/glsl/nir/nir_print.c > @@ -443,6 +443,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, > print_state *state) > break; > case nir_intrinsic_load_input: > case nir_intrinsic_load_input_indirect: > + case nir_intrinsic_load_per_vertex_input: > + case nir_intrinsic_load_per_vertex_input_indirect: > var_list = &state->shader->inputs; > break; > case nir_intrinsic_store_output: > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 2812fd7..64763e0 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -32,8 +32,17 @@ brw_nir_lower_inputs(nir_shader *nir, > const struct gl_program *prog, > bool is_scalar) > { > - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, > - is_scalar ? type_size_scalar : type_size_vec4); > + switch (nir->stage) { > + case MESA_SHADER_GEOMETRY: > + foreach_list_typed(nir_variable, var, node, &nir->inputs) { > + var->data.driver_location = var->data.location; > + } > + break; > + default: > + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, > + is_scalar ? type_size_scalar : > type_size_vec4); > + break; > + } > } > > static void > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp > index 64a90e5..91280de 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp > @@ -29,41 +29,6 @@ namespace brw { > void > vec4_gs_visitor::nir_setup_inputs(nir_shader *shader) > { > - nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs); > - > - foreach_list_typed(nir_variable, var, node, &shader->inputs) { > - int offset = var->data.driver_location; > - if (var->type->base_type == GLSL_TYPE_ARRAY) { > - /* Geometry shader inputs are arrays, but they use an unusual array > - * layout: instead of all array elements for a given geometry shader > - * input being stored consecutively, all geometry shader inputs are > - * interleaved into one giant array. At this stage of compilation, > we > - * assume that the stride of the array is BRW_VARYING_SLOT_COUNT. > - * Later, setup_attributes() will remap our accesses to the actual > - * input array. > - */ > - assert(var->type->length > 0); > - int length = var->type->length; > - int size = type_size_vec4(var->type) / length; > - for (int i = 0; i < length; i++) { > - int location = var->data.location + i * BRW_VARYING_SLOT_COUNT; > - for (int j = 0; j < size; j++) { > - src_reg src = src_reg(ATTR, location + j, var->type); > - src = retype(src, brw_type_for_base_type(var->type)); > - nir_inputs[offset] = src; > - offset++; > - } > - } > - } else { > - int size = type_size_vec4(var->type); > - for (int i = 0; i < size; i++) { > - src_reg src = src_reg(ATTR, var->data.location + i, var->type); > - src = retype(src, brw_type_for_base_type(var->type)); > - nir_inputs[offset] = src; > - offset++; > - } > - } > - } > } > > void > @@ -96,6 +61,29 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > src_reg src; > > switch (instr->intrinsic) { > + case nir_intrinsic_load_per_vertex_input_indirect: > + assert(!"EmitNoIndirectInput should prevent this."); > + case nir_intrinsic_load_per_vertex_input: { > + /* The EmitNoIndirectInput flag guarantees our vertex index will > + * be constant. We should handle indirects someday. > + */ > + nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); > + > + /* Make up a type...we have no way of knowing... */ > + const glsl_type *const type = glsl_type::ivec(instr->num_components); > + > + src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] + > + instr->const_index[0], type); > + dest = get_nir_dest(instr->dest, src.type); > + dest.writemask = brw_writemask_for_size(instr->num_components); > + emit(MOV(dest, src)); > + break; > + } > + > + case nir_intrinsic_load_input: > + case nir_intrinsic_load_input_indirect: > + unreachable("nir_lower_io should have produced per_vertex intrinsics"); > + > case nir_intrinsic_emit_vertex_with_counter: { > this->vertex_count = > retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); > -- > 2.5.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev