On Mon, Nov 14, 2016 at 5:41 PM, Kenneth Graunke <kenn...@whitecape.org> wrote:
> Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[],
> gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar
> arrays. Normal scalar arrays are sparse - each array element usually
> occupies a whole vec4 slot. However, most hardware assumes these
> built-in arrays are tightly packed.
>
> The new var->data.compact flag indicates that a scalar array should
> be tightly packed, so a float[4] array would take up a single vec4
> slot, and a float[8] array would take up two slots.
>
> They are still arrays, not vec4s, however. nir_lower_io will generate
> intrinsics using ARB_enhanced_layouts style component qualifiers.
>
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
> src/compiler/glsl/glsl_to_nir.cpp | 1 +
> src/compiler/nir/nir.h | 7 +++++
> src/compiler/nir/nir_gather_info.c | 9 ++++--
> src/compiler/nir/nir_lower_indirect_derefs.c | 8 +++--
> src/compiler/nir/nir_lower_io.c | 44
> ++++++++++++++++++++--------
> src/compiler/nir/nir_print.c | 4 ++-
> 6 files changed, 55 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> index 6ca760b..ed1c739 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -331,6 +331,7 @@ nir_visitor::visit(ir_variable *ir)
> var->data.explicit_index = ir->data.explicit_index;
> var->data.explicit_binding = ir->data.explicit_binding;
> var->data.has_initializer = ir->data.has_initializer;
> + var->data.compact = false;
> var->data.location_frac = ir->data.location_frac;
>
> switch (ir->data.depth_layout) {
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 3d46384..0b78242 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -230,6 +230,13 @@ typedef struct nir_variable {
> unsigned location_frac:2;
>
> /**
> + * If true, this variable represents an array of scalars that should
> + * be tightly packed.
In other words, consecutive array elements
> + * should be stored one component apart, rather than one slot apart.
> + */
> + unsigned compact:1;
>

Should this 1-bit integer be a bool?

> +
> + /**
> * Whether this is a fragment shader output implicitly initialized
> with
> * the previous contents of the specified render target at the
> * framebuffer location corresponding to this shader invocation.
> diff --git a/src/compiler/nir/nir_gather_info.c
> b/src/compiler/nir/nir_gather_info.c
> index 63c8a42..4d07dda 100644
> --- a/src/compiler/nir/nir_gather_info.c
> +++ b/src/compiler/nir/nir_gather_info.c
> @@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable
> *var)
> var->data.mode == nir_var_shader_in)
> is_vertex_input = true;
>
> - set_io_mask(shader, var, 0,
> - glsl_count_attribute_slots(type, is_vertex_input));
> + const unsigned slots =
> + var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> + : glsl_count_attribute_slots(type,
> is_vertex_input);
>

By using glsl_get_length(), you're assuming that all compact things are 1-D arrays with no structs. Is that your intention? If so, we should probably assert so that we catch it if we ever change this in the future.

> +
> + set_io_mask(shader, var, 0, slots);
> }
>
> static unsigned
> @@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> *deref)
> * here marking the entire variable as used.
> */
> if (!(glsl_type_is_matrix(type) ||
> - (glsl_type_is_array(type) &&
> + (glsl_type_is_array(type) && !var->data.compact &&
> (glsl_type_is_numeric(glsl_without_array(type)) ||
> glsl_type_is_boolean(glsl_without_array(type)))))) {
>
> diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c
> b/src/compiler/nir/nir_lower_indirect_derefs.c
> index 356373e..5c97dc8e 100644
> --- a/src/compiler/nir/nir_lower_indirect_derefs.c
> +++ b/src/compiler/nir/nir_lower_indirect_derefs.c
> @@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b,
> if (!deref_has_indirect(intrin->variables[0]))
> continue;
>
> - /* Only lower variables whose mode is in the mask */
> - if (!(modes & intrin->variables[0]->var->data.mode))
> + /* Only lower variables whose mode is in the mask, or compact
> + * array variables. (We can't handle indirects on tightly packed
> + * scalar arrays, so we need to lower them regardless.)
> + */
> + if (!(modes & intrin->variables[0]->var->data.mode) &&
> + !intrin->variables[0]->var->data.compact)
> continue;
>
> b->cursor = nir_before_instr(&intrin->instr);
> diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
> index a7e7f14..6628947 100644
> --- a/src/compiler/nir/nir_lower_io.c
> +++ b/src/compiler/nir/nir_lower_io.c
> @@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage
> stage)
> static nir_ssa_def *
> get_io_offset(nir_builder *b, nir_deref_var *deref,
> nir_ssa_def **vertex_index,
> - int (*type_size)(const struct glsl_type *))
> + int (*type_size)(const struct glsl_type *),
> + unsigned *component)
> {
> nir_deref *tail = &deref->deref;
>
> @@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
> *vertex_index = vtx;
> }
>
> + if (deref->var->data.compact) {
> + assert(tail->child->deref_type == nir_deref_type_array);
> + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
> + nir_deref_array *deref_array = nir_deref_as_array(tail->child);
> + /* We always lower indirect dereferences for "compact" array vars.
> */
> + assert(deref_array->deref_array_type ==
> nir_deref_array_type_direct);
> +
> + const unsigned total_offset = *component + deref_array->base_offset;
> + const unsigned slot_offset = total_offset / 4;
> + *component = total_offset % 4;
> + return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
> + }
> +
> /* Just emit code and let constant-folding go to town */
> nir_ssa_def *offset = nir_imm_int(b, 0);
>
> @@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
>
> static nir_intrinsic_instr *
> lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
> - nir_ssa_def *vertex_index, nir_ssa_def *offset)
> + nir_ssa_def *vertex_index, nir_ssa_def *offset,
> + unsigned component)
> {
> const nir_shader *nir = state->builder.shader;
> nir_variable *var = intrin->variables[0]->var;
> @@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct
> lower_io_state *state,
>
> nir_intrinsic_set_base(load, var->data.driver_location);
> if (mode == nir_var_shader_in || mode == nir_var_shader_out)
> - nir_intrinsic_set_component(load, var->data.location_frac);
> + nir_intrinsic_set_component(load, component);
>
> if (load->intrinsic == nir_intrinsic_load_uniform)
> nir_intrinsic_set_range(load, state->type_size(var->type));
> @@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct
> lower_io_state *state,
>
> static nir_intrinsic_instr *
> lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
> - nir_ssa_def *vertex_index, nir_ssa_def *offset)
> + nir_ssa_def *vertex_index, nir_ssa_def *offset,
> + unsigned component)
> {
> nir_variable *var = intrin->variables[0]->var;
> nir_variable_mode mode = var->data.mode;
> @@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct
> lower_io_state *state,
> nir_intrinsic_set_base(store, var->data.driver_location);
>
> if (mode == nir_var_shader_out)
> -
nir_intrinsic_set_component(store, var->data.location_frac);
> + nir_intrinsic_set_component(store, component);
>
> nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
>
> @@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct
> lower_io_state *state,
>
> static nir_intrinsic_instr *
> lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state
> *state,
> - nir_ssa_def *offset)
> + nir_ssa_def *offset, unsigned component)
> {
> nir_variable *var = intrin->variables[0]->var;
>
> @@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin,
> struct lower_io_state *state,
>
> /* Ignore interpolateAt() for flat variables - flat is flat. */
> if (var->data.interpolation == INTERP_MODE_FLAT)
> - return lower_load(intrin, state, NULL, offset);
> + return lower_load(intrin, state, NULL, offset, component);
>
> nir_intrinsic_op bary_op;
> switch (intrin->intrinsic) {
> @@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin,
> struct lower_io_state *state,
> load->num_components = intrin->num_components;
>
> nir_intrinsic_set_base(load, var->data.driver_location);
> - nir_intrinsic_set_component(load, var->data.location_frac);
> + nir_intrinsic_set_component(load, component);
>
> load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
> load->src[1] = nir_src_for_ssa(offset);
> @@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block,
>
> nir_ssa_def *offset;
> nir_ssa_def *vertex_index = NULL;
> + unsigned component_offset = var->data.location_frac;
>
> offset = get_io_offset(b, intrin->variables[0],
> per_vertex ?
&vertex_index : NULL,
> - state->type_size);
> + state->type_size, &component_offset);
>
> nir_intrinsic_instr *replacement;
>
> switch (intrin->intrinsic) {
> case nir_intrinsic_load_var:
> - replacement = lower_load(intrin, state, vertex_index, offset);
> + replacement = lower_load(intrin, state, vertex_index, offset,
> + component_offset);
> break;
>
> case nir_intrinsic_store_var:
> - replacement = lower_store(intrin, state, vertex_index, offset);
> + replacement = lower_store(intrin, state, vertex_index, offset,
> + component_offset);
> break;
>
> case nir_intrinsic_var_atomic_add:
> @@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block,
> case nir_intrinsic_interp_var_at_sample:
> case nir_intrinsic_interp_var_at_offset:
> assert(vertex_index == NULL);
> - replacement = lower_interpolate_at(intrin, state, offset);
> + replacement = lower_interpolate_at(intrin, state, offset,
> + component_offset);
> break;
>
> default:
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 242bffb..475e3f2 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -432,9 +432,11 @@ print_var_decl(nir_variable *var, print_state *state)
> loc = buf;
> }
>
> - fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
> + fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location,
> + var->data.compact ? " compact" : "");
> }
>
> +
>

Stray newline

> if (var->constant_initializer) {
> fprintf(fp, " = { ");
> print_constant(var->constant_initializer, var->type, state);
> --
> 2.10.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev