---
 src/compiler/Makefile.sources        |   1 +
 src/compiler/nir/nir.h               |   8 +-
 src/compiler/nir/nir_clone.c         |   1 +
 src/compiler/nir/nir_lower_scratch.c | 258 +++++++++++++++++++++++++++++++++++
 4 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/nir/nir_lower_scratch.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 6a7dcd8..2a1594d 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -216,6 +216,7 @@ NIR_FILES = \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_returns.c \
 	nir/nir_lower_samplers.c \
+	nir/nir_lower_scratch.c \
 	nir/nir_lower_system_values.c \
 	nir/nir_lower_tex.c \
 	nir/nir_lower_to_source_mods.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9e8ed2c..61be6aa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1822,7 +1822,7 @@ typedef struct nir_shader {
     * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
     * access plus one
     */
-   unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+   unsigned num_inputs, num_uniforms, num_outputs, num_shared, num_scratch;
 
    /** The shader stage, such as MESA_SHADER_VERTEX. */
    gl_shader_stage stage;
@@ -2315,6 +2315,12 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
                                  bool outputs, bool inputs);
 
+bool nir_lower_vars_to_scratch(nir_shader *shader,
+                               nir_variable_mode modes,
+                               int size_threshold,
+                               bool use_scalar_ops,
+                               int (*type_size)(const struct glsl_type *));
+
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index be89426..01314ad 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -719,6 +719,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
    ns->num_uniforms = s->num_uniforms;
    ns->num_outputs = s->num_outputs;
    ns->num_shared = s->num_shared;
+   ns->num_scratch = s->num_scratch;
 
    free_clone_state(&state);
 
diff --git a/src/compiler/nir/nir_lower_scratch.c b/src/compiler/nir/nir_lower_scratch.c
new file mode 100644
index 0000000..a1d5590
--- /dev/null
+++ b/src/compiler/nir/nir_lower_scratch.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (ja...@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts references to variables with loads/stores to
+ * scratch space based on a few configurable parameters.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static bool
+deref_has_indirect(nir_deref_var *deref)
+{
+   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+      if (tail->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(tail);
+      if (arr->deref_array_type == nir_deref_array_type_indirect)
+         return true;
+   }
+
+   return false;
+}
+
+static void
+lower_load_store(nir_builder *b,
+                 nir_intrinsic_instr *intrin,
+                 int (*type_size)(const struct glsl_type *),
+                 bool scalar)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   /* Just emit code and let constant-folding go to town */
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
+   nir_deref *tail = &intrin->variables[0]->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type == nir_deref_type_array) {
+         nir_deref_array *deref_array = nir_deref_as_array(tail);
+         unsigned size = type_size(tail->type);
+
+         offset = nir_iadd(b, offset,
+                           nir_imm_int(b, size * deref_array->base_offset));
+
+         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+            nir_ssa_def *mul =
+               nir_imul(b, nir_imm_int(b, size),
+                        nir_ssa_for_src(b, deref_array->indirect, 1));
+
+            offset = nir_iadd(b, offset, mul);
+         }
+      } else if (tail->deref_type == nir_deref_type_struct) {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+         unsigned field_offset = 0;
+         for (unsigned i = 0; i < deref_struct->index; i++) {
+            field_offset += type_size(glsl_get_struct_field(parent_type, i));
+         }
+         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
+      }
+   }
+
+   nir_variable *var = intrin->variables[0]->var;
+   const unsigned var_size = type_size(var->type);
+
+   const unsigned bit_size = glsl_get_bit_size(tail->type);
+   unsigned num_ops = 1;
+   unsigned comps_per_op = glsl_get_vector_elements(tail->type);
+   unsigned comp_size = 0;
+   if (scalar && comps_per_op > 1) {
+      num_ops = comps_per_op;
+      comps_per_op = 1;
+      comp_size = type_size(glsl_scalar_type(glsl_get_base_type(tail->type)));
+      assert(comp_size * num_ops == type_size(tail->type));
+   }
+   assert(num_ops == 1 || comps_per_op == 1);
+   assert(num_ops * comps_per_op == glsl_get_vector_elements(tail->type));
+
+   if (intrin->intrinsic == nir_intrinsic_load_var) {
+      nir_ssa_def *defs[4];
+      for (unsigned i = 0; i < num_ops; i++) {
+         nir_intrinsic_instr *load =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
+
+         load->num_components = comps_per_op;
+
+         load->src[0] =
+            nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i * comp_size)));
+         nir_intrinsic_set_base(load, var->data.location);
+         nir_intrinsic_set_range(load, var_size);
+
+         nir_ssa_dest_init(&load->instr, &load->dest,
+                           comps_per_op, bit_size, NULL);
+         defs[i] = &load->dest.ssa;
+
+         nir_builder_instr_insert(b, &load->instr);
+      }
+
+      if (num_ops > 1)
+         defs[0] = nir_vec(b, defs, num_ops);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(defs[0]));
+   } else {
+      assert(intrin->intrinsic == nir_intrinsic_store_var);
+
+      unsigned op_mask, store_mask;
+      if (num_ops > 1) {
+         store_mask = 1;
+         op_mask = nir_intrinsic_write_mask(intrin);
+      } else {
+         store_mask = nir_intrinsic_write_mask(intrin);
+         op_mask = 1;
+      }
+      for (unsigned i = 0; i < num_ops; i++) {
+         if (!(op_mask & (1 << i)))
+            continue;
+
+         nir_intrinsic_instr *store =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
+
+         store->num_components = comps_per_op;
+
+         if (scalar) {
+            assert(intrin->src[0].is_ssa);
+            store->src[0] =
+               nir_src_for_ssa(nir_channel(b, intrin->src[0].ssa, i));
+         } else {
+            nir_src_copy(&store->src[0], &intrin->src[0], store);
+         }
+         store->src[1] =
+            nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i * comp_size)));
+         nir_intrinsic_set_base(store, var->data.location);
+         nir_intrinsic_set_range(store, var_size);
+         nir_intrinsic_set_write_mask(store, store_mask);
+
+         nir_builder_instr_insert(b, &store->instr);
+      }
+   }
+
+   nir_instr_remove(&intrin->instr);
+}
+
+bool
+nir_lower_vars_to_scratch(nir_shader *shader,
+                          nir_variable_mode modes,
+                          int size_threshold,
+                          bool use_scalar_ops,
+                          int (*type_size)(const struct glsl_type *))
+{
+   /* First, we walk the instructions and flag any variables we want to lower
+    * by removing them from their respective list and setting the mode to 0.
+    */
+   nir_foreach_function(function, shader) {
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_var &&
+                intrin->intrinsic != nir_intrinsic_store_var)
+               continue;
+
+            nir_variable *var = intrin->variables[0]->var;
+
+            /* Only lower variables with one of the requested modes.  This
+             * also prevents the following code from executing more than once
+             * per variable since we set the mode to 0.
+             */
+            if (!(modes & var->data.mode))
+               continue;
+
+            if (!deref_has_indirect(intrin->variables[0]))
+               continue;
+
+            int var_size = type_size(var->type);
+            assert(var_size >= 0);
+            if (var_size < size_threshold)
+               continue;
+
+            /* Remove it from its list */
+            exec_node_remove(&var->node);
+            /* Invalid mode used to flag "moving to scratch" */
+            var->data.mode = 0;
+
+            var->data.location = shader->num_scratch;
+            shader->num_scratch += var_size;
+         }
+      }
+   }
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder build;
+      nir_builder_init(&build, function->impl);
+
+      bool impl_progress = false;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_var &&
+                intrin->intrinsic != nir_intrinsic_store_var)
+               continue;
+
+            nir_variable *var = intrin->variables[0]->var;
+            /* Variables flagged for lowering above have mode == 0 */
+            if (var->data.mode)
+               continue;
+
+            lower_load_store(&build, intrin, type_size, use_scalar_ops);
+            impl_progress = true;
+         }
+      }
+
+      if (impl_progress) {
+         progress = true;
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
-- 
2.5.0.400.gff86faf
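
For illustration, here is the byte-offset computation that the deref walk in
lower_load_store() performs, worked through on a hypothetical local
"float m[4][2]" accessed as m[i][1], with a type_size callback that returns
natural byte sizes (float = 4, so float[2] = 8):

   offset  = 0
   offset += 8 * 0    (m[i]: base_offset, which is 0 for a pure indirect)
   offset += 8 * i    (m[i]: indirect index times element size)
   offset += 4 * 1    ([1]:  constant index times element size)

so the address source of the emitted load_scratch/store_scratch works out to
iadd(imul(8, i), 4) once the adds of zero are folded away, exactly as the
"let constant-folding go to town" comment anticipates.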
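
And a minimal sketch of how a backend might invoke the new pass.  The
nir_var_local mode, the 128-byte threshold, the callback name, and its packing
rules are illustrative assumptions, not part of this patch; the glsl_* helpers
used below are the existing C wrappers from nir_types.h:

   /* Hypothetical type_size callback: tightly packed 32-bit components;
    * matrices are ignored for brevity.  Note that with use_scalar_ops the
    * callback must satisfy type_size(vecN) == N * type_size(scalar), which
    * this packing does. */
   static int
   scratch_type_size(const struct glsl_type *type)
   {
      if (glsl_type_is_vector_or_scalar(type))
         return glsl_get_vector_elements(type) * glsl_get_bit_size(type) / 8;

      if (glsl_type_is_array(type))
         return glsl_get_length(type) *
                scratch_type_size(glsl_get_array_element(type));

      /* Struct: sum the field sizes. */
      int size = 0;
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         size += scratch_type_size(glsl_get_struct_field(type, i));
      return size;
   }

   ...

   /* Lower indirectly-indexed locals of at least 128 bytes to scalar
    * load_scratch/store_scratch intrinsics. */
   nir_lower_vars_to_scratch(nir, nir_var_local, 128, true,
                             scratch_type_size);

Afterwards shader->num_scratch holds the total scratch footprint, in the
callback's units, that the backend has to allocate per invocation.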