Arrays whose elements are only accessed directly are replaced by the corresponding number of temporary registers. By doing so, the otherwise reserved register range becomes subject to further optimizations like copy propagation and register merging.
Thanks to the resulting reduced register pressure, this patch makes the piglit tests spec/glsl-1.50/execution - variable-indexing/vs-output-array-vec3-index-wr-before-gs geometry/max-input-components pass on r600 (barts), where they previously failed with a "GPR limit exceeded" error. Signed-off-by: Gert Wollny <gw.foss...@gmail.com> --- No further changes are observed with the piglit shader subset. PS: Submitter has no write access to mesa-git src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 109 ++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0772b73627..8fb76a9965 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -337,6 +337,7 @@ public: void copy_propagate(void); int eliminate_dead_code(void); + void dissolve_arrays(void); void merge_two_dsts(void); void merge_registers(void); void renumber_registers(void); @@ -5279,6 +5280,110 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) } } + + +/* One-dimensional arrays whose elements are only accessed directly are + * replaced by a corresponding set of temporary registers that then can become + * subject to further optimization steps like copy propagation and + * register merging. 
+ */ +void +glsl_to_tgsi_visitor::dissolve_arrays(void) +{ + if (!next_array) + return; + + bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1); + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { + const st_src_reg& src = inst->src[j]; + if (src.file == PROGRAM_ARRAY && + (src.reladdr || src.reladdr2 || src.has_index2)) + has_indirect_access[src.array_id] = true; + } + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { + const st_src_reg& src = inst->tex_offsets[j]; + if (src.file == PROGRAM_ARRAY && + (src.reladdr || src.reladdr2 || src.has_index2)) + has_indirect_access[src.array_id] = true; + } + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { + const st_dst_reg& dst = inst->dst[j]; + if (dst.file == PROGRAM_ARRAY && + (dst.reladdr || dst.reladdr2 || dst.has_index2)) + has_indirect_access[dst.array_id] = true; + } + } + + unsigned array_offset = 0; + unsigned n_remaining_arrays = 0; + + /* Double use: For arrays that get disolved this value will contain + * the base index of the temporary registers this array is replaced + * with. For arrays that remain it contains the new array ID. 
+ */ + int *array_remap_info = rzalloc_array(has_indirect_access, int, + next_array + 1); + + for (unsigned i = 1; i <= next_array; ++i) { + if (!has_indirect_access[i]) { + array_remap_info[i] = this->next_temp + array_offset; + array_offset += array_sizes[i-1]; + } else { + array_sizes[n_remaining_arrays] = array_sizes[i-1]; + array_remap_info[i] = ++n_remaining_arrays; + } + } + + if (next_array != n_remaining_arrays) { + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { + st_src_reg& src = inst->src[j]; + if (src.file == PROGRAM_ARRAY) { + if (!has_indirect_access[src.array_id]) { + src.file = PROGRAM_TEMPORARY; + src.index = src.index + array_remap_info[src.array_id]; + src.array_id = 0; + } else { + src.array_id = array_remap_info[src.array_id]; + } + } + } + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { + st_src_reg& src = inst->tex_offsets[j]; + if (src.file == PROGRAM_ARRAY) { + if (!has_indirect_access[src.array_id]) { + src.file = PROGRAM_TEMPORARY; + src.index = src.index + array_remap_info[src.array_id]; + src.array_id = 0; + } else { + src.array_id = array_remap_info[src.array_id]; + } + } + } + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { + st_dst_reg& dst = inst->dst[j]; + if (dst.file == PROGRAM_ARRAY) { + if (!has_indirect_access[dst.array_id]) { + dst.file = PROGRAM_TEMPORARY; + dst.index = dst.index + array_remap_info[dst.array_id]; + dst.array_id = 0; + } else { + dst.array_id = array_remap_info[dst.array_id]; + } + } + } + } + } + + ralloc_free(has_indirect_access); + + this->next_temp += array_offset; + next_array = n_remaining_arrays; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -6733,6 +6838,8 @@ get_mesa_program_tgsi(struct gl_context *ctx, } #endif + v->dissolve_arrays(); + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ v->simplify_cmp(); v->copy_propagate(); -- 2.13.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev