Am Dienstag, den 01.05.2018, 11:57 +0200 schrieb Nicolai Hähnle: > So the GLSL transforms don't already do this? Interesting... anyway, > seems a nice improvement, When I first sent this patch stand-alone there were some comments about this:
https://patchwork.freedesktop.org/patch/189842/[¹ BTW: as you might have seen Benedikt Schemmer did quite some testing of the series enabling register-merge on radeonsi. One thing that came up was failing piglits with bindless textures. From what I currently see all drivers that supports bindless textures actually skip register_merge and my assumption was that I should enable this code with the same restrictions - mainly because I assume that the LLVM back-end takes care of these things anyway. But of course I would like to make this code correct with respect to this feature too, so I was wondering whether it is sufficient to apply the final register renaming that stems from this code to glsl_to_tgsi_instruction::resource or whether there is something more I should take care of? (I don't have the hardware to test this). many thanks, Gert > > On 28.04.2018 21:30, Gert Wollny wrote: > > Array who's elements are only accessed directly are replaced by the > > according number of temporary registers. By doing so the otherwise > > reserved register range becomes subject to further optimizations > > like > > copy propagation and register merging. > > > > Thanks to the resulting reduced register pressure this patch makes > > the piglits > > > > spec/glsl-1.50/execution - > > variable-indexing/vs-output-array-vec3-index-wr-before-gs > > geometry/max-input-components > > > > pass on r600 (barts) where they would fail before with a "GPR limit > > exceeded" > > error. > > > > v3: * enable this optimization only if the driver requests register > > merge > > > > v2: * rename method dissolve_arrays to split_arrays > > * unify the tracking and remapping methods for src and st > > registers > > * also track access to arrays via reladdr* > > > > Signed-off-by: Gert Wollny <gw.foss...@gmail.com> > > --- > > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 117 > > ++++++++++++++++++++++++++++- > > 1 file changed, 116 insertions(+), 1 deletion(-) > > > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > > index 1614367a01..e778807b7c 100644 > > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > > @@ -369,6 +369,7 @@ public: > > void copy_propagate(void); > > int eliminate_dead_code(void); > > > > + void split_arrays(void); > > void merge_two_dsts(void); > > void merge_registers(void); > > void renumber_registers(void); > > @@ -5400,6 +5401,110 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) > > } > > } > > > > + > > + > > +/* One-dimensional arrays who's elements are only accessed > > directly are > > *whose > > Also, the placement of this comment is odd, floating like this in > empty > space. It looks like it should be a comment on split_arrays. > > > > + * replaced by an according set of temporary registers that then > > can become > > + * subject to further optimization steps like copy propagation and > > + * register merging. > > + */ > > + > > +template <typename st_reg> > > +void test_indirect_access(const st_reg& reg, bool > > *has_indirect_access) > > +{ > > + if (reg.file == PROGRAM_ARRAY) { > > + if (reg.reladdr || reg.reladdr2 || reg.has_index2) { > > + has_indirect_access[reg.array_id] = true; > > + if (reg.reladdr) > > + test_indirect_access(*reg.reladdr, > > has_indirect_access); > > + if (reg.reladdr2) > > + test_indirect_access(*reg.reladdr, > > has_indirect_access); > > + } > > + } > > +} > > + > > +template <typename st_reg> > > +void remap_array(st_reg& reg, const int *array_remap_info, > > + const bool *has_indirect_access) > > +{ > > + if (reg.file == PROGRAM_ARRAY) { > > + if (!has_indirect_access[reg.array_id]) { > > + reg.file = PROGRAM_TEMPORARY; > > + reg.index = reg.index + array_remap_info[reg.array_id]; > > + reg.array_id = 0; > > + } else { > > + reg.array_id = array_remap_info[reg.array_id]; > > + } > > + > > + if (reg.reladdr) > > + remap_array(*reg.reladdr, array_remap_info, > > has_indirect_access); > > + > > + if (reg.reladdr2) > > + remap_array(*reg.reladdr2, array_remap_info, > > has_indirect_access); > > + } > > +} > > + > > +void > > +glsl_to_tgsi_visitor::split_arrays(void) > > +{ > > + if (!next_array) > > + return; > > + > > + bool *has_indirect_access = rzalloc_array(mem_ctx, bool, > > next_array + 1); > > + > > + foreach_in_list(glsl_to_tgsi_instruction, inst, &this- > > >instructions) { > > + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) > > + test_indirect_access(inst->src[j], has_indirect_access); > > + > > + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) > > + test_indirect_access(inst->tex_offsets[j], > > has_indirect_access); > > + > > + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) > > + test_indirect_access(inst->dst[j], has_indirect_access); > > + } > > + > > + unsigned array_offset = 0; > > + unsigned n_remaining_arrays = 0; > > + > > + /* Double use: For arrays that get disolved this value will > > contain > > *dissolved > > > > + * the base index of the temporary registers this array is > > replaced > > + * with. For arrays that remain it contains the new array ID. > > + */ > > + int *array_remap_info = rzalloc_array(has_indirect_access, int, > > + next_array + 1); > > + > > + for (unsigned i = 1; i <= next_array; ++i) { > > + if (!has_indirect_access[i]) { > > + array_remap_info[i] = this->next_temp + array_offset; > > + array_offset += array_sizes[i-1]; > > Spaces around operators are generally preferred. > > > > + } else { > > + array_sizes[n_remaining_arrays] = array_sizes[i-1]; > > + array_remap_info[i] = ++n_remaining_arrays; > > + } > > + } > > + > > + if (next_array != n_remaining_arrays) { > > + > > Excessive whitespace: both the empty line and after the !=. Also > twice > below. > > Cheers, > Nicolai > > > > + foreach_in_list(glsl_to_tgsi_instruction, inst, &this- > > >instructions) { > > + > > + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) > > + remap_array(inst->src[j], array_remap_info, > > has_indirect_access); > > + > > + for (unsigned j = 0; j < inst->tex_offset_num_offset; > > j++) > > + remap_array(inst->tex_offsets[j], array_remap_info, > > has_indirect_access); > > + > > + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { > > + remap_array(inst->dst[j], array_remap_info, > > has_indirect_access); > > + } > > + } > > + } > > + > > + ralloc_free(has_indirect_access); > > + > > + this->next_temp += array_offset; > > + next_array = n_remaining_arrays; > > +} > > + > > /* Merges temporary registers together where possible to reduce > > the number of > > * registers needed to run a program. > > * > > @@ -6892,8 +6997,18 @@ get_mesa_program_tgsi(struct gl_context > > *ctx, > > while (v->eliminate_dead_code()); > > > > v->merge_two_dsts(); > > - if (!skip_merge_registers) > > + > > + if (!skip_merge_registers) { > > + > > + v->split_arrays(); > > + v->copy_propagate(); > > + while (v->eliminate_dead_code()); > > + > > v->merge_registers(); > > + v->copy_propagate(); > > + while (v->eliminate_dead_code()); > > + } > > + > > v->renumber_registers(); > > > > /* Write the END instruction. */ > > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev