We could just skip this pass for radeonsi, or for any driver that does its own register allocation, because it's useless there.
Marek On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > The glsl->tgsi convertor does some temporary register reduction > however in profiling shader-db this shows up quite highly, > > so optimise things to reduce the number of loops through > all the instructions we do. This drops merge_registers > from 4-5% on the profile to 1%. I think this can be reduced > further by possibly optimising the renumber pass. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 > +++++++++++++++-------------- > 1 file changed, 79 insertions(+), 74 deletions(-) > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 65aae40..e07db11 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -480,10 +480,9 @@ public: > void simplify_cmp(void); > > void rename_temp_register(int index, int new_index); > - int get_first_temp_read(int index); > - int get_first_temp_write(int index); > - int get_last_temp_read(int index); > - int get_last_temp_write(int index); > + void get_first_temp_read(int *first_reads); > + void get_last_temp_read_first_temp_write(int *last_reads, int > *first_writes); > + void get_last_temp_write(int *last_writes); > > void copy_propagate(void); > int eliminate_dead_code(void); > @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, > int new_index) > } > } > > -int > -glsl_to_tgsi_visitor::get_first_temp_read(int index) > +void > +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) > { > int depth = 0; /* loop depth */ > int loop_start = -1; /* index of the first active BGNLOOP (if any) */ > @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) > > foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { > for (j = 0; j < num_inst_src_regs(inst); j++) { > - if 
(inst->src[j].file == PROGRAM_TEMPORARY && > - inst->src[j].index == index) { > - return (depth == 0) ? i : loop_start; > + if (inst->src[j].file == PROGRAM_TEMPORARY) { > + if (first_reads[inst->src[j].index] == -1) > + first_reads[inst->src[j].index] = (depth == 0) ? i : > loop_start; > } > } > for (j = 0; j < inst->tex_offset_num_offset; j++) { > - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && > - inst->tex_offsets[j].index == index) { > - return (depth == 0) ? i : loop_start; > + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { > + if (first_reads[inst->tex_offsets[j].index] == -1) > + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : > loop_start; > } > } > if (inst->op == TGSI_OPCODE_BGNLOOP) { > @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) > assert(depth >= 0); > i++; > } > - return -1; > } > > -int > -glsl_to_tgsi_visitor::get_first_temp_write(int index) > +void > +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, > int *first_writes) > { > int depth = 0; /* loop depth */ > int loop_start = -1; /* index of the first active BGNLOOP (if any) */ > - int i = 0; > - unsigned j; > - > + unsigned i = 0, j; > + int k; > foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { > + for (j = 0; j < num_inst_src_regs(inst); j++) { > + if (inst->src[j].file == PROGRAM_TEMPORARY) > + last_reads[inst->src[j].index] = (depth == 0) ? i : -2; > + } > for (j = 0; j < num_inst_dst_regs(inst); j++) { > - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == > index) { > - return (depth == 0) ? i : loop_start; > - } > + if (inst->dst[j].file == PROGRAM_TEMPORARY) > + if (first_writes[inst->dst[j].index] == -1) > + first_writes[inst->dst[j].index] = (depth == 0) ? i : > loop_start; > + } > + for (j = 0; j < inst->tex_offset_num_offset; j++) { > + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) > + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? 
i : -2; > } > if (inst->op == TGSI_OPCODE_BGNLOOP) { > if(depth++ == 0) > loop_start = i; > } else if (inst->op == TGSI_OPCODE_ENDLOOP) { > - if (--depth == 0) > + if (--depth == 0) { > loop_start = -1; > - } > - assert(depth >= 0); > - i++; > - } > - return -1; > -} > - > -int > -glsl_to_tgsi_visitor::get_last_temp_read(int index) > -{ > - int depth = 0; /* loop depth */ > - int last = -1; /* index of last instruction that reads the temporary */ > - unsigned i = 0, j; > - > - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { > - for (j = 0; j < num_inst_src_regs(inst); j++) { > - if (inst->src[j].file == PROGRAM_TEMPORARY && > - inst->src[j].index == index) { > - last = (depth == 0) ? i : -2; > + for (k = 0; k < this->next_temp; k++) { > + if (last_reads[k] == -2) { > + last_reads[k] = i; > + } > + } > } > } > - for (j = 0; j < inst->tex_offset_num_offset; j++) { > - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && > - inst->tex_offsets[j].index == index) > - last = (depth == 0) ? i : -2; > - } > - if (inst->op == TGSI_OPCODE_BGNLOOP) > - depth++; > - else if (inst->op == TGSI_OPCODE_ENDLOOP) > - if (--depth == 0 && last == -2) > - last = i; > assert(depth >= 0); > i++; > } > - assert(last >= -1); > - return last; > } > > -int > -glsl_to_tgsi_visitor::get_last_temp_write(int index) > +void > +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) > { > int depth = 0; /* loop depth */ > - int last = -1; /* index of last instruction that writes to the temporary > */ > - int i = 0; > + int i = 0, k; > unsigned j; > > foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { > for (j = 0; j < num_inst_dst_regs(inst); j++) { > - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == > index) > - last = (depth == 0) ? i : -2; > + if (inst->dst[j].file == PROGRAM_TEMPORARY) > + last_writes[inst->dst[j].index] = (depth == 0) ? 
i : -2; > } > > if (inst->op == TGSI_OPCODE_BGNLOOP) > depth++; > else if (inst->op == TGSI_OPCODE_ENDLOOP) > - if (--depth == 0 && last == -2) > - last = i; > + if (--depth == 0) { > + for (k = 0; k < this->next_temp; k++) { > + if (last_writes[k] == -2) { > + last_writes[k] = i; > + } > + } > + } > assert(depth >= 0); > i++; > } > - assert(last >= -1); > - return last; > } > > /* > @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void) > * into an array so that we don't have to traverse the instruction list as > * much. */ > for (i = 0; i < this->next_temp; i++) { > - last_reads[i] = get_last_temp_read(i); > - first_writes[i] = get_first_temp_write(i); > + last_reads[i] = -1; > + first_writes[i] = -1; > } > + get_last_temp_read_first_temp_write(last_reads, first_writes); > > /* Start looking for registers with non-overlapping usages that can be > * merged together. */ > @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void) > { > int i = 0; > int new_index = 0; > + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); > + > + for (i = 0; i < this->next_temp; i++) > + first_reads[i] = -1; > + get_first_temp_read(first_reads); > > for (i = 0; i < this->next_temp; i++) { > - if (get_first_temp_read(i) < 0) continue; > + if (first_reads[i] < 0) continue; > if (i != new_index) > rename_temp_register(i, new_index); > new_index++; > } > > this->next_temp = new_index; > + ralloc_free(first_reads); > } > > /** > @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx, > #if 0 > /* Print out some information (for debugging purposes) used by the > * optimization passes. 
*/ > - for (i = 0; i < v->next_temp; i++) { > - int fr = v->get_first_temp_read(i); > - int fw = v->get_first_temp_write(i); > - int lr = v->get_last_temp_read(i); > - int lw = v->get_last_temp_write(i); > - > - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); > - assert(fw <= fr); > + { > + int i; > + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); > + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); > + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); > + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); > + > + for (i = 0; i < v->next_temp; i++) { > + first_writes[i] = -1; > + first_reads[i] = -1; > + last_writes[i] = -1; > + last_reads[i] = -1; > + } > + v->get_first_temp_read(first_reads); > + v->get_last_temp_read_first_temp_write(last_reads, first_writes); > + v->get_last_temp_write(last_writes); > + for (i = 0; i < v->next_temp; i++) > + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i], > + first_writes[i], > + last_reads[i], > + last_writes[i]); > + ralloc_free(first_writes); > + ralloc_free(first_reads); > + ralloc_free(last_writes); > + ralloc_free(last_reads); > } > #endif > > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev