Yes, I think it's useful too. I'm not familiar with this code, so you can add
Acked-by: Marek Olšák <marek.ol...@amd.com> and commit if there is no reviewer. Marek On Thu, Aug 27, 2015 at 9:48 PM, Dave Airlie <airl...@gmail.com> wrote: > On 27 August 2015 at 21:57, Marek Olšák <mar...@gmail.com> wrote: >> We could just skip this for radeonsi or any driver that does regalloc, >> because it's useless there. > > I did expect this comment from Ilia, though adding a CAP and > piping it through is an option I'd look into later (or someone who > cares can do it sooner). > > It doesn't change what this patch does, however; it looks useful for r600 > as is. > > Dave. > >> >> Marek >> >> On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airl...@gmail.com> wrote: >>> From: Dave Airlie <airl...@redhat.com> >>> >>> The glsl->tgsi converter does some temporary register reduction; >>> however, in profiling shader-db this shows up quite highly, >>> >>> so optimise things to reduce the number of loops through >>> all the instructions we do. This drops merge_registers >>> from 4-5% on the profile to 1%. I think this can be reduced >>> further by possibly optimising the renumber pass.
>>> >>> Signed-off-by: Dave Airlie <airl...@redhat.com> >>> --- >>> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 >>> +++++++++++++++-------------- >>> 1 file changed, 79 insertions(+), 74 deletions(-) >>> >>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>> index 65aae40..e07db11 100644 >>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>> @@ -480,10 +480,9 @@ public: >>> void simplify_cmp(void); >>> >>> void rename_temp_register(int index, int new_index); >>> - int get_first_temp_read(int index); >>> - int get_first_temp_write(int index); >>> - int get_last_temp_read(int index); >>> - int get_last_temp_write(int index); >>> + void get_first_temp_read(int *first_reads); >>> + void get_last_temp_read_first_temp_write(int *last_reads, int >>> *first_writes); >>> + void get_last_temp_write(int *last_writes); >>> >>> void copy_propagate(void); >>> int eliminate_dead_code(void); >>> @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, >>> int new_index) >>> } >>> } >>> >>> -int >>> -glsl_to_tgsi_visitor::get_first_temp_read(int index) >>> +void >>> +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) >>> { >>> int depth = 0; /* loop depth */ >>> int loop_start = -1; /* index of the first active BGNLOOP (if any) */ >>> @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) >>> >>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { >>> for (j = 0; j < num_inst_src_regs(inst); j++) { >>> - if (inst->src[j].file == PROGRAM_TEMPORARY && >>> - inst->src[j].index == index) { >>> - return (depth == 0) ? i : loop_start; >>> + if (inst->src[j].file == PROGRAM_TEMPORARY) { >>> + if (first_reads[inst->src[j].index] == -1) >>> + first_reads[inst->src[j].index] = (depth == 0) ? 
i : >>> loop_start; >>> } >>> } >>> for (j = 0; j < inst->tex_offset_num_offset; j++) { >>> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && >>> - inst->tex_offsets[j].index == index) { >>> - return (depth == 0) ? i : loop_start; >>> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { >>> + if (first_reads[inst->tex_offsets[j].index] == -1) >>> + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i >>> : loop_start; >>> } >>> } >>> if (inst->op == TGSI_OPCODE_BGNLOOP) { >>> @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) >>> assert(depth >= 0); >>> i++; >>> } >>> - return -1; >>> } >>> >>> -int >>> -glsl_to_tgsi_visitor::get_first_temp_write(int index) >>> +void >>> +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, >>> int *first_writes) >>> { >>> int depth = 0; /* loop depth */ >>> int loop_start = -1; /* index of the first active BGNLOOP (if any) */ >>> - int i = 0; >>> - unsigned j; >>> - >>> + unsigned i = 0, j; >>> + int k; >>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { >>> + for (j = 0; j < num_inst_src_regs(inst); j++) { >>> + if (inst->src[j].file == PROGRAM_TEMPORARY) >>> + last_reads[inst->src[j].index] = (depth == 0) ? i : -2; >>> + } >>> for (j = 0; j < num_inst_dst_regs(inst); j++) { >>> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index >>> == index) { >>> - return (depth == 0) ? i : loop_start; >>> - } >>> + if (inst->dst[j].file == PROGRAM_TEMPORARY) >>> + if (first_writes[inst->dst[j].index] == -1) >>> + first_writes[inst->dst[j].index] = (depth == 0) ? i : >>> loop_start; >>> + } >>> + for (j = 0; j < inst->tex_offset_num_offset; j++) { >>> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) >>> + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? 
i : -2; >>> } >>> if (inst->op == TGSI_OPCODE_BGNLOOP) { >>> if(depth++ == 0) >>> loop_start = i; >>> } else if (inst->op == TGSI_OPCODE_ENDLOOP) { >>> - if (--depth == 0) >>> + if (--depth == 0) { >>> loop_start = -1; >>> - } >>> - assert(depth >= 0); >>> - i++; >>> - } >>> - return -1; >>> -} >>> - >>> -int >>> -glsl_to_tgsi_visitor::get_last_temp_read(int index) >>> -{ >>> - int depth = 0; /* loop depth */ >>> - int last = -1; /* index of last instruction that reads the temporary */ >>> - unsigned i = 0, j; >>> - >>> - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { >>> - for (j = 0; j < num_inst_src_regs(inst); j++) { >>> - if (inst->src[j].file == PROGRAM_TEMPORARY && >>> - inst->src[j].index == index) { >>> - last = (depth == 0) ? i : -2; >>> + for (k = 0; k < this->next_temp; k++) { >>> + if (last_reads[k] == -2) { >>> + last_reads[k] = i; >>> + } >>> + } >>> } >>> } >>> - for (j = 0; j < inst->tex_offset_num_offset; j++) { >>> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && >>> - inst->tex_offsets[j].index == index) >>> - last = (depth == 0) ? i : -2; >>> - } >>> - if (inst->op == TGSI_OPCODE_BGNLOOP) >>> - depth++; >>> - else if (inst->op == TGSI_OPCODE_ENDLOOP) >>> - if (--depth == 0 && last == -2) >>> - last = i; >>> assert(depth >= 0); >>> i++; >>> } >>> - assert(last >= -1); >>> - return last; >>> } >>> >>> -int >>> -glsl_to_tgsi_visitor::get_last_temp_write(int index) >>> +void >>> +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) >>> { >>> int depth = 0; /* loop depth */ >>> - int last = -1; /* index of last instruction that writes to the >>> temporary */ >>> - int i = 0; >>> + int i = 0, k; >>> unsigned j; >>> >>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { >>> for (j = 0; j < num_inst_dst_regs(inst); j++) { >>> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index >>> == index) >>> - last = (depth == 0) ? 
i : -2; >>> + if (inst->dst[j].file == PROGRAM_TEMPORARY) >>> + last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; >>> } >>> >>> if (inst->op == TGSI_OPCODE_BGNLOOP) >>> depth++; >>> else if (inst->op == TGSI_OPCODE_ENDLOOP) >>> - if (--depth == 0 && last == -2) >>> - last = i; >>> + if (--depth == 0) { >>> + for (k = 0; k < this->next_temp; k++) { >>> + if (last_writes[k] == -2) { >>> + last_writes[k] = i; >>> + } >>> + } >>> + } >>> assert(depth >= 0); >>> i++; >>> } >>> - assert(last >= -1); >>> - return last; >>> } >>> >>> /* >>> @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void) >>> * into an array so that we don't have to traverse the instruction list >>> as >>> * much. */ >>> for (i = 0; i < this->next_temp; i++) { >>> - last_reads[i] = get_last_temp_read(i); >>> - first_writes[i] = get_first_temp_write(i); >>> + last_reads[i] = -1; >>> + first_writes[i] = -1; >>> } >>> + get_last_temp_read_first_temp_write(last_reads, first_writes); >>> >>> /* Start looking for registers with non-overlapping usages that can be >>> * merged together. */ >>> @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void) >>> { >>> int i = 0; >>> int new_index = 0; >>> + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); >>> + >>> + for (i = 0; i < this->next_temp; i++) >>> + first_reads[i] = -1; >>> + get_first_temp_read(first_reads); >>> >>> for (i = 0; i < this->next_temp; i++) { >>> - if (get_first_temp_read(i) < 0) continue; >>> + if (first_reads[i] < 0) continue; >>> if (i != new_index) >>> rename_temp_register(i, new_index); >>> new_index++; >>> } >>> >>> this->next_temp = new_index; >>> + ralloc_free(first_reads); >>> } >>> >>> /** >>> @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx, >>> #if 0 >>> /* Print out some information (for debugging purposes) used by the >>> * optimization passes. 
*/ >>> - for (i = 0; i < v->next_temp; i++) { >>> - int fr = v->get_first_temp_read(i); >>> - int fw = v->get_first_temp_write(i); >>> - int lr = v->get_last_temp_read(i); >>> - int lw = v->get_last_temp_write(i); >>> - >>> - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); >>> - assert(fw <= fr); >>> + { >>> + int i; >>> + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); >>> + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); >>> + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); >>> + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); >>> + >>> + for (i = 0; i < v->next_temp; i++) { >>> + first_writes[i] = -1; >>> + first_reads[i] = -1; >>> + last_writes[i] = -1; >>> + last_reads[i] = -1; >>> + } >>> + v->get_first_temp_read(first_reads); >>> + v->get_last_temp_read_first_temp_write(last_reads, first_writes); >>> + v->get_last_temp_write(last_writes); >>> + for (i = 0; i < v->next_temp; i++) >>> + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, >>> first_reads[i], >>> + first_writes[i], >>> + last_reads[i], >>> + last_writes[i]); >>> + ralloc_free(first_writes); >>> + ralloc_free(first_reads); >>> + ralloc_free(last_writes); >>> + ralloc_free(last_reads); >>> } >>> #endif >>> >>> -- >>> 2.4.3 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev