So... what do you do when someone goes to shadertoy.com, where shaders use 1000 temps on average?
On Wed, Mar 25, 2015 at 3:21 PM, Brian Paul <bri...@vmware.com> wrote: > The problem is, our binary shader interface only supports 32 temps at this > time. We sometimes bump into that limit as-is. > > -Brian > > > On 03/25/2015 01:04 PM, Ilia Mirkin wrote: >> >> Yes, more temp registers and more instructions. But presumably the >> backend has an optimization pass that is at least as good as this one >> (hopefully better!). Is that not the case for vmware? >> >> On Wed, Mar 25, 2015 at 2:59 PM, Brian Paul <bri...@vmware.com> wrote: >>> >>> Will removing this pass have much effect on the number of temp regs used? >>> It looks like more instructions may be emitted w/out this pass. >>> >>> We're kind of sensitive to that in the VMware driver. >>> >>> -Brian >>> >>> On 03/25/2015 12:16 PM, Marek Olšák wrote: >>>> >>>> >>>> Reviewed-by: Marek Olšák <marek.ol...@amd.com> >>>> >>>> I might need to wait for other people's opinion too. >>>> >>>> Marek >>>> >>>> On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin <imir...@alum.mit.edu> >>>> wrote: >>>>> >>>>> >>>>> It's buggy and unnecessary in the presence of optimizing backends. The >>>>> only backend that will suffer is nv30, but... meh. 
>>>>> >>>>> Bugzilla: >>>>> >>>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D89759&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQU&s=zjWC0LOuYp8NH6K072ITDgPYCCE0F_a_LCdd9zrdrhA&e= >>>>> >>>>> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >>>>> --- >>>>> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199 >>>>> ----------------------------- >>>>> 1 file changed, 199 deletions(-) >>>>> >>>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>>>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>>>> index efee4b2..0402ce3 100644 >>>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >>>>> @@ -461,7 +461,6 @@ public: >>>>> int get_last_temp_read(int index); >>>>> int get_last_temp_write(int index); >>>>> >>>>> - void copy_propagate(void); >>>>> int eliminate_dead_code(void); >>>>> >>>>> void merge_two_dsts(void); >>>>> @@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int >>>>> index) >>>>> } >>>>> >>>>> /* >>>>> - * On a basic block basis, tracks available PROGRAM_TEMPORARY register >>>>> - * channels for copy propagation and updates following instructions to >>>>> - * use the original versions. >>>>> - * >>>>> - * The glsl_to_tgsi_visitor lazily produces code assuming that this >>>>> pass >>>>> - * will occur. As an example, a TXP production before this pass: >>>>> - * >>>>> - * 0: MOV TEMP[1], INPUT[4].xyyy; >>>>> - * 1: MOV TEMP[1].w, INPUT[4].wwww; >>>>> - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; >>>>> - * >>>>> - * and after: >>>>> - * >>>>> - * 0: MOV TEMP[1], INPUT[4].xyyy; >>>>> - * 1: MOV TEMP[1].w, INPUT[4].wwww; >>>>> - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; >>>>> - * >>>>> - * which allows for dead code elimination on TEMP[1]'s writes. 
>>>>> - */ >>>>> -void >>>>> -glsl_to_tgsi_visitor::copy_propagate(void) >>>>> -{ >>>>> - glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, >>>>> - >>>>> glsl_to_tgsi_instruction *, >>>>> - this->next_temp * >>>>> 4); >>>>> - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); >>>>> - int level = 0; >>>>> - >>>>> - foreach_in_list(glsl_to_tgsi_instruction, inst, >>>>> &this->instructions) >>>>> { >>>>> - assert(inst->dst[0].file != PROGRAM_TEMPORARY >>>>> - || inst->dst[0].index < this->next_temp); >>>>> - >>>>> - /* First, do any copy propagation possible into the src regs. */ >>>>> - for (int r = 0; r < 3; r++) { >>>>> - glsl_to_tgsi_instruction *first = NULL; >>>>> - bool good = true; >>>>> - int acp_base = inst->src[r].index * 4; >>>>> - >>>>> - if (inst->src[r].file != PROGRAM_TEMPORARY || >>>>> - inst->src[r].reladdr || >>>>> - inst->src[r].reladdr2) >>>>> - continue; >>>>> - >>>>> - /* See if we can find entries in the ACP consisting of MOVs >>>>> - * from the same src register for all the swizzled channels >>>>> - * of this src register reference. >>>>> - */ >>>>> - for (int i = 0; i < 4; i++) { >>>>> - int src_chan = GET_SWZ(inst->src[r].swizzle, i); >>>>> - glsl_to_tgsi_instruction *copy_chan = acp[acp_base + >>>>> src_chan]; >>>>> - >>>>> - if (!copy_chan) { >>>>> - good = false; >>>>> - break; >>>>> - } >>>>> - >>>>> - assert(acp_level[acp_base + src_chan] <= level); >>>>> - >>>>> - if (!first) { >>>>> - first = copy_chan; >>>>> - } else { >>>>> - if (first->src[0].file != copy_chan->src[0].file || >>>>> - first->src[0].index != copy_chan->src[0].index || >>>>> - first->src[0].index2D != copy_chan->src[0].index2D) >>>>> { >>>>> - good = false; >>>>> - break; >>>>> - } >>>>> - } >>>>> - } >>>>> - >>>>> - if (good) { >>>>> - /* We've now validated that we can copy-propagate to >>>>> - * replace this src register reference. Do it. 
>>>>> - */ >>>>> - inst->src[r].file = first->src[0].file; >>>>> - inst->src[r].index = first->src[0].index; >>>>> - inst->src[r].index2D = first->src[0].index2D; >>>>> - inst->src[r].has_index2 = first->src[0].has_index2; >>>>> - >>>>> - int swizzle = 0; >>>>> - for (int i = 0; i < 4; i++) { >>>>> - int src_chan = GET_SWZ(inst->src[r].swizzle, i); >>>>> - glsl_to_tgsi_instruction *copy_inst = acp[acp_base + >>>>> src_chan]; >>>>> - swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, >>>>> src_chan) >>>>> << (3 * i)); >>>>> - } >>>>> - inst->src[r].swizzle = swizzle; >>>>> - } >>>>> - } >>>>> - >>>>> - switch (inst->op) { >>>>> - case TGSI_OPCODE_BGNLOOP: >>>>> - case TGSI_OPCODE_ENDLOOP: >>>>> - /* End of a basic block, clear the ACP entirely. */ >>>>> - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); >>>>> - break; >>>>> - >>>>> - case TGSI_OPCODE_IF: >>>>> - case TGSI_OPCODE_UIF: >>>>> - ++level; >>>>> - break; >>>>> - >>>>> - case TGSI_OPCODE_ENDIF: >>>>> - case TGSI_OPCODE_ELSE: >>>>> - /* Clear all channels written inside the block from the ACP, >>>>> but >>>>> - * leaving those that were not touched. >>>>> - */ >>>>> - for (int r = 0; r < this->next_temp; r++) { >>>>> - for (int c = 0; c < 4; c++) { >>>>> - if (!acp[4 * r + c]) >>>>> - continue; >>>>> - >>>>> - if (acp_level[4 * r + c] >= level) >>>>> - acp[4 * r + c] = NULL; >>>>> - } >>>>> - } >>>>> - if (inst->op == TGSI_OPCODE_ENDIF) >>>>> - --level; >>>>> - break; >>>>> - >>>>> - default: >>>>> - /* Continuing the block, clear any written channels from >>>>> - * the ACP. >>>>> - */ >>>>> - for (int d = 0; d < 2; d++) { >>>>> - if (inst->dst[d].file == PROGRAM_TEMPORARY && >>>>> inst->dst[d].reladdr) { >>>>> - /* Any temporary might be written, so no copy >>>>> propagation >>>>> - * across this instruction. 
>>>>> - */ >>>>> - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); >>>>> - } else if (inst->dst[d].file == PROGRAM_OUTPUT && >>>>> - inst->dst[d].reladdr) { >>>>> - /* Any output might be written, so no copy propagation >>>>> - * from outputs across this instruction. >>>>> - */ >>>>> - for (int r = 0; r < this->next_temp; r++) { >>>>> - for (int c = 0; c < 4; c++) { >>>>> - if (!acp[4 * r + c]) >>>>> - continue; >>>>> - >>>>> - if (acp[4 * r + c]->src[0].file == >>>>> PROGRAM_OUTPUT) >>>>> - acp[4 * r + c] = NULL; >>>>> - } >>>>> - } >>>>> - } else if (inst->dst[d].file == PROGRAM_TEMPORARY || >>>>> - inst->dst[d].file == PROGRAM_OUTPUT) { >>>>> - /* Clear where it's used as dst. */ >>>>> - if (inst->dst[d].file == PROGRAM_TEMPORARY) { >>>>> - for (int c = 0; c < 4; c++) { >>>>> - if (inst->dst[d].writemask & (1 << c)) >>>>> - acp[4 * inst->dst[d].index + c] = NULL; >>>>> - } >>>>> - } >>>>> - >>>>> - /* Clear where it's used as src. */ >>>>> - for (int r = 0; r < this->next_temp; r++) { >>>>> - for (int c = 0; c < 4; c++) { >>>>> - if (!acp[4 * r + c]) >>>>> - continue; >>>>> - >>>>> - int src_chan = GET_SWZ(acp[4 * r + >>>>> c]->src[0].swizzle, c); >>>>> - >>>>> - if (acp[4 * r + c]->src[0].file == >>>>> inst->dst[d].file && >>>>> - acp[4 * r + c]->src[0].index == >>>>> inst->dst[d].index && >>>>> - inst->dst[d].writemask & (1 << src_chan)) { >>>>> - acp[4 * r + c] = NULL; >>>>> - } >>>>> - } >>>>> - } >>>>> - } >>>>> - } >>>>> - break; >>>>> - } >>>>> - >>>>> - /* If this is a copy, add it to the ACP. 
*/ >>>>> - if (inst->op == TGSI_OPCODE_MOV && >>>>> - inst->dst[0].file == PROGRAM_TEMPORARY && >>>>> - !(inst->dst[0].file == inst->src[0].file && >>>>> - inst->dst[0].index == inst->src[0].index) && >>>>> - !inst->dst[0].reladdr && >>>>> - !inst->saturate && >>>>> - !inst->src[0].reladdr && >>>>> - !inst->src[0].reladdr2 && >>>>> - !inst->src[0].negate) { >>>>> - for (int i = 0; i < 4; i++) { >>>>> - if (inst->dst[0].writemask & (1 << i)) { >>>>> - acp[4 * inst->dst[0].index + i] = inst; >>>>> - acp_level[4 * inst->dst[0].index + i] = level; >>>>> - } >>>>> - } >>>>> - } >>>>> - } >>>>> - >>>>> - ralloc_free(acp_level); >>>>> - ralloc_free(acp); >>>>> -} >>>>> - >>>>> -/* >>>>> * On a basic block basis, tracks available PROGRAM_TEMPORARY >>>>> registers >>>>> for dead >>>>> * code elimination. >>>>> * >>>>> @@ -5623,7 +5425,6 @@ get_mesa_program(struct gl_context *ctx, >>>>> >>>>> /* Perform optimizations on the instructions in the >>>>> glsl_to_tgsi_visitor. */ >>>>> v->simplify_cmp(); >>>>> - v->copy_propagate(); >>>>> while (v->eliminate_dead_code()); >>>>> >>>>> v->merge_two_dsts(); >>>>> -- >>>>> 2.0.5 >>>>> >>>>> _______________________________________________ >>>>> mesa-dev mailing list >>>>> mesa-dev@lists.freedesktop.org >>>>> >>>>> >>>>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQU&s=2ypQDTjgA1t1k9zgBXxsw9iSjhlz3Mta_iyE4dy07mg&e= >>>> >>>> >>>> _______________________________________________ >>>> mesa-dev mailing list >>>> mesa-dev@lists.freedesktop.org >>>> >>>> >>>> 
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQU&s=2ypQDTjgA1t1k9zgBXxsw9iSjhlz3Mta_iyE4dy07mg&e= >>>> >>> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev