- There's no reason there would be only 64 operations that read from the output of a mov from VPM, so we might smash the stack (fixes etqw trace)
- Fixes segfault where we assumed that a single-use temp had a def (fixes 2 piglit tests) - We need to only mark progress when we actually did the optimization, or we'll infinite loop (0ad trace). - Misc style fixes. - No reordering sampler instructions (fixes a glean test) shader-db results: total instructions in shared programs: 78513 -> 78071 (-0.56%) instructions in affected programs: 10406 -> 9964 (-4.25%) total estimated cycles in shared programs: 234674 -> 234274 (-0.17%) estimated cycles in affected programs: 35188 -> 34788 (-1.14%) --- Varad, here's what I came up with trying to test your patch. If these changes look good to you, I can squash them in and push. src/gallium/drivers/vc4/vc4_opt_vpm.c | 45 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c index 277b345..a4ee6af 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c @@ -40,10 +40,8 @@ qir_opt_vpm(struct vc4_compile *c) bool progress = false; struct qinst *vpm_writes[64] = { 0 }; - struct qinst *vpm_reads[64] = { 0 }; uint32_t use_count[c->num_temps]; uint32_t vpm_write_count = 0; - uint32_t vpm_read_count = 0; memset(&use_count, 0, sizeof(use_count)); list_for_each_entry(struct qinst, inst, &c->instructions, link) { @@ -59,24 +57,14 @@ qir_opt_vpm(struct vc4_compile *c) if (inst->src[i].file == QFILE_TEMP) { uint32_t temp = inst->src[i].index; use_count[temp]++; - - struct qinst *mov = c->defs[temp]; - if (!mov || - (mov->op != QOP_MOV && - mov->op != QOP_FMOV && - mov->op != QOP_MMOV)) { - continue; - } - - if (mov->src[0].file == QFILE_VPM) - vpm_reads[vpm_read_count++] = inst; } } } - for (int i = 0; i < vpm_read_count; i++) { - struct qinst *inst = vpm_reads[i]; - + /* For instructions reading from a temporary that contains a VPM read + * result, try to move the instruction up in place of the VPM read. + */ + list_for_each_entry(struct qinst, inst, &c->instructions, link) { if (!inst || qir_is_multi_instruction(inst)) continue; @@ -84,21 +72,32 @@ qir_opt_vpm(struct vc4_compile *c) continue; if (qir_has_side_effects(c, inst) || - qir_has_side_effect_reads(c, inst)) + qir_has_side_effect_reads(c, inst) || + qir_is_tex(inst)) continue; for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) { - if(inst->src[j].file != QFILE_TEMP) + if (inst->src[j].file != QFILE_TEMP) continue; uint32_t temp = inst->src[j].index; + + /* Since VPM reads pull from a FIFO, we only get to + * read each VPM entry once (unless we reset the read + * pointer). That means we can't copy-propagate a VPM + * read to multiple locations. + */ if (use_count[temp] != 1) continue; struct qinst *mov = c->defs[temp]; - - if (mov->src[0].file != QFILE_VPM) + if (!mov || + (mov->op != QOP_MOV && + mov->op != QOP_FMOV && + mov->op != QOP_MMOV) || + mov->src[0].file != QFILE_VPM) { continue; + } uint32_t temps = 0; for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) { @@ -109,15 +108,15 @@ qir_opt_vpm(struct vc4_compile *c) /* The instruction is safe to reorder if its other * sources are independent of previous instructions */ - if (temps == 1 ) { + if (temps == 1) { list_del(&inst->link); inst->src[j] = mov->src[0]; list_replace(&mov->link, &inst->link); c->defs[temp] = NULL; free(mov); + progress = true; + break; } - - progress = true; } } -- 2.7.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev