For example the following type of thing is seen in TCS from a number of Vulkan and DXVK games:
vec1 32 ssa_557 = deref_var &oPatch (shader_out float) vec1 32 ssa_558 = intrinsic load_deref (ssa_557) () vec1 32 ssa_559 = deref_var &oPatch@42 (shader_out float) vec1 32 ssa_560 = intrinsic load_deref (ssa_559) () vec1 32 ssa_561 = deref_var &oPatch@43 (shader_out float) vec1 32 ssa_562 = intrinsic load_deref (ssa_561) () intrinsic store_deref (ssa_557, ssa_558) (1) /* wrmask=x */ intrinsic store_deref (ssa_559, ssa_560) (1) /* wrmask=x */ intrinsic store_deref (ssa_561, ssa_562) (1) /* wrmask=x */ No shader-db changes on i965 (SKL). vkpipeline-db results RADV (VEGA): Totals from affected shaders: SGPRS: 8152 -> 8016 (-1.67 %) VGPRS: 6780 -> 7068 (4.25 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 480420 -> 468576 (-2.47 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 1027 -> 992 (-3.41 %) Wait states: 0 -> 0 (0.00 %) The Max Waves and VGPRS changes here are misleading. What is happening is a bunch of TCS outputs are being optimised away as they are now recognised as unused. This results in more varyings being compacted via nir_compact_varyings() which can results in more register pressure. This is an existing problem independent of this patch. I've run some benchmarks and haven't noticed any regrssions in affected games. --- src/compiler/nir/nir_opt_copy_prop_vars.c | 30 +++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 7a21ad56c79..fdabc1769a5 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -676,18 +676,28 @@ copy_prop_vars_block(struct copy_prop_var_state *state, } case nir_intrinsic_store_deref: { - struct value value = { - .is_ssa = true - }; - - for (unsigned i = 0; i < intrin->num_components; i++) - value.ssa[i] = intrin->src[1].ssa; - nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); - unsigned wrmask = nir_intrinsic_write_mask(intrin); struct copy_entry *entry = - get_entry_and_kill_aliases(copies, dst, wrmask); - store_to_entry(state, entry, &value, wrmask); + lookup_entry_for_deref(copies, dst, nir_derefs_equal_bit); + if (entry && entry->src.ssa[0] == intrin->src[1].ssa) { + /* If we are storing the value from a load of the same var the + * store is redundant so remove it. + */ + nir_instr_remove(instr); + } else { + struct value value = { + .is_ssa = true + }; + + for (unsigned i = 0; i < intrin->num_components; i++) + value.ssa[i] = intrin->src[1].ssa; + + unsigned wrmask = nir_intrinsic_write_mask(intrin); + struct copy_entry *entry = + get_entry_and_kill_aliases(copies, dst, wrmask); + store_to_entry(state, entry, &value, wrmask); + } + break; } -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev