For now I have only enabled this for RADV. We could also do it for radeonsi, but we would need to add a CAP for it.
vkpipeline-db results: Totals from affected shaders: SGPRS: 4104 -> 3728 (-9.16 %) VGPRS: 3604 -> 3472 (-3.66 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 186868 -> 186740 (-0.07 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 935 -> 968 (3.53 %) Wait states: 0 -> 0 (0.00 %) The VGPR and Max wave changes are in Rise of The Tomb Raider, there were also SGPR changes in Nier. --- src/amd/vulkan/radv_pipeline.c | 2 +- src/compiler/nir/nir.h | 3 +- src/compiler/nir/nir_linking_helpers.c | 111 +++++++++++++++++++++- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- 4 files changed, 114 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 8f283365069..99dc54e8ed3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1823,7 +1823,7 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders) ordered_shaders[i - 1]); nir_compact_varyings(ordered_shaders[i], - ordered_shaders[i - 1], true); + ordered_shaders[i - 1], true, true); if (progress) { if (nir_lower_global_vars_to_local(ordered_shaders[i])) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 60ea4fbc7ff..e69612c23ae 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2798,7 +2798,8 @@ bool nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list, uint64_t *used_by_other_stage, uint64_t *used_by_other_stage_patches); void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, - bool default_to_smooth_interp); + bool default_to_smooth_interp, + bool ignore_constant_varyings); void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); typedef enum { diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 4498d57ac4a..ab29fcc8a1f 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ 
b/src/compiler/nir/nir_linking_helpers.c @@ -524,6 +524,111 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps, &producer->info.patch_outputs_read); } +/* + * On AMD hardware we can eliminate certain constant outputs between the + * vertex and fragment shader. Here we avoid packing varyings with the outputs + * we can potentialy eliminate. + * + * The four constant combination we can eliminate are as follows: + * + * 0.0, 0.0, 0.0, {0.0,1.0} + * 1.0, 1.0, 1.0, {0.0,1.0} + * + * TODO: We could also try repacking varying constants in order to produce + * these combinations. + */ +static void +skip_packing_constant_varyings(nir_shader *producer, nir_shader *consumer, + uint8_t *comps) +{ + if (consumer->info.stage != MESA_SHADER_FRAGMENT) + return; + + if (producer->info.stage != MESA_SHADER_VERTEX && + producer->info.stage != MESA_SHADER_TESS_EVAL) + return; + + uint8_t comps_zero[MAX_VARYING] = {0}; + uint8_t comps_one[MAX_VARYING] = {0}; + uint32_t skip_locations = 0; + + nir_function_impl *impl = nir_shader_get_entrypoint(producer); + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->intrinsic != nir_intrinsic_store_deref) + continue; + + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + + if (var->data.location < VARYING_SLOT_VAR0 || + var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING) { + continue; + } + + unsigned location = var->data.location - VARYING_SLOT_VAR0; + if (skip_locations & (1 << location)) { + continue; + } + + /* To keep things simple only work with varyings we were able to + * previously split. 
+ */ + if (intr->num_components != 1) { + skip_locations |= 1 << location; + continue; + } + + if (glsl_get_base_type(var->type) != GLSL_TYPE_FLOAT) { + skip_locations |= 1 << location; + continue; + } + + if (intr->src[1].ssa->parent_instr->type != nir_instr_type_load_const) { + skip_locations |= 1 << location; + continue; + } + + nir_load_const_instr *const_instr = + nir_instr_as_load_const(intr->src[1].ssa->parent_instr); + + assert(const_instr->def.bit_size == 32); + if (const_instr->value.f32[0] == 0.0) { + if (comps_one[location] & 0x7) { + skip_locations |= 1 << location; + continue; + } else { + comps_zero[location] |= 1 << var->data.location_frac; + } + } else if (const_instr->value.f32[0] == 1.0) { + if (comps_zero[location] & 0x7) { + skip_locations |= 1 << location; + continue; + } else { + comps_one[location] |= 1 << var->data.location_frac; + } + } else { + skip_locations |= 1 << location; + } + } + } + + for (unsigned i = 0; i < MAX_VARYING; i++) { + if (skip_locations & 1 << i) + continue; + + if (comps_one[i] || comps_zero[i]) { + comps[i] = 0xf; + } + } +} + /* We assume that this has been called more-or-less directly after * remove_unused_varyings. 
At this point, all of the varyings that we * aren't going to be using have been completely removed and the @@ -536,7 +641,8 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps, */ void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, - bool default_to_smooth_interp) + bool default_to_smooth_interp, + bool ignore_constant_varyings) { assert(producer->info.stage != MESA_SHADER_FRAGMENT); assert(consumer->info.stage != MESA_SHADER_VERTEX); @@ -545,6 +651,9 @@ nir_compact_varyings(nir_shader *producer, nir_shader *consumer, uint8_t interp_type[MAX_VARYINGS_INCL_PATCH] = {0}; uint8_t interp_loc[MAX_VARYINGS_INCL_PATCH] = {0}; + if (ignore_constant_varyings) + skip_packing_constant_varyings(producer, consumer, comps); + get_slot_component_masks_and_interp_types(&producer->outputs, comps, interp_type, interp_loc, producer->info.stage, diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 911284401e0..c6bb1ac1162 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -751,7 +751,7 @@ st_link_nir(struct gl_context *ctx, */ if (!prev_shader->sh.LinkedTransformFeedback) nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir, - nir, ctx->API != API_OPENGL_COMPAT); + nir, ctx->API != API_OPENGL_COMPAT, false); } prev = i; } -- 2.17.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev