From: Marek Olšák <marek.ol...@amd.com>

A past commit added the ability to compile "optimized" shader variants
asynchronously (not stalling the app).

This commit builds upon that and adds what is basically a runtime shader
linker. If a VS output isn't used by the currently-bound PS, a new VS
compilation is started without that output. The new shader variant
is used when it's ready.

Every app I've seen that uses separate shader objects had unused VS outputs.

Eliminating unused/useless VS outputs also eliminates the corresponding
vertex attribute loads.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 26 ++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h        |  7 ++---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 37 ++++++++++++++++++++++---
 3 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f8de049..973750b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2274,20 +2274,40 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
        unsigned pos_idx;
        int i;
 
        if (outputs && ctx->shader->selector->so.num_outputs) {
                si_llvm_emit_streamout(ctx, outputs, noutput);
        }
 
        for (i = 0; i < noutput; i++) {
                semantic_name = outputs[i].name;
                semantic_index = outputs[i].sid;
+               bool export_param = true;
+
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_POSITION: /* ignore these */
+               case TGSI_SEMANTIC_PSIZE:
+               case TGSI_SEMANTIC_CLIPVERTEX:
+               case TGSI_SEMANTIC_EDGEFLAG:
+                       break;
+               case TGSI_SEMANTIC_GENERIC:
+               case TGSI_SEMANTIC_CLIPDIST:
+                       if (shader->key.opt.hw_vs.kill_outputs &
+                           (1ull << 
si_shader_io_get_unique_index(semantic_name, semantic_index)))
+                               export_param = false;
+                       break;
+               default:
+                       if (shader->key.opt.hw_vs.kill_outputs2 &
+                           (1u << 
si_shader_io_get_unique_index2(semantic_name, semantic_index)))
+                               export_param = false;
+                       break;
+               }
 
 handle_semantic:
                /* Select the correct target */
                switch(semantic_name) {
                case TGSI_SEMANTIC_PSIZE:
                        psize_value = outputs[i].values[0];
                        continue;
                case TGSI_SEMANTIC_EDGEFLAG:
                        edgeflag_value = outputs[i].values[0];
                        continue;
@@ -2297,20 +2317,22 @@ handle_semantic:
                        goto handle_semantic;
                case TGSI_SEMANTIC_VIEWPORT_INDEX:
                        viewport_index_value = outputs[i].values[0];
                        semantic_name = TGSI_SEMANTIC_GENERIC;
                        goto handle_semantic;
                case TGSI_SEMANTIC_POSITION:
                        target = V_008DFC_SQ_EXP_POS;
                        break;
                case TGSI_SEMANTIC_COLOR:
                case TGSI_SEMANTIC_BCOLOR:
+                       if (!export_param)
+                               continue;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        assert(i < 
ARRAY_SIZE(shader->info.vs_output_param_offset));
                        shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
                        break;
                case TGSI_SEMANTIC_CLIPDIST:
                        if (shader->key.opt.hw_vs.clip_disable) {
                                semantic_name = TGSI_SEMANTIC_GENERIC;
                                goto handle_semantic;
                        }
@@ -2318,20 +2340,22 @@ handle_semantic:
                        break;
                case TGSI_SEMANTIC_CLIPVERTEX:
                        if (shader->key.opt.hw_vs.clip_disable)
                                continue;
                        si_llvm_emit_clipvertex(bld_base, pos_args, 
outputs[i].values);
                        continue;
                case TGSI_SEMANTIC_PRIMID:
                case TGSI_SEMANTIC_FOG:
                case TGSI_SEMANTIC_TEXCOORD:
                case TGSI_SEMANTIC_GENERIC:
+                       if (!export_param)
+                               continue;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        assert(i < 
ARRAY_SIZE(shader->info.vs_output_param_offset));
                        shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
                        break;
                default:
                        target = 0;
                        fprintf(stderr,
                                "Warning: SI unhandled vs output type:%d\n",
                                semantic_name);
@@ -7070,21 +7094,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
         * conversion fails. */
        if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
            !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
                tgsi_dump(sel->tokens, 0);
                si_dump_streamout(&sel->so);
        }
 
        si_init_shader_ctx(&ctx, sscreen, shader, tm);
        ctx.separate_prolog = !is_monolithic;
 
-       memset(shader->info.vs_output_param_offset, 0xff,
+       memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
               sizeof(shader->info.vs_output_param_offset));
 
        shader->info.uses_instanceid = sel->info.uses_instanceid;
 
        bld_base = &ctx.soa.bld_base;
        ctx.load_system_value = declare_system_value;
 
        if (!si_compile_tgsi_main(&ctx, shader)) {
                si_llvm_dispose(&ctx);
                return -1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc9c913..aa37676 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -315,24 +315,20 @@ struct si_shader_selector {
  */
 
 /* Common VS bits between the shader key and the prolog key. */
 struct si_vs_prolog_bits {
        unsigned        instance_divisors[SI_NUM_VERTEX_BUFFERS];
 };
 
 /* Common VS bits between the shader key and the epilog key. */
 struct si_vs_epilog_bits {
        unsigned        export_prim_id:1; /* when PS needs it and GS is 
disabled */
-       /* TODO:
-        * - skip layer, viewport, clipdist, and culldist parameter exports
-        *   if PS doesn't read them
-        */
 };
 
 /* Common TCS bits between the shader key and the epilog key. */
 struct si_tcs_epilog_bits {
        unsigned        prim_mode:3;
 };
 
 struct si_gs_prolog_bits {
        unsigned        tri_strip_adj_fix:1;
 };
@@ -433,20 +429,22 @@ struct si_shader_key {
                        uint32_t        fix_fetch;
                } vs;
                struct {
                        uint64_t        inputs_to_copy; /* for fixed-func TCS */
                } tcs;
        } mono;
 
        /* Optimization flags for asynchronous compilation only. */
        union {
                struct {
+                       uint64_t        kill_outputs; /* "get_unique_index" 
bits */
+                       uint32_t        kill_outputs2; /* "get_unique_index2" 
bits */
                        unsigned        clip_disable:1;
                } hw_vs; /* HW VS (it can be VS, TES, GS) */
        } opt;
 };
 
 struct si_shader_config {
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spilled_sgprs;
        unsigned                        spilled_vgprs;
@@ -461,20 +459,21 @@ struct si_shader_config {
 
 enum {
        /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
        EXP_PARAM_OFFSET_0 = 0,
        EXP_PARAM_OFFSET_31 = 31,
        /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
        EXP_PARAM_DEFAULT_VAL_0000 = 64,
        EXP_PARAM_DEFAULT_VAL_0001,
        EXP_PARAM_DEFAULT_VAL_1110,
        EXP_PARAM_DEFAULT_VAL_1111,
+       EXP_PARAM_UNDEFINED = 255,
 };
 
 /* GCN-specific shader info. */
 struct si_shader_info {
        ubyte                   vs_output_param_offset[SI_MAX_VS_OUTPUTS];
        ubyte                   num_input_sgprs;
        ubyte                   num_input_vgprs;
        char                    face_vgpr_index;
        bool                    uses_instanceid;
        ubyte                   nr_pos_exports;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e4d8747..7834f87 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -851,25 +851,49 @@ static unsigned si_get_alpha_test_func(struct si_context *sctx)
        if (sctx->queued.named.dsa)
                return sctx->queued.named.dsa->alpha_func;
 
        return PIPE_FUNC_ALWAYS;
 }
 
 static void si_shader_selector_key_hw_vs(struct si_context *sctx,
                                         struct si_shader_selector *vs,
                                         struct si_shader_key *key)
 {
+       struct si_shader_selector *ps = sctx->ps_shader.cso;
+
        key->opt.hw_vs.clip_disable =
                sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
                (vs->info.clipdist_writemask ||
                 vs->info.writes_clipvertex) &&
                !vs->info.culldist_writemask;
+
+       /* Find out if PS is disabled. */
+       bool ps_disabled = ps == NULL;
+
+       /* Find out which VS outputs aren't used by the PS. */
+       uint64_t outputs_written = vs->outputs_written;
+       uint32_t outputs_written2 = vs->outputs_written2;
+       uint64_t inputs_read = 0;
+       uint32_t inputs_read2 = 0;
+
+       outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+
+       if (!ps_disabled) {
+               inputs_read = ps->inputs_read;
+               inputs_read2 = ps->inputs_read2;
+       }
+
+       uint64_t linked = outputs_written & inputs_read;
+       uint32_t linked2 = outputs_written2 & inputs_read2;
+
+       key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
+       key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
 }
 
 /* Compute the key for the hw shader variant */
 static inline void si_shader_selector_key(struct pipe_context *ctx,
                                          struct si_shader_selector *sel,
                                          struct si_shader_key *key)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        unsigned i;
 
@@ -1778,25 +1802,30 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 
        for (j = 0; j < vsinfo->num_outputs; j++) {
                if (name == vsinfo->output_semantic_name[j] &&
                    index == vsinfo->output_semantic_index[j]) {
                        offset = vs->info.vs_output_param_offset[j];
 
                        if (offset <= EXP_PARAM_OFFSET_31) {
                                /* The input is loaded from parameter memory. */
                                ps_input_cntl |= S_028644_OFFSET(offset);
                        } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
-                               /* The input is a DEFAULT_VAL constant. */
-                               assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
-                                      offset <= EXP_PARAM_DEFAULT_VAL_1111);
+                               if (offset == EXP_PARAM_UNDEFINED) {
+                                       /* This can happen with depth-only 
rendering. */
+                                       offset = 0;
+                               } else {
+                                       /* The input is a DEFAULT_VAL constant. 
*/
+                                       assert(offset >= 
EXP_PARAM_DEFAULT_VAL_0000 &&
+                                              offset <= 
EXP_PARAM_DEFAULT_VAL_1111);
+                                       offset -= EXP_PARAM_DEFAULT_VAL_0000;
+                               }
 
-                               offset -= EXP_PARAM_DEFAULT_VAL_0000;
                                ps_input_cntl = S_028644_OFFSET(0x20) |
                                                S_028644_DEFAULT_VAL(offset);
                        }
                        break;
                }
        }
 
        if (name == TGSI_SEMANTIC_PRIMID)
                /* PrimID is written after the last output. */
                ps_input_cntl |= 
S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]);
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to