From: Marek Olšák <marek.ol...@amd.com>

This is the first user of optimized monolithic shader variants.

Cull distances can't be disabled by states, so shaders that write cull
distances never use this optimization.
---
 src/gallium/drivers/radeonsi/si_shader.c        |  6 ++++++
 src/gallium/drivers/radeonsi/si_shader.h        |  5 +++--
 src/gallium/drivers/radeonsi/si_state.c         | 10 +++++++++-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 21 +++++++++++++++++++--
 4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cd436f7..514c23b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2287,23 +2287,29 @@ handle_semantic:
                        target = V_008DFC_SQ_EXP_POS;
                        break;
                case TGSI_SEMANTIC_COLOR:
                case TGSI_SEMANTIC_BCOLOR:
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
                        shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
                        break;
                case TGSI_SEMANTIC_CLIPDIST:
+                       if (shader->key.opt.hw_vs.clip_disable) {
+                               semantic_name = TGSI_SEMANTIC_GENERIC;
+                               goto handle_semantic;
+                       }
                        target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
                        break;
                case TGSI_SEMANTIC_CLIPVERTEX:
+                       if (shader->key.opt.hw_vs.clip_disable)
+                               continue;
                        si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
                        continue;
                case TGSI_SEMANTIC_PRIMID:
                case TGSI_SEMANTIC_FOG:
                case TGSI_SEMANTIC_TEXCOORD:
                case TGSI_SEMANTIC_GENERIC:
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
                        shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 38aa361..4cbd1c2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -313,22 +313,20 @@ struct si_shader_selector {
 
 /* Common VS bits between the shader key and the prolog key. */
 struct si_vs_prolog_bits {
        unsigned        instance_divisors[SI_NUM_VERTEX_BUFFERS];
 };
 
 /* Common VS bits between the shader key and the epilog key. */
 struct si_vs_epilog_bits {
        unsigned        export_prim_id:1; /* when PS needs it and GS is disabled */
        /* TODO:
-        * - skip clipdist, culldist (including clipvertex code) exports based
-        *   on which clip_plane_enable bits are set
         * - skip layer, viewport, clipdist, and culldist parameter exports
         *   if PS doesn't read them
         */
 };
 
 /* Common TCS bits between the shader key and the epilog key. */
 struct si_tcs_epilog_bits {
        unsigned        prim_mode:3;
 };
 
@@ -431,20 +429,23 @@ struct si_shader_key {
                        /* One pair of bits for every input: SI_FIX_FETCH_* enums. */
                        uint32_t        fix_fetch;
                } vs;
                struct {
                        uint64_t        inputs_to_copy; /* for fixed-func TCS */
                } tcs;
        } mono;
 
        /* Optimization flags for asynchronous compilation only. */
        union {
+               struct {
+                       unsigned        clip_disable:1;
+               } hw_vs; /* HW VS (it can be VS, TES, GS) */
        } opt;
 };
 
 struct si_shader_config {
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spilled_sgprs;
        unsigned                        spilled_vgprs;
        unsigned                        lds_size;
        unsigned                        spi_ps_input_ena;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 7d118b0..b30bec1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -637,29 +637,37 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 
        radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
        radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
 }
 
 #define SIX_BITS 0x3F
 
 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+       struct si_shader *vs = si_get_vs_state(sctx);
        struct tgsi_shader_info *info = si_get_vs_info(sctx);
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        unsigned window_space =
           info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
        unsigned clipdist_mask =
                info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
        unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
        unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
-       unsigned total_mask = clipdist_mask | culldist_mask;
+       unsigned total_mask;
+
+       if (vs->key.opt.hw_vs.clip_disable) {
+               assert(!info->culldist_writemask);
+               clipdist_mask = 0;
+               culldist_mask = 0;
+       }
+       total_mask = clipdist_mask | culldist_mask;
 
        radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
                S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
                S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
                S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
                S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
                S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
                S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
                S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
                                            info->writes_edgeflag ||
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 00ccbbd..1d116f6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -847,20 +847,31 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen,
 
 static unsigned si_get_alpha_test_func(struct si_context *sctx)
 {
        /* Alpha-test should be disabled if colorbuffer 0 is integer. */
        if (sctx->queued.named.dsa)
                return sctx->queued.named.dsa->alpha_func;
 
        return PIPE_FUNC_ALWAYS;
 }
 
+static void si_shader_selector_key_hw_vs(struct si_context *sctx,
+                                        struct si_shader_selector *vs,
+                                        struct si_shader_key *key)
+{
+       key->opt.hw_vs.clip_disable =
+               sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
+               (vs->info.clipdist_writemask ||
+                vs->info.writes_clipvertex) &&
+               !vs->info.culldist_writemask;
+}
+
 /* Compute the key for the hw shader variant */
 static inline void si_shader_selector_key(struct pipe_context *ctx,
                                          struct si_shader_selector *sel,
                                          struct si_shader_key *key)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        unsigned i;
 
        memset(key, 0, sizeof(*key));
 
@@ -875,36 +886,42 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
                        key->mono.vs.fix_fetch =
                                sctx->vertex_elements->fix_fetch &
                                u_bit_consecutive(0, 2 * count);
                }
                if (sctx->tes_shader.cso)
                        key->as_ls = 1;
                else if (sctx->gs_shader.cso)
                        key->as_es = 1;
                else {
+                       si_shader_selector_key_hw_vs(sctx, sel, key);
+
                        if (sctx->ps_shader.cso && 
sctx->ps_shader.cso->info.uses_primid)
                                key->part.vs.epilog.export_prim_id = 1;
                }
                break;
        case PIPE_SHADER_TESS_CTRL:
                key->part.tcs.epilog.prim_mode =
                        sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 
                if (sel == sctx->fixed_func_tcs_shader.cso)
                        key->mono.tcs.inputs_to_copy = sctx->vs_shader.cso->outputs_written;
                break;
        case PIPE_SHADER_TESS_EVAL:
                if (sctx->gs_shader.cso)
                        key->as_es = 1;
-               else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
-                       key->part.tes.epilog.export_prim_id = 1;
+               else {
+                       si_shader_selector_key_hw_vs(sctx, sel, key);
+
+                       if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
+                               key->part.tes.epilog.export_prim_id = 1;
+               }
                break;
        case PIPE_SHADER_GEOMETRY:
                key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
                break;
        case PIPE_SHADER_FRAGMENT: {
                struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
                struct si_state_blend *blend = sctx->queued.named.blend;
 
                if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
                    sel->info.colors_written == 0x1)
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to