I really like the idea. :-)

Though, I have two general comments:

1) I think it would be better to introduce some sort of compare helper functions for the different state changes. Also, for correctness it might be safer to do the opposite checks (if someone introduce a new field and forget to update the relevant part).

2) I would suggest to introduce uses_instance_divisors in a separate patch.

Btw, I'm not going to review patch 4 because I'm not familiar enough with this part.

On 06/05/2017 06:51 PM, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

This and the previous commit decrease IB sizes and the number of
si_update_shaders invocations as follows:

                  IB size   si_update_shader calls
Borderlands 2      -10%            -27%
Deus Ex: MD         -5%            -11%
Talos Principle     -8%            -30%
---
  src/gallium/drivers/radeonsi/si_state.c         | 68 ++++++++++++++++++++++---
  src/gallium/drivers/radeonsi/si_state.h         |  1 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++--
  3 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 5e5f564..323ec5e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct 
pipe_context *ctx,
static void *si_create_blend_state(struct pipe_context *ctx,
                                   const struct pipe_blend_state *state)
  {
        return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
  }
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
-       si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
-       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
-       sctx->do_update_shaders = true;
+       struct si_state_blend *old_blend = sctx->queued.named.blend;
+       struct si_state_blend *blend = (struct si_state_blend *)state;
+
+       if (!state)
+               return;
+
+       if (!old_blend ||
+            old_blend->cb_target_mask != blend->cb_target_mask ||
+            old_blend->dual_src_blend != blend->dual_src_blend)
+               si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+       si_pm4_bind_state(sctx, blend, state);
+
+       if (!old_blend ||
+           old_blend->cb_target_mask != blend->cb_target_mask ||
+           old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
+           old_blend->alpha_to_one != blend->alpha_to_one ||
+           old_blend->dual_src_blend != blend->dual_src_blend ||
+           old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+           old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
+               sctx->do_update_shaders = true;
  }
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
        si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
  }
static void si_set_blend_color(struct pipe_context *ctx,
                               const struct pipe_blend_color *state)
@@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context *ctx, 
void *state)
        }
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
        sctx->current_vs_state |= 
S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz); si_pm4_bind_state(sctx, rasterizer, rs);
        si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->clip_regs);
+       if (!old_rs ||
+           old_rs->clip_plane_enable != rs->clip_plane_enable ||
+           old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
+               si_mark_atom_dirty(sctx, &sctx->clip_regs);
+
        sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
                rs->line_stipple_enable;
-       sctx->do_update_shaders = true;
+
+       if (!old_rs ||
+           old_rs->clip_plane_enable != rs->clip_plane_enable ||
+           old_rs->rasterizer_discard != rs->rasterizer_discard ||
+           old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
+           old_rs->flatshade != rs->flatshade ||
+           old_rs->two_side != rs->two_side ||
+           old_rs->multisample_enable != rs->multisample_enable ||
+           old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
+           old_rs->poly_smooth != rs->poly_smooth ||
+           old_rs->line_smooth != rs->line_smooth ||
+           old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
+           old_rs->force_persample_interp != rs->force_persample_interp)
+               sctx->do_update_shaders = true;
  }
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
if (sctx->queued.named.rasterizer == state)
                si_pm4_bind_state(sctx, poly_offset, NULL);
        si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer 
*)state);
  }
@@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct pipe_context 
*ctx,
                si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 
fui(state->depth.bounds_min));
                si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 
fui(state->depth.bounds_max));
        }
return dsa;
  }
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
  {
          struct si_context *sctx = (struct si_context *)ctx;
+       struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
          struct si_state_dsa *dsa = state;
if (!state)
                  return;
si_pm4_bind_state(sctx, dsa, dsa); if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
                   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
                sctx->stencil_ref.dsa_part = dsa->stencil_ref;
                si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
        }
-       sctx->do_update_shaders = true;
+
+       if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
+               sctx->do_update_shaders = true;
  }
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
        si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
  }
static void *si_create_db_flush_dsa(struct si_context *sctx)
  {
@@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct 
si_context *sctx)
static void si_set_framebuffer_state(struct pipe_context *ctx,
                                     const struct pipe_framebuffer_state *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
        struct pipe_constant_buffer constbuf = {0};
        struct r600_surface *surf = NULL;
        struct r600_texture *rtex;
        bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
        unsigned old_nr_samples = sctx->framebuffer.nr_samples;
+       unsigned old_colorbuf_enabled_4bit = 
sctx->framebuffer.colorbuf_enabled_4bit;
        bool unbound = false;
        int i;
/* Fast path for linear <-> sRGB changes and no-op changes. */
        if (state->nr_cbufs &&
            state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
            state->zsbuf == sctx->framebuffer.state.zsbuf) {
                struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
/* See if the surfaces are equivalent. */
@@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct 
pipe_context *ctx,
                surf = (struct r600_surface*)state->zsbuf;
                rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
                        si_init_depth_surface(sctx, surf);
                }
                r600_context_add_resource_size(ctx, surf->base.texture);
        }
si_update_poly_offset_state(sctx);
-       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
        si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+ if (old_colorbuf_enabled_4bit != sctx->framebuffer.colorbuf_enabled_4bit)
+               si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
        if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
/* Set sample locations as fragment shader constants. */
                switch (sctx->framebuffer.nr_samples) {
                case 1:
@@ -3727,20 +3768,23 @@ static void *si_create_vertex_elements(struct 
pipe_context *ctx,
                unsigned data_format, num_format;
                int first_non_void;
                unsigned vbo_index = elements[i].vertex_buffer_index;
                unsigned char swizzle[4];
if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
                        FREE(v);
                        return NULL;
                }
+ if (elements[i].instance_divisor)
+                       v->uses_instance_divisors = true;
+
                if (!used[vbo_index]) {
                        v->first_vb_use_mask |= 1 << i;
                        used[vbo_index] = true;
                }
desc = util_format_description(elements[i].src_format);
                first_non_void = 
util_format_get_first_non_void_channel(elements[i].src_format);
                data_format = si_translate_buffer_dataformat(ctx->screen, desc, 
first_non_void);
                num_format = si_translate_buffer_numformat(ctx->screen, desc, 
first_non_void);
                channel = first_non_void >= 0 ? &desc->channel[first_non_void] 
: NULL;
@@ -3840,25 +3884,33 @@ static void *si_create_vertex_elements(struct 
pipe_context *ctx,
                                   S_008F0C_DATA_FORMAT(data_format);
        }
        memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * 
count);
return v;
  }
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
+       struct si_vertex_element *old = sctx->vertex_elements;
        struct si_vertex_element *v = (struct si_vertex_element*)state;
sctx->vertex_elements = v;
        sctx->vertex_buffers_dirty = true;
-       sctx->do_update_shaders = true;
+
+       if (v &&
+           (!old ||
+            old->count != v->count ||
+            old->uses_instance_divisors != v->uses_instance_divisors ||
+            v->uses_instance_divisors || /* we don't check which divisors 
changed */
+            memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 
v->count)))
+               sctx->do_update_shaders = true;
  }
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
if (sctx->vertex_elements == state)
                sctx->vertex_elements = NULL;
        FREE(state);
  }
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 275f830..4da51be 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -102,20 +102,21 @@ struct si_vertex_element
  {
        unsigned                        count;
        unsigned                        first_vb_use_mask;
        /* Vertex buffer descriptor list size aligned for optimal prefetch. */
        unsigned                        desc_list_byte_size;
uint8_t fix_fetch[SI_MAX_ATTRIBS];
        uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
        uint32_t                        format_size[SI_MAX_ATTRIBS];
        struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
+       bool                            uses_instance_divisors;
  };
union si_state {
        struct {
                struct si_state_blend           *blend;
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
                struct si_pm4_state             *poly_offset;
                struct si_pm4_state             *ls;
                struct si_pm4_state             *hs;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c21f855..677a6de 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct pipe_context 
*ctx, void *state)
        r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
        si_set_active_descriptors_for_shader(sctx, sel);
        si_update_streamout_state(sctx);
        si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
                            si_get_vs(sctx)->cso, si_get_vs_state(sctx));
  }
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
+       struct si_shader_selector *old_sel = sctx->ps_shader.cso;
        struct si_shader_selector *sel = state;
/* skip if supplied shader is one already in use */
-       if (sctx->ps_shader.cso == sel)
+       if (old_sel == sel)
                return;
sctx->ps_shader.cso = sel;
        sctx->ps_shader.current = sel ? sel->first_variant : NULL;
        sctx->do_update_shaders = true;
-       if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
-               si_update_tess_uses_prim_id(sctx);
-       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+       if (sel) {
+               if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+                       si_update_tess_uses_prim_id(sctx);
+
+               if (!old_sel ||
+                   old_sel->info.colors_written != sel->info.colors_written)
+                       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+       }
        si_set_active_descriptors_for_shader(sctx, sel);
  }
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
  {
        if (shader->is_optimized) {
                
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
                                    &shader->optimized_ready);
                util_queue_fence_destroy(&shader->optimized_ready);
        }
@@ -3081,20 +3088,23 @@ static void si_update_vgt_shader_config(struct 
si_context *sctx)
        si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
  }
bool si_update_shaders(struct si_context *sctx)
  {
        struct pipe_context *ctx = (struct pipe_context*)sctx;
        struct si_compiler_ctx_state compiler_state;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        struct si_shader *old_vs = si_get_vs_state(sctx);
        bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : 
false;
+       struct si_shader *old_ps = sctx->ps_shader.current;
+       unsigned old_spi_shader_col_format =
+               old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
        int r;
compiler_state.tm = sctx->tm;
        compiler_state.debug = sctx->b.debug;
        compiler_state.is_debug_context = sctx->is_debug;
/* Update stages before GS. */
        if (sctx->tes_shader.cso) {
                if (!sctx->tf_ring) {
                        si_init_tess_factor_ring(sctx);
@@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx)
                        S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != 
PIPE_FUNC_ALWAYS);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
                    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
                    sctx->flatshade != rs->flatshade) {
                        sctx->sprite_coord_enable = rs->sprite_coord_enable;
                        sctx->flatshade = rs->flatshade;
                        si_mark_atom_dirty(sctx, &sctx->spi_map);
                }
- if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
+               if (sctx->screen->b.rbplus_allowed &&
+                   si_pm4_state_changed(sctx, ps) &&
+                   (!old_ps ||
+                    old_spi_shader_col_format !=
+                    
sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
                        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
if (sctx->ps_db_shader_control != db_shader_control) {
                        sctx->ps_db_shader_control = db_shader_control;
                        si_mark_atom_dirty(sctx, &sctx->db_render_state);
                }
if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
                        sctx->smoothing_enabled = 
sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
                        si_mark_atom_dirty(sctx, &sctx->msaa_config);

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to