On 10.06.2012 04:07, Vadim Girlin wrote:
Shader variants are stored in a list; the lookup key is based on the
states that require different hw shaders - currently rctx->two_side (all
gpus) and rctx->nr_cbufs (evergreen/cayman, when the writes_all property is set).

  - use simple list instead of keymap as suggested by Marek on irc
  - call r600_adjust_gprs from r600_bind_vs_shader for r6xx/r7xx
    (r600_shader_select isn't used for vertex shaders currently)

Improves performance for some apps, e.g. FlightGear -
see https://bugs.freedesktop.org/show_bug.cgi?id=50360

Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
Hmm, I'm really starting to wonder whether it might be easier to avoid having different shader variants by using CF_COND_BOOL/CF_COND_NOT_BOOL for those two special cases, i.e. build the shader in a way that it can handle both variants and then select the one we currently want with the CF bool constants.

If the shader overhead for it is too much, we might also try using this implementation only if the application really starts using the features in question.


  src/gallium/drivers/r600/evergreen_state.c   |    4 +-
  src/gallium/drivers/r600/r600_pipe.h         |   38 ++++-
  src/gallium/drivers/r600/r600_shader.c       |   19 +--
  src/gallium/drivers/r600/r600_state.c        |   23 +--
  src/gallium/drivers/r600/r600_state_common.c |  228 ++++++++++++++++++--------
  5 files changed, 215 insertions(+), 97 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
index b618ca8..2bc1f67 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1843,12 +1843,12 @@ void evergreen_init_state_functions(struct r600_context 

        rctx->context.create_blend_state = evergreen_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = 
-       rctx->context.create_fs_state = r600_create_shader_state;
+       rctx->context.create_fs_state = r600_create_shader_state_ps;
        rctx->context.create_rasterizer_state = evergreen_create_rs_state;
        rctx->context.create_sampler_state = evergreen_create_sampler_state;
        rctx->context.create_sampler_view = evergreen_create_sampler_view;
        rctx->context.create_vertex_elements_state = 
-       rctx->context.create_vs_state = r600_create_shader_state;
+       rctx->context.create_vs_state = r600_create_shader_state_vs;
        rctx->context.bind_blend_state = r600_bind_blend_state;
        rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
        rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler;
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
index f2865d2..e3ac631 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -184,18 +184,38 @@ struct r600_vertex_element
        struct r600_pipe_state          rstate;

+struct r600_pipe_shader;
+struct r600_pipe_shader_selector {
+       struct r600_pipe_shader *current;
+       struct tgsi_token       *tokens;
+       struct pipe_stream_output_info  so;
+       unsigned        num_shaders;
+       unsigned        type;
+       /* 1 on evergreen+ when the shader contains
+        * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
+        * Used to determine whether we need to include nr_cbufs in the key */
+       unsigned        eg_fs_write_all;
  struct r600_pipe_shader {
+       struct r600_pipe_shader_selector *selector;
+       struct r600_pipe_shader *next_variant;
        struct r600_shader              shader;
        struct r600_pipe_state          rstate;
        struct r600_resource            *bo;
        struct r600_resource            *bo_fetch;
        struct r600_vertex_element      vertex_elements;
-       struct tgsi_token               *tokens;
        unsigned        sprite_coord_enable;
        unsigned        flatshade;
        unsigned        pa_cl_vs_out_cntl;
-       unsigned        ps_cb_shader_mask;
-       struct pipe_stream_output_info  so;
+       unsigned        ps_cb_shader_mask;
+       unsigned        key;

  struct r600_pipe_sampler_state {
@@ -271,8 +291,8 @@ struct r600_context {
        struct pipe_stencil_ref         stencil_ref;
        struct pipe_viewport_state      viewport;
        struct pipe_clip_state          clip;
-       struct r600_pipe_shader         *ps_shader;
-       struct r600_pipe_shader         *vs_shader;
+       struct r600_pipe_shader_selector        *ps_shader;
+       struct r600_pipe_shader_selector        *vs_shader;
        struct r600_pipe_compute        *cs_shader;
        struct r600_pipe_rasterizer     *rasterizer;
        struct r600_pipe_state          vgt;
@@ -436,8 +456,6 @@ int r600_compute_shader_create(struct pipe_context * ctx,
        LLVMModuleRef mod,  struct r600_bytecode * bytecode);
  void r600_pipe_shader_destroy(struct pipe_context *ctx, struct 
r600_pipe_shader *shader);
-int r600_find_vs_semantic_index(struct r600_shader *vs,
-                               struct r600_shader *ps, int id);

  /* r600_state.c */
  void r600_set_scissor_state(struct r600_context *rctx,
@@ -497,8 +515,10 @@ void r600_sampler_view_destroy(struct pipe_context *ctx,
                               struct pipe_sampler_view *state);
  void r600_delete_state(struct pipe_context *ctx, void *state);
  void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
-void *r600_create_shader_state(struct pipe_context *ctx,
-                              const struct pipe_shader_state *state);
+void *r600_create_shader_state_ps(struct pipe_context *ctx,
+                   const struct pipe_shader_state *state);
+void *r600_create_shader_state_vs(struct pipe_context *ctx,
+                   const struct pipe_shader_state *state);
  void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
  void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
  void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
diff --git a/src/gallium/drivers/r600/r600_shader.c 
index 63b9a03..8eb32da 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -109,6 +109,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, 
struct r600_pipe_shader *s
        static int dump_shaders = -1;
        struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_pipe_shader_selector *sel = shader->selector;
        int r;

        /* Would like some magic "get_bool_option_once" routine.
@@ -118,16 +119,16 @@ int r600_pipe_shader_create(struct pipe_context *ctx, 
struct r600_pipe_shader *s

        if (dump_shaders) {
-               tgsi_dump(shader->tokens, 0);
+               tgsi_dump(sel->tokens, 0);

-               if (shader->so.num_outputs) {
+               if (sel->so.num_outputs) {
                        unsigned i;
                        fprintf(stderr, "STREAMOUT\n");
-                       for (i = 0; i<  shader->so.num_outputs; i++) {
-                               unsigned mask = ((1<<  
shader->so.output[i].num_components) - 1)<<
+                       for (i = 0; i<  sel->so.num_outputs; i++) {
+                               unsigned mask = ((1<<  
sel->so.output[i].num_components) - 1)<<
                                fprintf(stderr, "  %i: MEM_STREAM0_BUF%i 
OUT[%i].%s%s%s%s\n", i,
-                                       shader->so.output[i].output_buffer, 
+                                       sel->so.output[i].output_buffer, 
                                        mask&  1 ? "x" : "_",
                                        (mask>>  1)&  1 ? "y" : "_",
                                        (mask>>  2)&  1 ? "z" : "_",
@@ -156,8 +157,6 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, 
struct r600_pipe_shader
        pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
-       memset(&shader->shader,0,sizeof(struct r600_shader));

@@ -1118,8 +1117,8 @@ static int process_twoside_color_inputs(struct 
r600_shader_ctx *ctx)
  static int r600_shader_from_tgsi(struct r600_context * rctx, struct 
r600_pipe_shader *pipeshader)
        struct r600_shader *shader =&pipeshader->shader;
-       struct tgsi_token *tokens = pipeshader->tokens;
-       struct pipe_stream_output_info so = pipeshader->so;
+       struct tgsi_token *tokens = pipeshader->selector->tokens;
+       struct pipe_stream_output_info so = pipeshader->selector->so;
        struct tgsi_full_immediate *immediate;
        struct tgsi_full_property *property;
        struct r600_shader_ctx ctx;
diff --git a/src/gallium/drivers/r600/r600_state.c 
index 124eba2..d94c16b 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1806,12 +1806,12 @@ void r600_init_state_functions(struct r600_context 

        rctx->context.create_blend_state = r600_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
-       rctx->context.create_fs_state = r600_create_shader_state;
+       rctx->context.create_fs_state = r600_create_shader_state_ps;
        rctx->context.create_rasterizer_state = r600_create_rs_state;
        rctx->context.create_sampler_state = r600_create_sampler_state;
        rctx->context.create_sampler_view = r600_create_sampler_view;
        rctx->context.create_vertex_elements_state = 
-       rctx->context.create_vs_state = r600_create_shader_state;
+       rctx->context.create_vs_state = r600_create_shader_state_vs;
        rctx->context.bind_blend_state = r600_bind_blend_state;
        rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
        rctx->context.bind_fragment_sampler_states = r600_bind_ps_samplers;
@@ -1847,6 +1847,7 @@ void r600_init_state_functions(struct r600_context *rctx)
        rctx->context.set_stream_output_targets = r600_set_so_targets;

+/* Adjust GPR allocation on R6xx/R7xx */
  void r600_adjust_gprs(struct r600_context *rctx)
        struct r600_pipe_state rstate;
@@ -1855,22 +1856,22 @@ void r600_adjust_gprs(struct r600_context *rctx)
        unsigned tmp;
        int diff;

-       if (rctx->chip_class>= EVERGREEN)
-               return;
-       if (!rctx->ps_shader || !rctx->vs_shader)
-               return;
+       /* XXX: Following call moved from r600_bind_[ps|vs]_shader,
+        * it seems eg+ doesn't need it, r6xx/7xx probably need it only for
+        * adjusting the GPR allocation?
+        * Do we need this if we aren't really changing config below? */
+       r600_inval_shader_cache(rctx);

-       if (rctx->ps_shader->shader.bc.ngpr>  rctx->default_ps_gprs)
+       if (rctx->ps_shader->current->shader.bc.ngpr>  rctx->default_ps_gprs)
-               diff = rctx->ps_shader->shader.bc.ngpr - rctx->default_ps_gprs;
+               diff = rctx->ps_shader->current->shader.bc.ngpr - 
                num_vs_gprs -= diff;
                num_ps_gprs += diff;

-       if (rctx->vs_shader->shader.bc.ngpr>  rctx->default_vs_gprs)
+       if (rctx->vs_shader->current->shader.bc.ngpr>  rctx->default_vs_gprs)
-               diff = rctx->vs_shader->shader.bc.ngpr - rctx->default_vs_gprs;
+               diff = rctx->vs_shader->current->shader.bc.ngpr - 
                num_ps_gprs -= diff;
                num_vs_gprs += diff;
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
index 00e1bd0..4473ac3 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -429,83 +429,196 @@ void *r600_create_vertex_elements(struct pipe_context 
        return v;

-void *r600_create_shader_state(struct pipe_context *ctx,
-                              const struct pipe_shader_state *state)
+/* Compute the key for the hw shader variant */
+static INLINE unsigned r600_shader_selector_key(struct pipe_context * ctx,
+               struct r600_pipe_shader_selector * sel)
-       struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
-       int r;
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       unsigned key;

-       shader->tokens = tgsi_dup_tokens(state->tokens);
-       shader->so = state->stream_output;
+       if (sel->type == PIPE_SHADER_FRAGMENT) {
+               key = rctx->two_side;
+               if (sel->eg_fs_write_all)
+                       key |= rctx->nr_cbufs<<  1;
+       } else
+               key = 0;

-       r =  r600_pipe_shader_create(ctx, shader);
-       if (r) {
-               return NULL;
-       }
-       return shader;
+       return key;

-void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
+/* Select the hw shader variant depending on the current state.
+ * (*dirty) is set to 1 if current variant was changed */
+static int r600_shader_select(struct pipe_context *ctx,
+        struct r600_pipe_shader_selector* sel,
+        unsigned *dirty)
+       unsigned key;
        struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_pipe_shader * shader = NULL;
+       int r;

-       if (!state) {
-               state = rctx->dummy_pixel_shader;
+       key = r600_shader_selector_key(ctx, sel);
+       /* Check if we don't need to change anything.
+        * This path is also used for most shaders that don't need multiple
+        * variants, it will cost just a computation of the key and this
+        * test. */
+       if (likely(sel->current&&  sel->current->key == key)) {
+               return 0;

-       rctx->ps_shader = (struct r600_pipe_shader *)state;
+       /* lookup if we have other variants in the list */
+       if (sel->num_shaders>  1) {
+               struct r600_pipe_shader *p = sel->current, *c = p->next_variant;

-       r600_inval_shader_cache(rctx);
-       r600_context_pipe_state_set(rctx,&rctx->ps_shader->rstate);
+               while (c&&  c->key != key) {
+                       p = c;
+                       c = c->next_variant;
+               }

-       rctx->cb_color_control&= C_028808_MULTIWRITE_ENABLE;
-       rctx->cb_color_control |= 
+               if (c) {
+                       p->next_variant = c->next_variant;
+                       shader = c;
+               }
+       }
+       if (unlikely(!shader)) {
+               shader = CALLOC(1, sizeof(struct r600_pipe_shader));
+               shader->selector = sel;
+               r = r600_pipe_shader_create(ctx, shader);
+               if (unlikely(r)) {
+                       R600_ERR("Failed to build shader variant (type=%u, key=%u) 
+                                       sel->type, key, r);
+                       sel->current = NULL;
+                       return r;
+               }

-       if (rctx->ps_shader&&  rctx->vs_shader) {
+               /* We don't know the value of eg_fs_write_all property until we 
+                * at least one variant, so we may need to recompute the key 
+                * rctx->nr_cbufs) after building first variant. */
+               if (sel->type == PIPE_SHADER_FRAGMENT&&
+                               sel->num_shaders == 0&&
+                               rctx->chip_class>= EVERGREEN&&
+                               shader->shader.fs_write_all) {
+                       sel->eg_fs_write_all = 1;
+                       key = r600_shader_selector_key(ctx, sel);
+               }
+               shader->key = key;
+               sel->num_shaders++;
+       }
+       if (dirty)
+               *dirty = 1;
+       /* Moved from r600_bind_ps_shader, different shader variants
+        * may use different number of GPRs, so we need to update it. */
+       /* FIXME: we never did it after rebuilding the shaders, is it required? 
+       if (rctx->chip_class<  EVERGREEN&&  rctx->ps_shader&&  rctx->vs_shader) 
+       shader->next_variant = sel->current;
+       sel->current = shader;
+       return 0;
+static void *r600_create_shader_state(struct pipe_context *ctx,
+                              const struct pipe_shader_state *state,
+                              unsigned pipe_shader_type)
+       struct r600_pipe_shader_selector *sel = 
+       int r;
+       sel->type = pipe_shader_type;
+       sel->tokens = tgsi_dup_tokens(state->tokens);
+       sel->so = state->stream_output;
+       r = r600_shader_select(ctx, sel, NULL);
+       if (r)
+           return NULL;
+       return sel;
+void *r600_create_shader_state_ps(struct pipe_context *ctx,
+               const struct pipe_shader_state *state)
+       return r600_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
+void *r600_create_shader_state_vs(struct pipe_context *ctx,
+               const struct pipe_shader_state *state)
+       return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
+void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       if (!state)
+               state = rctx->dummy_pixel_shader;
+       rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
+       r600_context_pipe_state_set(rctx,&rctx->ps_shader->current->rstate);
+       rctx->cb_color_control&= C_028808_MULTIWRITE_ENABLE;
+       rctx->cb_color_control |= 

  void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
        struct r600_context *rctx = (struct r600_context *)ctx;

-       rctx->vs_shader = (struct r600_pipe_shader *)state;
+       rctx->vs_shader = (struct r600_pipe_shader_selector *)state;
        if (state) {
-               r600_inval_shader_cache(rctx);
-               r600_context_pipe_state_set(rctx,&rctx->vs_shader->rstate);
+               if (rctx->chip_class<  EVERGREEN&&  rctx->ps_shader)
+                       r600_adjust_gprs(rctx);
-       if (rctx->ps_shader&&  rctx->vs_shader) {
-               r600_adjust_gprs(rctx);
+static void r600_delete_shader_selector(struct pipe_context *ctx,
+               struct r600_pipe_shader_selector *sel)
+       struct r600_pipe_shader *p = sel->current, *c;
+       while (p) {
+               c = p->next_variant;
+               r600_pipe_shader_destroy(ctx, p);
+               free(p);
+               p = c;
+       free(sel->tokens);
+       free(sel);

  void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
+       struct r600_pipe_shader_selector *sel = (struct 
r600_pipe_shader_selector *)state;

-       if (rctx->ps_shader == shader) {
+       if (rctx->ps_shader == sel) {
                rctx->ps_shader = NULL;

-       free(shader->tokens);
-       r600_pipe_shader_destroy(ctx, shader);
-       free(shader);
+       r600_delete_shader_selector(ctx, sel);

  void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
+       struct r600_pipe_shader_selector *sel = (struct 
r600_pipe_shader_selector *)state;

-       if (rctx->vs_shader == shader) {
+       if (rctx->vs_shader == sel) {
                rctx->vs_shader = NULL;

-       free(shader->tokens);
-       r600_pipe_shader_destroy(ctx, shader);
-       free(shader);
+       r600_delete_shader_selector(ctx, sel);

  static void r600_update_alpha_ref(struct r600_context *rctx)
@@ -661,24 +774,10 @@ void r600_set_so_targets(struct pipe_context *ctx,
        rctx->streamout_append_bitmask = append_bitmask;

-static int r600_shader_rebuild(struct pipe_context * ctx, struct 
r600_pipe_shader * shader)
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       int r;
-       r600_pipe_shader_destroy(ctx, shader);
-       r = r600_pipe_shader_create(ctx, shader);
-       if (r) {
-               return r;
-       }
-       r600_context_pipe_state_set(rctx,&shader->rstate);
-       return 0;
  static void r600_update_derived_state(struct r600_context *rctx)
        struct pipe_context * ctx = (struct pipe_context*)rctx;
+       unsigned ps_dirty = 0;

        if (!rctx->blitter->running) {
                if (rctx->have_depth_fb || rctx->have_depth_texture)
@@ -689,30 +788,29 @@ static void r600_update_derived_state(struct r600_context 

-       if ((rctx->ps_shader->shader.two_side != rctx->two_side) ||
-           ((rctx->chip_class>= EVERGREEN)&&  
-            (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) {
-               r600_shader_rebuild(&rctx->context, rctx->ps_shader);
-       }
+       r600_shader_select(ctx, rctx->ps_shader,&ps_dirty);

        if (rctx->alpha_ref_dirty) {

        if (rctx->ps_shader&&  ((rctx->sprite_coord_enable&&
-               (rctx->ps_shader->sprite_coord_enable != 
rctx->sprite_coord_enable)) ||
-               (rctx->rasterizer&&  rctx->rasterizer->flatshade != 
rctx->ps_shader->flatshade))) {
+               (rctx->ps_shader->current->sprite_coord_enable != 
rctx->sprite_coord_enable)) ||
+               (rctx->rasterizer&&  rctx->rasterizer->flatshade != 
rctx->ps_shader->current->flatshade))) {

                if (rctx->chip_class>= EVERGREEN)
-                       evergreen_pipe_shader_ps(ctx, rctx->ps_shader);
+                       evergreen_pipe_shader_ps(ctx, rctx->ps_shader->current);
-                       r600_pipe_shader_ps(ctx, rctx->ps_shader);
+                       r600_pipe_shader_ps(ctx, rctx->ps_shader->current);

-               r600_context_pipe_state_set(rctx,&rctx->ps_shader->rstate);
+               ps_dirty = 1;

+       if (ps_dirty)
        if (rctx->dual_src_blend)
-               rctx->cb_shader_mask = rctx->ps_shader->ps_cb_shader_mask | 
+               rctx->cb_shader_mask = rctx->ps_shader->current->ps_cb_shader_mask 
| rctx->fb_cb_shader_mask;
                rctx->cb_shader_mask = rctx->fb_cb_shader_mask;
@@ -827,12 +925,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *dinfo)
        if (rctx->chip_class<= R700)
                r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_color_control);
-                               rctx->vs_shader->pa_cl_vs_out_cntl |
-                               (rctx->rasterizer->clip_plane_enable&  
+                               rctx->vs_shader->current->pa_cl_vs_out_cntl |
+                               (rctx->rasterizer->clip_plane_enable&  
                                rctx->pa_cl_clip_cntl |
-                               (rctx->vs_shader->shader.clip_dist_write ||
-                                rctx->vs_shader->shader.vs_prohibit_ucps ?
(rctx->vs_shader->current->shader.clip_dist_write ||
rctx->vs_shader->current->shader.vs_prohibit_ucps ?
                                 0 : rctx->rasterizer->clip_plane_enable&  


mesa-dev mailing list

Reply via email to