On 05.06.2017 18:50, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
  src/gallium/drivers/radeon/r600_texture.c     |  1 +
  src/gallium/drivers/radeonsi/si_state.c       | 65 ++++++++++++++++++++++-----
  3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b17b690..6b78a07 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -270,20 +270,21 @@ struct r600_texture {
        unsigned                        num_slow_clears;
/* Counter that should be non-zero if the texture is bound to a
         * framebuffer. Implemented in radeonsi only.
         */
        uint32_t                        framebuffers_bound;
  };
struct r600_surface {
        struct pipe_surface             base;
+       enum pipe_format                linear_format;
/* These can vary with block-compressed textures. */
        unsigned width0;
        unsigned height0;
bool color_initialized;
        bool depth_initialized;
/* Misc. color flags. */
        bool alphatest_bypass;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 4d72b86..48ae788 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1943,20 +1943,21 @@ struct pipe_surface *r600_create_surface_custom(struct 
pipe_context *pipe,
        assert(templ->u.tex.last_layer <= util_max_layer(texture, 
templ->u.tex.level));
pipe_reference_init(&surface->base.reference, 1);
        pipe_resource_reference(&surface->base.texture, texture);
        surface->base.context = pipe;
        surface->base.format = templ->format;
        surface->base.width = width;
        surface->base.height = height;
        surface->base.u = templ->u;
+ surface->linear_format = util_format_linear(templ->format);
        surface->width0 = width0;
        surface->height0 = height0;
surface->dcc_incompatible =
                texture->target != PIPE_BUFFER &&
                vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
                                                templ->format);
        return &surface->base;
  }
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6cf3559..50a0bd1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2423,32 +2423,84 @@ static void si_dec_framebuffer_counters(const struct 
pipe_framebuffer_state *sta
if (!state->cbufs[i])
                        continue;
                surf = (struct r600_surface*)state->cbufs[i];
                rtex = (struct r600_texture*)surf->base.texture;
p_atomic_dec(&rtex->framebuffers_bound);
        }
  }
+static void si_framebuffer_cache_flush(struct si_context *sctx)
+{
+       /* Only flush vL1 and L2 when changing the framebuffer state, because
+        * the only client not using TC that can change textures is
+        * the framebuffer.
+        *
+        * Flush all CB and DB caches here because all buffers can be used
+        * for write by both TC (with shader image stores) and CB/DB.
+        */
+       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                        SI_CONTEXT_INV_GLOBAL_L2 |

This one bit shouldn't be needed on gfx9, right? Maybe for a separate patch.



+                        SI_CONTEXT_FLUSH_AND_INV_CB |
+                        SI_CONTEXT_FLUSH_AND_INV_DB |
+                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+}
+
  static void si_set_framebuffer_state(struct pipe_context *ctx,
                                     const struct pipe_framebuffer_state *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
        struct pipe_constant_buffer constbuf = {0};
        struct r600_surface *surf = NULL;
        struct r600_texture *rtex;
        bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
        unsigned old_nr_samples = sctx->framebuffer.nr_samples;
        bool unbound = false;
        int i;
+ /* Fast path for linear <-> sRGB changes and no-op changes. */

Are linear <-> sRGB changes really that common to deserve this special treatment? Especially considering that IMO it makes the function as a whole quite a bit more difficult to follow.

Perhaps we could instead use the initial comparison loop to also speed up cases where only a subset of the cbufs changes? Or the cbufs are unchanged but zsbuf is changed? Although I wouldn't be surprised if those cases are even less common. Hmm...

If anything, in case you do want to keep the optimization as-is, could you please move it to a separate function?


+       if (state->nr_cbufs &&
+           state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
+           state->zsbuf == sctx->framebuffer.state.zsbuf) {
+               struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
+
+               /* See if the surfaces are equivalent. */
+               for (i = 0; i < state->nr_cbufs; i++) {
+                       if (!!state->cbufs[i] != !!cbufs[i] ||
+                           state->cbufs[i]->texture != cbufs[i]->texture ||
+                           memcmp(&state->cbufs[i]->u.tex, &cbufs[i]->u.tex,
+                                  sizeof(cbufs[i]->u.tex)) ||
+                           ((struct 
r600_surface*)state->cbufs[i])->linear_format !=
+                           ((struct r600_surface*)cbufs[i])->linear_format)
+                               break;
+               }
+               if (i == state->nr_cbufs) {
+                       /* We need to make sure the surfaces are initialized. */
+                       for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+                               surf = (struct r600_surface*)state->cbufs[i];
+                               if (!surf->color_initialized)
+                                       si_initialize_color_surface(sctx, surf);
+                       }
+
+                       /* Decompression passes may need a cache flush. */
+                       if (sctx->decompression_enabled)
+                               si_framebuffer_cache_flush(sctx);
+
+                       /* Just re-emit the FB state and get out. */
+                       util_copy_framebuffer_state(&sctx->framebuffer.state, 
state);
+                       sctx->framebuffer.dirty_cbufs |= (1 << state->nr_cbufs) 
- 1;
+                       si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+                       return;
+               }
+        }

I don't know if it's just my mail client, but the indentation of the last brace looks wrong.

Cheers,
Nicolai


+
        for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
                if (!sctx->framebuffer.state.cbufs[i])
                        continue;
rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
                if (rtex->dcc_gather_statistics)
                        vi_separate_dcc_stop_query(ctx, rtex);
        }
/* Disable DCC if the formats are incompatible. */
@@ -2472,32 +2524,21 @@ static void si_set_framebuffer_state(struct 
pipe_context *ctx,
                        unbound = true;
                }
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
                        if (!r600_texture_disable_dcc(&sctx->b, rtex))
                                sctx->b.decompress_dcc(ctx, rtex);
surf->dcc_incompatible = false;
        }
- /* Only flush TC when changing the framebuffer state, because
-        * the only client not using TC that can change textures is
-        * the framebuffer.
-        *
-        * Flush all CB and DB caches here because all buffers can be used
-        * for write by both TC (with shader image stores) and CB/DB.
-        */
-       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_FLUSH_AND_INV_DB |
-                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+       si_framebuffer_cache_flush(sctx);
/* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
         */
        sctx->framebuffer.dirty_cbufs |=
                (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) 
- 1;
        sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != 
state->zsbuf;
si_dec_framebuffer_counters(&sctx->framebuffer.state);
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to