On 05.06.2017 18:50, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>
---
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeon/r600_texture.c | 1 +
src/gallium/drivers/radeonsi/si_state.c | 65 ++++++++++++++++++++++-----
3 files changed, 55 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b17b690..6b78a07 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -270,20 +270,21 @@ struct r600_texture {
unsigned num_slow_clears;
/* Counter that should be non-zero if the texture is bound to a
* framebuffer. Implemented in radeonsi only.
*/
uint32_t framebuffers_bound;
};
struct r600_surface {
struct pipe_surface base;
+ enum pipe_format linear_format;
/* These can vary with block-compressed textures. */
unsigned width0;
unsigned height0;
bool color_initialized;
bool depth_initialized;
/* Misc. color flags. */
bool alphatest_bypass;
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 4d72b86..48ae788 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1943,20 +1943,21 @@ struct pipe_surface *r600_create_surface_custom(struct
pipe_context *pipe,
assert(templ->u.tex.last_layer <= util_max_layer(texture,
templ->u.tex.level));
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.context = pipe;
surface->base.format = templ->format;
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
+ surface->linear_format = util_format_linear(templ->format);
surface->width0 = width0;
surface->height0 = height0;
surface->dcc_incompatible =
texture->target != PIPE_BUFFER &&
vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
templ->format);
return &surface->base;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6cf3559..50a0bd1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2423,32 +2423,84 @@ static void si_dec_framebuffer_counters(const struct
pipe_framebuffer_state *sta
if (!state->cbufs[i])
continue;
surf = (struct r600_surface*)state->cbufs[i];
rtex = (struct r600_texture*)surf->base.texture;
p_atomic_dec(&rtex->framebuffers_bound);
}
}
+static void si_framebuffer_cache_flush(struct si_context *sctx)
+{
+ /* Only flush vL1 and L2 when changing the framebuffer state, because
+ * the only client not using TC that can change textures is
+ * the framebuffer.
+ *
+ * Flush all CB and DB caches here because all buffers can be used
+ * for write by both TC (with shader image stores) and CB/DB.
+ */
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
This one bit shouldn't be needed on gfx9, right? Maybe for a separate patch.
+ SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+}
+
static void si_set_framebuffer_state(struct pipe_context *ctx,
const struct pipe_framebuffer_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_constant_buffer constbuf = {0};
struct r600_surface *surf = NULL;
struct r600_texture *rtex;
bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
unsigned old_nr_samples = sctx->framebuffer.nr_samples;
bool unbound = false;
int i;
+ /* Fast path for linear <-> sRGB changes and no-op changes. */
Are linear <-> sRGB changes really that common to deserve this special
treatment? Especially considering that IMO it makes the function as a
whole quite a bit more difficult to follow.
Perhaps we could instead use the initial comparison loop to also speed
up cases where only a subset of the cbufs changes? Or the cbufs are
unchanged but zsbuf is changed? Although I wouldn't be surprised if
those cases are even less common. Hmm...
If anything, in case you do want to keep the optimization as-is, could
you please move it to a separate function?
+ if (state->nr_cbufs &&
+ state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
+ state->zsbuf == sctx->framebuffer.state.zsbuf) {
+ struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
+
+ /* See if the surfaces are equivalent. */
+ for (i = 0; i < state->nr_cbufs; i++) {
+ if (!!state->cbufs[i] != !!cbufs[i] ||
+ state->cbufs[i]->texture != cbufs[i]->texture ||
+ memcmp(&state->cbufs[i]->u.tex, &cbufs[i]->u.tex,
+ sizeof(cbufs[i]->u.tex)) ||
+ ((struct
r600_surface*)state->cbufs[i])->linear_format !=
+ ((struct r600_surface*)cbufs[i])->linear_format)
+ break;
+ }
+ if (i == state->nr_cbufs) {
+ /* We need to make sure the surfaces are initialized. */
+ for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+ surf = (struct r600_surface*)state->cbufs[i];
+ if (!surf->color_initialized)
+ si_initialize_color_surface(sctx, surf);
+ }
+
+ /* Decompression passes may need a cache flush. */
+ if (sctx->decompression_enabled)
+ si_framebuffer_cache_flush(sctx);
+
+ /* Just re-emit the FB state and get out. */
+ util_copy_framebuffer_state(&sctx->framebuffer.state,
state);
+ sctx->framebuffer.dirty_cbufs |= (1 << state->nr_cbufs)
- 1;
+ si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+ return;
+ }
+ }
I don't know if it's just my mail client, but the indentation of the
last brace looks wrong.
Cheers,
Nicolai
+
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
if (!sctx->framebuffer.state.cbufs[i])
continue;
rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
if (rtex->dcc_gather_statistics)
vi_separate_dcc_stop_query(ctx, rtex);
}
/* Disable DCC if the formats are incompatible. */
@@ -2472,32 +2524,21 @@ static void si_set_framebuffer_state(struct
pipe_context *ctx,
unbound = true;
}
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
if (!r600_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(ctx, rtex);
surf->dcc_incompatible = false;
}
- /* Only flush TC when changing the framebuffer state, because
- * the only client not using TC that can change textures is
- * the framebuffer.
- *
- * Flush all CB and DB caches here because all buffers can be used
- * for write by both TC (with shader image stores) and CB/DB.
- */
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
+ si_framebuffer_cache_flush(sctx);
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
sctx->framebuffer.dirty_cbufs |=
(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs))
- 1;
sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf !=
state->zsbuf;
si_dec_framebuffer_counters(&sctx->framebuffer.state);
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev