This should increase performance if constant uploads are done with the CP DMA.
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  21 +---
 src/gallium/drivers/r600/r600_hw_context.c      | 126 +++++++++++++++++++++---
 src/gallium/drivers/r600/r600_pipe.h            |   2 +
 3 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 075ab17..c428bc1 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context 
*rctx,
 
        offset += r600_resource_va(&rctx->screen->screen, dst);
 
-       /* We flush the caches, because we might read from or write
-        * to resources which are bound right now. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE |
-                      R600_CONTEXT_FLUSH_AND_INV |
-                      R600_CONTEXT_FLUSH_AND_INV_CB |
-                      R600_CONTEXT_FLUSH_AND_INV_DB |
-                      R600_CONTEXT_FLUSH_AND_INV_CB_META |
-                      R600_CONTEXT_FLUSH_AND_INV_DB_META |
-                      R600_CONTEXT_STREAMOUT_FLUSH |
-                      R600_CONTEXT_WAIT_3D_IDLE;
+       /* Flush the cache where the resource is bound. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        while (size) {
                unsigned sync = 0;
@@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context 
*rctx,
                offset += byte_count;
        }
 
-       /* Invalidate the read caches. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE;
+       /* Flush the cache again in case the 3D engine has been prefetching
+        * the resource. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
                       offset + size);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index ff36573..e2444cc 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
        dst_offset += r600_resource_va(&rctx->screen->screen, dst);
        src_offset += r600_resource_va(&rctx->screen->screen, src);
 
-       /* We flush the caches, because we might read from or write
-        * to resources which are bound right now. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE |
-                      R600_CONTEXT_FLUSH_AND_INV |
-                      R600_CONTEXT_FLUSH_AND_INV_CB |
-                      R600_CONTEXT_FLUSH_AND_INV_DB |
-                      R600_CONTEXT_FLUSH_AND_INV_CB_META |
-                      R600_CONTEXT_FLUSH_AND_INV_DB_META |
-                      R600_CONTEXT_STREAMOUT_FLUSH |
-                      R600_CONTEXT_WAIT_3D_IDLE;
+       /* Flush the caches where the resources are bound. */
+       r600_flag_resource_cache_flush(rctx, src);
+       r600_flag_resource_cache_flush(rctx, dst);
 
        /* There are differences between R700 and EG in CP DMA,
         * but we only use the common bits here. */
@@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
                dst_offset += byte_count;
        }
 
-       /* Invalidate the read caches. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE;
+       /* Flush the cache of the dst resource again in case the 3D engine
+        * has been prefetching it. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
                       dst_offset + size);
@@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx,
        util_range_add(&rdst->valid_buffer_range, dst_offset,
                       dst_offset + size);
 }
+
+/* Flag the cache of the resource for it to be flushed later if the resource
+ * is bound. Otherwise do nothing. Used for synchronization between engines.
+ */
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+                                   struct pipe_resource *res)
+{
+       /* Check vertex buffers. */
+       uint32_t mask = rctx->vertex_buffer_state.enabled_mask;
+       while (mask) {
+               uint32_t i = u_bit_scan(&mask);
+               if (rctx->vertex_buffer_state.vb[i].buffer == res) {
+                       rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+               }
+       }
+
+       /* Check vertex buffers for compute. */
+       mask = rctx->cs_vertex_buffer_state.enabled_mask;
+       while (mask) {
+               uint32_t i = u_bit_scan(&mask);
+               if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) {
+                       rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+               }
+       }
+
+       /* Check constant buffers. */
+       unsigned shader;
+       for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+               struct r600_constbuf_state *state = 
&rctx->constbuf_state[shader];
+               uint32_t mask = state->enabled_mask;
+
+               while (mask) {
+                       unsigned i = u_bit_scan(&mask);
+                       if (state->cb[i].buffer == res) {
+                               rctx->flags |= R600_CONTEXT_INV_CONST_CACHE;
+
+                               shader = PIPE_SHADER_TYPES; /* break the outer 
loop */
+                               break;
+                       }
+               }
+       }
+
+       /* Check textures. */
+       for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+               struct r600_samplerview_state *state = 
&rctx->samplers[shader].views;
+               uint32_t mask = state->enabled_mask;
+
+               while (mask) {
+                       uint32_t i = u_bit_scan(&mask);
+                       if (&state->views[i]->tex_resource->b.b == res) {
+                               rctx->flags |= R600_CONTEXT_INV_TEX_CACHE;
+
+                               shader = PIPE_SHADER_TYPES; /* break the outer 
loop */
+                               break;
+                       }
+               }
+       }
+
+       /* Check streamout buffers. */
+       int i;
+       for (i = 0; i < rctx->streamout.num_targets; i++) {
+               if (rctx->streamout.targets[i]->b.buffer == res) {
+                       rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+                       break;
+               }
+       }
+
+       /* Check colorbuffers. */
+       for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+               if (rctx->framebuffer.state.cbufs[i] &&
+                   rctx->framebuffer.state.cbufs[i]->texture == res) {
+                       struct r600_texture *tex =
+                               (struct 
r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+
+                       if (tex->cmask_size || tex->fmask_size) {
+                               rctx->flags |= 
R600_CONTEXT_FLUSH_AND_INV_CB_META;
+                       }
+                       break;
+               }
+       }
+
+       /* Check a depth buffer. */
+       if (rctx->framebuffer.state.zsbuf) {
+               if (rctx->framebuffer.state.zsbuf->texture == res) {
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+               }
+
+               struct r600_texture *tex =
+                       (struct 
r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+               if (tex && tex->htile && &tex->htile->b.b == res) {
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+               }
+       }
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 66ea258..d5f54c4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx,
                        const struct pipe_box *src_box);
 void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom 
*atom);
 void r600_emit_streamout_end(struct r600_context *ctx);
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+                                   struct pipe_resource *res);
 
 /*
  * evergreen_hw_context.c
-- 
1.8.1.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to