From: Marek Olšák <marek.ol...@amd.com> There is a kernel patch that adds the new AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE flag. --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index a3feeb93026..eb050b8fdb2 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -26,20 +26,24 @@ * of the Software. */ #include "amdgpu_cs.h" #include "util/os_time.h" #include <inttypes.h> #include <stdio.h> #include "amd/common/sid.h" +#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) +#endif + DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false) /* FENCES */ static struct pipe_fence_handle * amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type, unsigned ip_instance, unsigned ring) { struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence); @@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib *ib) } static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib) { amdgpu_set_ib_size(ib); ib->used_ib_space += ib->base.current.cdw * 4; ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_start_alignment); ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + ib->base.current.cdw); } -static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs, +static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, + struct amdgpu_cs_context *cs, enum ring_type ring_type) { switch (ring_type) { case RING_DMA: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA; break; case RING_UVD: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD; break; case RING_UVD_ENC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC; break; case RING_VCE: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE; break; - case RING_COMPUTE: - cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE; - break; - case RING_VCN_DEC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC; break; - case RING_VCN_ENC: + case RING_VCN_ENC: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC; break; - default: + case RING_COMPUTE: case RING_GFX: - cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX; + 
cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX : + AMDGPU_HW_IP_COMPUTE; + + /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache + * invalidation is the beginning of IBs (the previous commit does that), + * because completion of an IB doesn't care about the state of GPU caches, + * but the beginning of an IB does. Draw calls from multiple IBs can be + * executed in parallel, so draw calls from the current IB can finish after + * the next IB starts drawing, and so the cache flush at the end of IB + * is always late. + */ + if (ws->info.drm_minor >= 26) + cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; break; + + default: + assert(0); } memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist)); cs->last_added_bo = NULL; return true; } static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs) { unsigned i; @@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, cs->flush_data = flush_ctx; cs->ring_type = ring_type; struct amdgpu_cs_fence_info fence_info; fence_info.handle = cs->ctx->user_fence_bo; fence_info.offset = cs->ring_type; amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk); cs->main.ib_type = IB_MAIN; - if (!amdgpu_init_cs_context(&cs->csc1, ring_type)) { + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) { FREE(cs); return NULL; } - if (!amdgpu_init_cs_context(&cs->csc2, ring_type)) { + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) { amdgpu_destroy_cs_context(&cs->csc1); FREE(cs); return NULL; } /* Set the first submission context as current. */ cs->csc = &cs->csc1; cs->cst = &cs->csc2; if (!amdgpu_get_new_ib(&ctx->ws->base, cs, IB_MAIN)) { -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev