From: Marek Olšák <marek.ol...@amd.com>

Allocate IBs for the GFX, compute and SDMA rings in write-combined
GTT memory. This should decrease IB fetch latency.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
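For review convenience, the new flag selection is equivalent to the
helper sketched below (amdgpu_ib_buffer_flags is an illustrative name
only, not part of this patch). IBs are written once by the CPU and then
only fetched by the GPU, so a write-combined GTT mapping is sufficient
for these rings; other ring types keep the default cached mapping.

static unsigned amdgpu_ib_buffer_flags(enum ring_type ring_type)
{
   /* IB buffers are never shared between processes. */
   unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;

   /* GFX, compute and SDMA IBs are written once by the CPU and then
    * only read by the GPU, so map them write-combined; this is what
    * should make IB fetches cheaper. */
   if (ring_type == RING_GFX ||
       ring_type == RING_COMPUTE ||
       ring_type == RING_DMA)
      flags |= RADEON_FLAG_GTT_WC;

   return flags;
}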
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0a657f7..8fbe8ae 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -609,21 +609,22 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
    buffer->u.real.priority_usage |= 1ull << priority;
    buffer->usage |= usage;
 
    cs->last_added_bo = bo;
    cs->last_added_bo_index = index;
    cs->last_added_bo_usage = buffer->usage;
    cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
    return index;
 }
 
-static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
+static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
+                                 enum ring_type ring_type)
 {
    struct pb_buffer *pb;
    uint8_t *mapped;
    unsigned buffer_size;
 
    /* Always create a buffer that is at least as large as the maximum seen IB
     * size, aligned to a power of two (and multiplied by 4 to reduce internal
     * fragmentation if chaining is not available). Limit to 512k dwords, which
     * is the largest power of two that fits into the size field of the
     * INDIRECT_BUFFER packet.
@@ -639,21 +640,25 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
    case IB_MAIN:
       buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
       break;
    default:
       unreachable("unhandled IB type");
    }
 
    pb = ws->base.buffer_create(&ws->base, buffer_size,
                                ws->info.gart_page_size,
                                RADEON_DOMAIN_GTT,
-                               RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                               RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                               (ring_type == RING_GFX ||
+                                ring_type == RING_COMPUTE ||
+                                ring_type == RING_DMA ?
+                                   RADEON_FLAG_GTT_WC : 0));
    if (!pb)
       return false;
 
    mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
    if (!mapped) {
       pb_reference(&pb, NULL);
       return false;
    }
 
    pb_reference(&ib->big_ib_buffer, pb);
@@ -709,21 +714,21 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
    ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32;
 
    ib->base.prev_dw = 0;
    ib->base.num_prev = 0;
    ib->base.current.cdw = 0;
    ib->base.current.buf = NULL;
 
    /* Allocate a new buffer for IBs if the current buffer is all used. */
    if (!ib->big_ib_buffer ||
        ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {
-      if (!amdgpu_ib_new_buffer(aws, ib))
+      if (!amdgpu_ib_new_buffer(aws, ib, cs->ring_type))
          return false;
    }
 
    info->va_start = amdgpu_winsys_bo(ib->big_ib_buffer)->va + ib->used_ib_space;
    info->ib_bytes = 0;
    /* ib_bytes is in dwords and the conversion to bytes will be done before
     * the CS ioctl. */
    ib->ptr_ib_size = &info->ib_bytes;
    ib->ptr_ib_size_inside_ib = false;
 
@@ -919,21 +924,21 @@ static bool amdgpu_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
       new_prev = REALLOC(rcs->prev,
                          sizeof(*new_prev) * rcs->max_prev,
                          sizeof(*new_prev) * new_max_prev);
       if (!new_prev)
          return false;
 
       rcs->prev = new_prev;
       rcs->max_prev = new_max_prev;
    }
 
-   if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib))
+   if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib, cs->ring_type))
       return false;
 
    assert(ib->used_ib_space == 0);
    va = amdgpu_winsys_bo(ib->big_ib_buffer)->va;
 
    /* This space was originally reserved. */
    rcs->current.max_dw += 4;
    assert(ib->used_ib_space + 4 * rcs->current.max_dw <= ib->big_ib_buffer->size);
 
    /* Pad with NOPs and add INDIRECT_BUFFER packet */
-- 
2.7.4
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev