From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is really the behavior we want most of the time, but having a SYNCHRONIZED flag instead of an UNSYNCHRONIZED one has the advantage that OR'ing different flags together always results in stronger guarantees.
The parent BOs of sub-allocated buffers will be added unsynchronized. --- src/gallium/drivers/r300/r300_emit.c | 19 +++++++++++-------- src/gallium/drivers/radeon/r600_cs.h | 6 ++++-- src/gallium/drivers/radeon/radeon_uvd.c | 3 ++- src/gallium/drivers/radeon/radeon_vce.c | 3 ++- src/gallium/drivers/radeon/radeon_winsys.h | 7 ++++++- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 95971de..671aa62 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1314,91 +1314,94 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, validate: if (r300->fb_state.dirty) { /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { if (!fb->cbufs[i]) continue; tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); r300->rws->cs_add_buffer(r300->cs, tex->buf, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->cbufs[i])->domain, tex->b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER); } /* ...depth buffer... */ if (fb->zsbuf) { tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); r300->rws->cs_add_buffer(r300->cs, tex->buf, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->zsbuf)->domain, tex->b.b.nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA : RADEON_PRIO_DEPTH_BUFFER); } } /* The AA resolve buffer. */ if (r300->aa_state.dirty) { if (aa->dest) { r300->rws->cs_add_buffer(r300->cs, aa->dest->buf, - RADEON_USAGE_WRITE, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, aa->dest->domain, RADEON_PRIO_COLOR_BUFFER); } } if (r300->textures_state.dirty) { /* ...textures... 
*/ for (i = 0; i < texstate->count; i++) { if (!(texstate->tx_enable & (1 << i))) { continue; } tex = r300_resource(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ, + r300->rws->cs_add_buffer(r300->cs, tex->buf, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf, - RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) r300->rws->cs_add_buffer(r300->cs, r300->vbo, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_buffer *last = r300->vertex_buffer + r300->nr_vertex_buffers; struct pipe_resource *buf; for (; vbuf != last; vbuf++) { buf = vbuf->buffer; if (!buf) continue; r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf, - RADEON_USAGE_READ, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(buf)->domain, RADEON_PRIO_SAMPLER_BUFFER); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf, - RADEON_USAGE_READ, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(index_buffer)->domain, RADEON_PRIO_INDEX_BUFFER); /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { /* Ooops, an infinite loop, give up. 
*/ if (flushed) return FALSE; flushed = TRUE; diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index 6c15df8..28bdf15 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -66,22 +66,24 @@ radeon_cs_memory_below_limit(struct r600_common_screen *screen, * The buffer list becomes empty after every context flush and must be * rebuilt. */ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx, struct r600_ring *ring, struct r600_resource *rbo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { assert(usage); - return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage, - rbo->domains, priority) * 4; + return rctx->ws->cs_add_buffer( + ring->cs, rbo->buf, + (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), + rbo->domains, priority) * 4; } /** * Same as above, but also checks memory usage and flushes the context * accordingly. * * When this SHOULD NOT be used: * * - if r600_context_add_resource_size has been called for the buffer * followed by *_need_cs_space for checking the memory usage diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index d5d654a..3ae0eaa 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -106,21 +106,22 @@ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer* buf, uint32_t off, enum radeon_bo_usage usage, enum radeon_bo_domain domain) { int reloc_idx; - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain, + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, RADEON_PRIO_UVD); if (!dec->use_legacy) { uint64_t addr; addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; 
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr); set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32); } else { set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 92cb8ce..8b5d277 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -530,21 +530,22 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen) /** * Add the buffer as relocation to the current command submission */ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domain, signed offset) { int reloc_idx; - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, RADEON_PRIO_VCE); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); addr = addr + offset; RVCE_CS(addr >> 32); RVCE_CS(addr); } else { RVCE_CS(reloc_idx * 4); RVCE_CS(offset); } diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 91f6e89..809a203 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -50,21 +50,26 @@ enum radeon_bo_domain { /* bitfield */ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_GTT_WC = (1 << 0), RADEON_FLAG_CPU_ACCESS = (1 << 1), RADEON_FLAG_NO_CPU_ACCESS = (1 << 2), }; enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_READ = 2, RADEON_USAGE_WRITE = 4, - RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, + + /* The winsys ensures that the CS submission will be scheduled after + * previously flushed CSs referencing this BO in a conflicting way. 
+ */ + RADEON_USAGE_SYNCHRONIZED = 8 }; enum ring_type { RING_GFX = 0, RING_COMPUTE, RING_DMA, RING_UVD, RING_VCE, RING_LAST, }; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev