On Thu, Feb 28, 2013 at 4:05 AM, Marek Olšák <mar...@gmail.com> wrote: > TF2 doesn't use CP DMA with this patch at all, so I'm not seeing any > issue obviously. This patch is an optimization, not a workaround. It > also doesn't affect CP DMA in any way. It only detects when a buffer > range can be mapped without the CPU-GPU synchronization, which is the > most efficient way of mapping a buffer (no GEM_WAIT_IDLE, no temporary > staging resource, no copying in buffer_unmap). > > CP DMA is still probably not reliable with 6xx/7xx, but we won't be > able to use TF2 anymore to test it.
Well, it's possible there are hardware issues with it, but CP DMA has been around since r100 and from what I can tell internally, it's pretty reliable. I think the issue with TF2 were more circumstantial, due to the combination of state and buffers (even streamout failed to work correctly in those circumstances). Maybe a env var to selectively enable it? I'm not pressed either way, just seems a shame not to use it. Alex > > Marek > > On Thu, Feb 28, 2013 at 12:46 AM, Alex Deucher <alexdeuc...@gmail.com> wrote: >> On Wed, Feb 27, 2013 at 6:11 PM, Marek Olšák <mar...@gmail.com> wrote: >>> Any driver can implement this simple and efficient optimization. >>> Team Fortress 2 hits it always. The DISCARD_RANGE codepath is not even used >>> with TF2 anymore, so we avoid a ton of useless buffer copies. >> >> With this patch applied are you still seeing issues any issues with CP >> DMA? If not maybe we can leave it enabled for 6xx/7xx. >> >> Alex >> >>> --- >>> src/gallium/drivers/r600/evergreen_hw_context.c | 3 +++ >>> src/gallium/drivers/r600/evergreen_state.c | 4 ++++ >>> src/gallium/drivers/r600/r600.h | 11 +++++++++++ >>> src/gallium/drivers/r600/r600_buffer.c | 17 +++++++++++++++++ >>> src/gallium/drivers/r600/r600_hw_context.c | 6 ++++++ >>> src/gallium/drivers/r600/r600_state_common.c | 4 ++++ >>> 6 files changed, 45 insertions(+) >>> >>> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c >>> b/src/gallium/drivers/r600/evergreen_hw_context.c >>> index b0f64b4..f81d7f3 100644 >>> --- a/src/gallium/drivers/r600/evergreen_hw_context.c >>> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c >>> @@ -241,4 +241,7 @@ void evergreen_dma_copy(struct r600_context *rctx, >>> src_offset += csize << shift; >>> size -= csize; >>> } >>> + >>> + util_range_add(&rdst->valid_buffer_range, dst_offset, >>> + dst_offset + size); >>> } >>> diff --git a/src/gallium/drivers/r600/evergreen_state.c >>> b/src/gallium/drivers/r600/evergreen_state.c >>> index 38f83e9..4a41c28 100644 >>> --- a/src/gallium/drivers/r600/evergreen_state.c >>> +++ b/src/gallium/drivers/r600/evergreen_state.c >>> @@ -1376,6 +1376,10 @@ void evergreen_init_color_surface_rat(struct >>> r600_context *rctx, >>> * elements. */ >>> surf->cb_color_dim = pipe_buffer->width0; >>> >>> + /* Set the buffer range the GPU will have access to: */ >>> + util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range, >>> + 0, pipe_buffer->width0); >>> + >>> surf->cb_color_cmask = surf->cb_color_base; >>> surf->cb_color_cmask_slice = 0; >>> surf->cb_color_fmask = surf->cb_color_base; >>> diff --git a/src/gallium/drivers/r600/r600.h >>> b/src/gallium/drivers/r600/r600.h >>> index d018ebb..15196f7 100644 >>> --- a/src/gallium/drivers/r600/r600.h >>> +++ b/src/gallium/drivers/r600/r600.h >>> @@ -28,6 +28,7 @@ >>> >>> #include "../../winsys/radeon/drm/radeon_winsys.h" >>> #include "util/u_double_list.h" >>> +#include "util/u_range.h" >>> #include "util/u_transfer.h" >>> >>> #define R600_ERR(fmt, args...) \ >>> @@ -50,6 +51,16 @@ struct r600_resource { >>> >>> /* Resource state. */ >>> unsigned domains; >>> + >>> + /* The buffer range which is initialized (with a write transfer, >>> + * streamout, DMA, or as a random access target). The rest of >>> + * the buffer is considered invalid and can be mapped >>> unsynchronized. >>> + * >>> + * This allows unsychronized mapping of a buffer range which hasn't >>> + * been used yet. It's for applications which forget to use >>> + * the unsynchronized map flag and expect the driver to figure it >>> out. >>> + */ >>> + struct util_range valid_buffer_range; >>> }; >>> >>> #define R600_BLOCK_MAX_BO 32 >>> diff --git a/src/gallium/drivers/r600/r600_buffer.c >>> b/src/gallium/drivers/r600/r600_buffer.c >>> index da6e529..8080611 100644 >>> --- a/src/gallium/drivers/r600/r600_buffer.c >>> +++ b/src/gallium/drivers/r600/r600_buffer.c >>> @@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen >>> *screen, >>> { >>> struct r600_resource *rbuffer = r600_resource(buf); >>> >>> + util_range_destroy(&rbuffer->valid_buffer_range); >>> pb_reference(&rbuffer->buf, NULL); >>> FREE(rbuffer); >>> } >>> @@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct >>> pipe_context *ctx, >>> >>> assert(box->x + box->width <= resource->width0); >>> >>> + /* See if the buffer range being mapped has never been initialized, >>> + * in which case it can be mapped unsynchronized. */ >>> + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && >>> + usage & PIPE_TRANSFER_WRITE && >>> + !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, >>> box->x + box->width)) { >>> + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; >>> + } >>> + >>> if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && >>> !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { >>> assert(usage & PIPE_TRANSFER_WRITE); >>> @@ -182,6 +191,7 @@ static void r600_buffer_transfer_unmap(struct >>> pipe_context *pipe, >>> { >>> struct r600_context *rctx = (struct r600_context*)pipe; >>> struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; >>> + struct r600_resource *rbuffer = r600_resource(transfer->resource); >>> >>> if (rtransfer->staging) { >>> struct pipe_resource *dst, *src; >>> @@ -207,6 +217,11 @@ static void r600_buffer_transfer_unmap(struct >>> pipe_context *pipe, >>> } >>> pipe_resource_reference((struct >>> pipe_resource**)&rtransfer->staging, NULL); >>> } >>> + >>> + if (transfer->usage & PIPE_TRANSFER_WRITE) { >>> + util_range_add(&rbuffer->valid_buffer_range, >>> transfer->box.x, >>> + transfer->box.x + transfer->box.width); >>> + } >>> util_slab_free(&rctx->pool_transfers, transfer); >>> } >>> >>> @@ -263,6 +278,7 @@ bool r600_init_resource(struct r600_screen *rscreen, >>> >>> res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf); >>> res->domains = domains; >>> + util_range_set_empty(&res->valid_buffer_range); >>> return true; >>> } >>> >>> @@ -279,6 +295,7 @@ struct pipe_resource *r600_buffer_create(struct >>> pipe_screen *screen, >>> pipe_reference_init(&rbuffer->b.b.reference, 1); >>> rbuffer->b.b.screen = screen; >>> rbuffer->b.vtbl = &r600_buffer_vtbl; >>> + util_range_init(&rbuffer->valid_buffer_range); >>> >>> if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, >>> TRUE, templ->usage)) { >>> FREE(rbuffer); >>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c >>> b/src/gallium/drivers/r600/r600_hw_context.c >>> index 450f6ac..677c6fc 100644 >>> --- a/src/gallium/drivers/r600/r600_hw_context.c >>> +++ b/src/gallium/drivers/r600/r600_hw_context.c >>> @@ -1143,6 +1143,9 @@ void r600_cp_dma_copy_buffer(struct r600_context >>> *rctx, >>> >>> /* Invalidate the read caches. */ >>> rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; >>> + >>> + util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, >>> + dst_offset + size); >>> } >>> >>> void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw) >>> @@ -1189,4 +1192,7 @@ void r600_dma_copy(struct r600_context *rctx, >>> src_offset += csize << shift; >>> size -= csize; >>> } >>> + >>> + util_range_add(&rdst->valid_buffer_range, dst_offset, >>> + dst_offset + size); >>> } >>> diff --git a/src/gallium/drivers/r600/r600_state_common.c >>> b/src/gallium/drivers/r600/r600_state_common.c >>> index 8906695..69c6be1 100644 >>> --- a/src/gallium/drivers/r600/r600_state_common.c >>> +++ b/src/gallium/drivers/r600/r600_state_common.c >>> @@ -967,6 +967,7 @@ r600_create_so_target(struct pipe_context *ctx, >>> { >>> struct r600_context *rctx = (struct r600_context *)ctx; >>> struct r600_so_target *t; >>> + struct r600_resource *rbuffer = (struct r600_resource*)buffer; >>> >>> t = CALLOC_STRUCT(r600_so_target); >>> if (!t) { >>> @@ -986,6 +987,9 @@ r600_create_so_target(struct pipe_context *ctx, >>> pipe_resource_reference(&t->b.buffer, buffer); >>> t->b.buffer_offset = buffer_offset; >>> t->b.buffer_size = buffer_size; >>> + >>> + util_range_add(&rbuffer->valid_buffer_range, buffer_offset, >>> + buffer_offset + buffer_size); >>> return &t->b; >>> } >>> >>> -- >>> 1.7.10.4 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev