You check for streamout and CP DMA support, but you don't use resource_copy_region if DMA is not supported. The CP DMA and streamout-based buffer copying is only used by resource_copy_region.
The last parameter of buffer_wait should be RADEON_USAGE_WRITE (you're waiting for the last write to the staging buffer), but that parameter is not used by the winsys yet. Other than those two, the patch looks good. CP DMA != async DMA (dma_copy). CP DMA is actually a feature of the graphics ring. Marek On Tue, Mar 4, 2014 at 6:23 PM, Niels Ole Salscheider <niels_...@salscheider-online.de> wrote: > Using DMA for reads is much faster. > > Signed-off-by: Niels Ole Salscheider <niels_...@salscheider-online.de> > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 78 > +++++++++++++++++++------ > 1 file changed, 60 insertions(+), 18 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index 340ebb2..ed3a08c 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -260,6 +260,46 @@ static void *r600_buffer_transfer_map(struct > pipe_context *ctx, > /* At this point, the buffer is always idle (we checked it > above). */ > usage |= PIPE_TRANSFER_UNSYNCHRONIZED; > } > + /* Using DMA for larger reads is much faster */ > + else if ((usage & PIPE_TRANSFER_READ) && > + !(usage & PIPE_TRANSFER_WRITE) && > + (rbuffer->domains == RADEON_DOMAIN_VRAM) && > + (rscreen->has_cp_dma || > + (rscreen->has_streamout && > + /* The buffer range must be aligned to 4 with streamout. */ > + box->x % 4 == 0 && box->width % 4 == 0))) { > + unsigned offset; > + struct r600_resource *staging = NULL; > + > + u_upload_alloc(rctx->uploader, 0, > + box->width + (box->x % > R600_MAP_BUFFER_ALIGNMENT), > + &offset, (struct pipe_resource**)&staging, > (void**)&data); > + > + if (staging) { > + data += box->x % R600_MAP_BUFFER_ALIGNMENT; > + > + /* Copy the staging buffer into the original one. */ > + if (rctx->dma_copy(ctx, (struct > pipe_resource*)staging, 0, > + box->x % > R600_MAP_BUFFER_ALIGNMENT, > + 0, 0, resource, level, box)) > { > + rctx->rings.gfx.flush(rctx, 0); > + if (rctx->rings.dma.cs) > + rctx->rings.dma.flush(rctx, 0); > + > + /* Wait for any offloaded CS flush to complete > + * to avoid busy-waiting in the winsys. */ > + rctx->ws->cs_sync_flush(rctx->rings.gfx.cs); > + if (rctx->rings.dma.cs) > + > rctx->ws->cs_sync_flush(rctx->rings.dma.cs); > + > + rctx->ws->buffer_wait(staging->buf, > RADEON_USAGE_READ); > + return r600_buffer_get_transfer(ctx, > resource, level, usage, box, > + ptransfer, > data, staging, offset); > + } else { > + pipe_resource_reference((struct > pipe_resource**)&staging, NULL); > + } > + } > + } > > data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage); > if (!data) { > @@ -279,24 +319,26 @@ static void r600_buffer_transfer_unmap(struct > pipe_context *ctx, > struct r600_resource *rbuffer = r600_resource(transfer->resource); > > if (rtransfer->staging) { > - struct pipe_resource *dst, *src; > - unsigned soffset, doffset, size; > - struct pipe_box box; > - > - dst = transfer->resource; > - src = &rtransfer->staging->b.b; > - size = transfer->box.width; > - doffset = transfer->box.x; > - soffset = rtransfer->offset + transfer->box.x % > R600_MAP_BUFFER_ALIGNMENT; > - > - u_box_1d(soffset, size, &box); > - > - /* Copy the staging buffer into the original one. */ > - if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && > - rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) > { > - /* DONE. */ > - } else { > - ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, > src, 0, &box); > + if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) { > + struct pipe_resource *dst, *src; > + unsigned soffset, doffset, size; > + struct pipe_box box; > + > + dst = transfer->resource; > + src = &rtransfer->staging->b.b; > + size = transfer->box.width; > + doffset = transfer->box.x; > + soffset = rtransfer->offset + transfer->box.x % > R600_MAP_BUFFER_ALIGNMENT; > + > + u_box_1d(soffset, size, &box); > + > + /* Copy the staging buffer into the original one. */ > + if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && > + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, > 0, &box)) { > + /* DONE. */ > + } else { > + ctx->resource_copy_region(ctx, dst, 0, > doffset, 0, 0, src, 0, &box); > + } > } > pipe_resource_reference((struct > pipe_resource**)&rtransfer->staging, NULL); > } > -- > 1.9.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev