I pushed this patch without a review because of bugs that needed to be fixed.
Marek On Tue, Mar 11, 2014 at 3:09 PM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > v2: - allow byte-aligned DMA buffer copies on Evergreen > - fix piglit/texsubimage regression > - use the fallback for 3D copies (depth > 1) as well > --- > src/gallium/drivers/r600/evergreen_state.c | 40 +++++++++-------- > src/gallium/drivers/r600/r600_state.c | 44 +++++++++++-------- > src/gallium/drivers/radeon/r600_buffer_common.c | 58 > +++++++++++-------------- > src/gallium/drivers/radeon/r600_pipe_common.h | 17 ++++---- > src/gallium/drivers/radeon/r600_texture.c | 18 +++----- > src/gallium/drivers/radeonsi/si_state.c | 19 ++++---- > 6 files changed, 99 insertions(+), 97 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index dca7c58..1e14b01 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct > r600_context *rctx, > } > } > > -static boolean evergreen_dma_blit(struct pipe_context *ctx, > - struct pipe_resource *dst, > - unsigned dst_level, > - unsigned dst_x, unsigned dst_y, unsigned > dst_z, > - struct pipe_resource *src, > - unsigned src_level, > - const struct pipe_box *src_box) > +static void evergreen_dma_blit(struct pipe_context *ctx, > + struct pipe_resource *dst, > + unsigned dst_level, > + unsigned dstx, unsigned dsty, unsigned dstz, > + struct pipe_resource *src, > + unsigned src_level, > + const struct pipe_box *src_box) > { > struct r600_context *rctx = (struct r600_context *)ctx; > struct r600_texture *rsrc = (struct r600_texture*)src; > @@ -3343,22 +3343,22 @@ static boolean evergreen_dma_blit(struct pipe_context > *ctx, > unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height; > unsigned src_w, dst_w; > unsigned src_x, src_y; > + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; > > if (rctx->b.rings.dma.cs == NULL) { > - return FALSE; > + goto fallback; > } > > if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { > evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, > src_box->width); > - return TRUE; > + return; > } > > - if (src->format != dst->format) { > - return FALSE; > - } > - if (rdst->dirty_level_mask != 0) { > - return FALSE; > + if (src->format != dst->format || src_box->depth > 1 || > + rdst->dirty_level_mask != 0) { > + goto fallback; > } > + > if (rsrc->dirty_level_mask) { > ctx->flush_resource(ctx, src); > } > @@ -3383,13 +3383,13 @@ static boolean evergreen_dma_blit(struct pipe_context > *ctx, > > if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { > /* FIXME evergreen can do partial blit */ > - return FALSE; > + goto fallback; > } > /* the x test here are currently useless (because we don't support > partial blit) > * but keep them around so we don't forget about those > */ > if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || > (src_box->y & 0x7) || (dst_y & 0x7)) { > - return FALSE; > + goto fallback; > } > > /* 128 bpp surfaces require non_disp_tiling for both > @@ -3400,7 +3400,7 @@ static boolean evergreen_dma_blit(struct pipe_context > *ctx, > if ((rctx->b.chip_class == CAYMAN) && > (src_mode != dst_mode) && > (util_format_get_blocksize(src->format) >= 16)) { > - return FALSE; > + goto fallback; > } > > if (src_mode == dst_mode) { > @@ -3423,7 +3423,11 @@ static boolean evergreen_dma_blit(struct pipe_context > *ctx, > src, src_level, src_x, src_y, > src_box->z, > copy_height, dst_pitch, bpp); > } > - return TRUE; > + return; > + > +fallback: > + ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, > + src, src_level, src_box); > } > > void evergreen_init_state_functions(struct r600_context *rctx) > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 6d89e6c..2a4813b 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct r600_context > *rctx, > return TRUE; > } > > -static boolean r600_dma_blit(struct pipe_context *ctx, > - struct pipe_resource *dst, > - unsigned dst_level, > - unsigned dst_x, unsigned dst_y, unsigned dst_z, > - struct pipe_resource *src, > - unsigned src_level, > - const struct pipe_box *src_box) > +static void r600_dma_blit(struct pipe_context *ctx, > + struct pipe_resource *dst, > + unsigned dst_level, > + unsigned dstx, unsigned dsty, unsigned dstz, > + struct pipe_resource *src, > + unsigned src_level, > + const struct pipe_box *src_box) > { > struct r600_context *rctx = (struct r600_context *)ctx; > struct r600_texture *rsrc = (struct r600_texture*)src; > @@ -2897,18 +2897,22 @@ static boolean r600_dma_blit(struct pipe_context *ctx, > unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height; > unsigned src_w, dst_w; > unsigned src_x, src_y; > + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; > > if (rctx->b.rings.dma.cs == NULL) { > - return FALSE; > + goto fallback; > } > > if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { > + if (dst_x % 4 || src_box->x % 4 || src_box->width % 4) > + goto fallback; > + > r600_dma_copy(rctx, dst, src, dst_x, src_box->x, > src_box->width); > - return TRUE; > + return; > } > > - if (src->format != dst->format) { > - return FALSE; > + if (src->format != dst->format || src_box->depth > 1) { > + goto fallback; > } > > src_x = util_format_get_nblocksx(src->format, src_box->x); > @@ -2931,11 +2935,11 @@ static boolean r600_dma_blit(struct pipe_context *ctx, > > if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { > /* strick requirement on r6xx/r7xx */ > - return FALSE; > + goto fallback; > } > /* lot of constraint on alignment this should capture them all */ > if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) { > - return FALSE; > + goto fallback; > } > > if (src_mode == dst_mode) { > @@ -2955,15 +2959,21 @@ static boolean r600_dma_blit(struct pipe_context *ctx, > size = src_box->height * src_pitch; > /* must be dw aligned */ > if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) > { > - return FALSE; > + goto fallback; > } > r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size); > } else { > - return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, > dst_z, > + if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, > dst_z, > src, src_level, src_x, src_y, > src_box->z, > - copy_height, dst_pitch, bpp); > + copy_height, dst_pitch, bpp)) { > + goto fallback; > + } > } > - return TRUE; > + return; > + > +fallback: > + ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, > + src, src_level, src_box); > } > > void r600_init_state_functions(struct r600_context *rctx) > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index 90ca8cb..a7ecfb3 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct > pipe_context *ctx, > return data; > } > > +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, > + unsigned dstx, unsigned srcx, unsigned > size) > +{ > + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); > + > + return rctx->screen->has_cp_dma || > + (dword_aligned && (rctx->rings.dma.cs || > + rctx->screen->has_streamout)); > + > +} > + > static void *r600_buffer_transfer_map(struct pipe_context *ctx, > struct pipe_resource *resource, > unsigned level, > @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct > pipe_context *ctx, > else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && > !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && > !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && > - (rscreen->has_cp_dma || > - (rscreen->has_streamout && > - /* The buffer range must be aligned to 4 with streamout. */ > - box->x % 4 == 0 && box->width % 4 == 0))) { > + r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) { > assert(usage & PIPE_TRANSFER_WRITE); > > /* Check if mapping this buffer would cause waiting for the > GPU. */ > @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct > pipe_context *ctx, > /* At this point, the buffer is always idle (we checked it > above). */ > usage |= PIPE_TRANSFER_UNSYNCHRONIZED; > } > - /* Using DMA for larger reads is much faster */ > + /* Using a staging buffer in GTT for larger reads is much faster. */ > else if ((usage & PIPE_TRANSFER_READ) && > !(usage & PIPE_TRANSFER_WRITE) && > - (rbuffer->domains == RADEON_DOMAIN_VRAM)) { > + rbuffer->domains == RADEON_DOMAIN_VRAM && > + r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) { > unsigned offset; > struct r600_resource *staging = NULL; > > @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct > pipe_context *ctx, > if (staging) { > data += box->x % R600_MAP_BUFFER_ALIGNMENT; > > - /* Copy the staging buffer into the original one. */ > - if (rctx->dma_copy(ctx, (struct > pipe_resource*)staging, 0, > - box->x % > R600_MAP_BUFFER_ALIGNMENT, > - 0, 0, resource, level, box)) > { > - rctx->rings.gfx.flush(rctx, 0); > - if (rctx->rings.dma.cs) > - rctx->rings.dma.flush(rctx, 0); > - > - /* Wait for any offloaded CS flush to complete > - * to avoid busy-waiting in the winsys. */ > - rctx->ws->cs_sync_flush(rctx->rings.gfx.cs); > - if (rctx->rings.dma.cs) > - > rctx->ws->cs_sync_flush(rctx->rings.dma.cs); > - > - rctx->ws->buffer_wait(staging->buf, > RADEON_USAGE_WRITE); > - return r600_buffer_get_transfer(ctx, > resource, level, usage, box, > - ptransfer, > data, staging, offset); > - } else { > - pipe_resource_reference((struct > pipe_resource**)&staging, NULL); > - } > + /* Copy the VRAM buffer to the staging buffer. */ > + rctx->dma_copy(ctx, &staging->b.b, 0, > + box->x % R600_MAP_BUFFER_ALIGNMENT, > + 0, 0, resource, level, box); > + > + /* Just do the synchronization. The buffer is mapped > already. */ > + r600_buffer_map_sync_with_rings(rctx, staging, > PIPE_TRANSFER_READ); > + > + return r600_buffer_get_transfer(ctx, resource, level, > usage, box, > + ptransfer, data, > staging, offset); > } > } > > @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct > pipe_context *ctx, > u_box_1d(soffset, size, &box); > > /* Copy the staging buffer into the original one. */ > - if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && > - rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, > 0, &box)) { > - /* DONE. */ > - } else { > - ctx->resource_copy_region(ctx, dst, 0, > doffset, 0, 0, src, 0, &box); > - } > + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, > &box); > } > pipe_resource_reference((struct > pipe_resource**)&rtransfer->staging, NULL); > } > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index 493a8fc..5e544c5 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -344,15 +344,14 @@ struct r600_common_context { > boolean current_render_cond_cond; > boolean predicate_drawing; > > - /* Copy one resource to another using async DMA. > - * False is returned if the copy couldn't be done. */ > - boolean (*dma_copy)(struct pipe_context *ctx, > - struct pipe_resource *dst, > - unsigned dst_level, > - unsigned dst_x, unsigned dst_y, unsigned dst_z, > - struct pipe_resource *src, > - unsigned src_level, > - const struct pipe_box *src_box); > + /* Copy one resource to another using async DMA. */ > + void (*dma_copy)(struct pipe_context *ctx, > + struct pipe_resource *dst, > + unsigned dst_level, > + unsigned dst_x, unsigned dst_y, unsigned dst_z, > + struct pipe_resource *src, > + unsigned src_level, > + const struct pipe_box *src_box); > > void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource > *dst, > unsigned offset, unsigned size, unsigned value); > diff --git a/src/gallium/drivers/radeon/r600_texture.c > b/src/gallium/drivers/radeon/r600_texture.c > index e3b918e..5c32a84 100644 > --- a/src/gallium/drivers/radeon/r600_texture.c > +++ b/src/gallium/drivers/radeon/r600_texture.c > @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct > pipe_context *ctx, struct r600_t > return; > } > > - if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0, > - src, transfer->level, > - &transfer->box)) { > - ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0, > - src, transfer->level, > &transfer->box); > - } > + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, > + &transfer->box); > } > > /* Copy from a transfer's staging texture to a full GPU one. */ > @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct > pipe_context *ctx, struct r600 > return; > } > > - if (!rctx->dma_copy(ctx, dst, transfer->level, > - transfer->box.x, transfer->box.y, > transfer->box.z, > - src, 0, &sbox)) { > - ctx->resource_copy_region(ctx, dst, transfer->level, > - transfer->box.x, transfer->box.y, > transfer->box.z, > - src, 0, &sbox); > - } > + rctx->dma_copy(ctx, dst, transfer->level, > + transfer->box.x, transfer->box.y, transfer->box.z, > + src, 0, &sbox); > } > > static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned > level, > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index f3d7f21..3015611 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -2924,16 +2924,19 @@ static void *si_create_blend_custom(struct si_context > *sctx, unsigned mode) > return si_create_blend_state_mode(&sctx->b.b, &blend, mode); > } > > -static boolean si_dma_copy(struct pipe_context *ctx, > - struct pipe_resource *dst, > - unsigned dst_level, > - unsigned dst_x, unsigned dst_y, unsigned dst_z, > - struct pipe_resource *src, > - unsigned src_level, > - const struct pipe_box *src_box) > +static void si_dma_copy(struct pipe_context *ctx, > + struct pipe_resource *dst, > + unsigned dst_level, > + unsigned dst_x, unsigned dst_y, unsigned dst_z, > + struct pipe_resource *src, > + unsigned src_level, > + const struct pipe_box *src_box) > { > /* XXX implement this or share evergreen_dma_blit with r600g */ > - return FALSE; > + > + /* Fallback: */ > + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z, > + src, src_level, src_box); > } > > static void si_set_occlusion_query_state(struct pipe_context *ctx, bool > enable) > -- > 1.8.3.2 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev