The buffer upload code required dword alignment, but I don't know why. The requirement might have been needed for R600-R700.
Marek On Sun, Mar 9, 2014 at 11:59 AM, Niels Ole Salscheider <niels_...@salscheider-online.de> wrote: > On Sunday 09 March 2014, 02:24:51, Marek Olšák wrote: >> From: Marek Olšák <marek.ol...@amd.com> >> >> --- >> src/gallium/drivers/r600/evergreen_state.c | 37 +++++++++------- >> src/gallium/drivers/r600/r600_state.c | 41 ++++++++++------- >> src/gallium/drivers/radeon/r600_buffer_common.c | 58 >> +++++++++++-------------- src/gallium/drivers/radeon/r600_pipe_common.h | >> 17 ++++---- >> src/gallium/drivers/radeon/r600_texture.c | 18 +++----- >> src/gallium/drivers/radeonsi/si_state.c | 19 ++++---- >> 6 files changed, 97 insertions(+), 93 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/evergreen_state.c >> b/src/gallium/drivers/r600/evergreen_state.c index dca7c58..5e57f8d 100644 >> --- a/src/gallium/drivers/r600/evergreen_state.c >> +++ b/src/gallium/drivers/r600/evergreen_state.c >> @@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct >> r600_context *rctx, } >> } >> >> -static boolean evergreen_dma_blit(struct pipe_context *ctx, >> - struct pipe_resource *dst, >> - unsigned dst_level, >> - unsigned dst_x, unsigned dst_y, unsigned >> dst_z, >> - struct pipe_resource *src, >> - unsigned src_level, >> - const struct pipe_box *src_box) >> +static void evergreen_dma_blit(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dst_x, unsigned dst_y, unsigned dst_z, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box) >> { >> struct r600_context *rctx = (struct r600_context *)ctx; >> struct r600_texture *rsrc = (struct r600_texture*)src; >> @@ -3345,19 +3345,22 @@ static boolean evergreen_dma_blit(struct >> pipe_context *ctx, unsigned src_x, src_y; >> >> if (rctx->b.rings.dma.cs == NULL) { >> - return FALSE; >> + goto fallback; >> } >> >> if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { >> + if (dst_x % 4 || src_box->x % 4 || src_box->width % 4) >> + 
goto fallback; > > Why do we need this? I think that the async DMA engine can handle byte aligned > copies. It is streamout that needs x and width to be dw aligned, isn't it? > >> + >> evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box- >>width); >> - return TRUE; >> + return; >> } >> >> if (src->format != dst->format) { >> - return FALSE; >> + goto fallback; >> } >> if (rdst->dirty_level_mask != 0) { >> - return FALSE; >> + goto fallback; >> } >> if (rsrc->dirty_level_mask) { >> ctx->flush_resource(ctx, src); >> @@ -3383,13 +3386,13 @@ static boolean evergreen_dma_blit(struct >> pipe_context *ctx, >> >> if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { >> /* FIXME evergreen can do partial blit */ >> - return FALSE; >> + goto fallback; >> } >> /* the x test here are currently useless (because we don't support > partial >> blit) * but keep them around so we don't forget about those >> */ >> if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || >> (src_box->y & 0x7) || (dst_y & 0x7)) { - return FALSE; >> + goto fallback; >> } >> >> /* 128 bpp surfaces require non_disp_tiling for both >> @@ -3400,7 +3403,7 @@ static boolean evergreen_dma_blit(struct pipe_context >> *ctx, if ((rctx->b.chip_class == CAYMAN) && >> (src_mode != dst_mode) && >> (util_format_get_blocksize(src->format) >= 16)) { >> - return FALSE; >> + goto fallback; >> } >> >> if (src_mode == dst_mode) { >> @@ -3423,7 +3426,11 @@ static boolean evergreen_dma_blit(struct pipe_context >> *ctx, src, src_level, src_x, src_y, src_box->z, >> copy_height, dst_pitch, bpp); >> } >> - return TRUE; >> + return; >> + >> +fallback: >> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z, >> + src, src_level, src_box); >> } >> >> void evergreen_init_state_functions(struct r600_context *rctx) >> diff --git a/src/gallium/drivers/r600/r600_state.c >> b/src/gallium/drivers/r600/r600_state.c index 6d89e6c..a0e6d2d 100644 >> --- a/src/gallium/drivers/r600/r600_state.c 
>> +++ b/src/gallium/drivers/r600/r600_state.c >> @@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct >> r600_context *rctx, return TRUE; >> } >> >> -static boolean r600_dma_blit(struct pipe_context *ctx, >> - struct pipe_resource *dst, >> - unsigned dst_level, >> - unsigned dst_x, unsigned dst_y, unsigned dst_z, >> - struct pipe_resource *src, >> - unsigned src_level, >> - const struct pipe_box *src_box) >> +static void r600_dma_blit(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dst_x, unsigned dst_y, unsigned dst_z, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box) >> { >> struct r600_context *rctx = (struct r600_context *)ctx; >> struct r600_texture *rsrc = (struct r600_texture*)src; >> @@ -2899,16 +2899,19 @@ static boolean r600_dma_blit(struct pipe_context >> *ctx, unsigned src_x, src_y; >> >> if (rctx->b.rings.dma.cs == NULL) { >> - return FALSE; >> + goto fallback; >> } >> >> if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { >> + if (dst_x % 4 || src_box->x % 4 || src_box->width % 4) >> + goto fallback; >> + >> r600_dma_copy(rctx, dst, src, dst_x, src_box->x, >> src_box->width); >> - return TRUE; >> + return; >> } >> >> if (src->format != dst->format) { >> - return FALSE; >> + goto fallback; >> } >> >> src_x = util_format_get_nblocksx(src->format, src_box->x); >> @@ -2931,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context >> *ctx, >> >> if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { >> /* strick requirement on r6xx/r7xx */ >> - return FALSE; >> + goto fallback; >> } >> /* lot of constraint on alignment this should capture them all */ >> if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) { >> - return FALSE; >> + goto fallback; >> } >> >> if (src_mode == dst_mode) { >> @@ -2955,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context >> *ctx, size = src_box->height * src_pitch; >> /* 
must be dw aligned */ >> if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) { >> - return FALSE; >> + goto fallback; >> } >> r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size); >> } else { >> - return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, >> dst_z, >> + if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, >> dst_z, >> src, src_level, src_x, src_y, >> src_box->z, >> - copy_height, dst_pitch, bpp); >> + copy_height, dst_pitch, bpp)) { >> + goto fallback; >> + } >> } >> - return TRUE; >> + return; >> + >> +fallback: >> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z, >> + src, src_level, src_box); >> } >> >> void r600_init_state_functions(struct r600_context *rctx) >> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c >> b/src/gallium/drivers/radeon/r600_buffer_common.c index 90ca8cb..a7ecfb3 >> 100644 >> --- a/src/gallium/drivers/radeon/r600_buffer_common.c >> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c >> @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct >> pipe_context *ctx, return data; >> } >> >> +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, >> + unsigned dstx, unsigned srcx, unsigned >> size) >> +{ >> + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); >> + >> + return rctx->screen->has_cp_dma || >> + (dword_aligned && (rctx->rings.dma.cs || >> + rctx->screen->has_streamout)); >> + >> +} >> + >> static void *r600_buffer_transfer_map(struct pipe_context *ctx, >> struct pipe_resource *resource, >> unsigned level, >> @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct >> pipe_context *ctx, else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && >> !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && >> !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && >> - (rscreen->has_cp_dma || >> - (rscreen->has_streamout && >> - /* The buffer range must be aligned to 4 with streamout. 
*/ >> - box->x % 4 == 0 && box->width % 4 == 0))) { >> + r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) { >> assert(usage & PIPE_TRANSFER_WRITE); >> >> /* Check if mapping this buffer would cause waiting for the >> GPU. */ >> @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct >> pipe_context *ctx, /* At this point, the buffer is always idle (we checked >> it above). */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; >> } >> - /* Using DMA for larger reads is much faster */ >> + /* Using a staging buffer in GTT for larger reads is much faster. */ >> else if ((usage & PIPE_TRANSFER_READ) && >> !(usage & PIPE_TRANSFER_WRITE) && >> - (rbuffer->domains == RADEON_DOMAIN_VRAM)) { >> + rbuffer->domains == RADEON_DOMAIN_VRAM && >> + r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) { >> unsigned offset; >> struct r600_resource *staging = NULL; >> >> @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct >> pipe_context *ctx, if (staging) { >> data += box->x % R600_MAP_BUFFER_ALIGNMENT; >> >> - /* Copy the staging buffer into the original one. */ >> - if (rctx->dma_copy(ctx, (struct >> pipe_resource*)staging, 0, >> - box->x % >> R600_MAP_BUFFER_ALIGNMENT, >> - 0, 0, resource, level, box)) { >> - rctx->rings.gfx.flush(rctx, 0); >> - if (rctx->rings.dma.cs) >> - rctx->rings.dma.flush(rctx, 0); >> - >> - /* Wait for any offloaded CS flush to complete >> - * to avoid busy-waiting in the winsys. */ >> - rctx->ws->cs_sync_flush(rctx->rings.gfx.cs); >> - if (rctx->rings.dma.cs) >> - >> rctx->ws->cs_sync_flush(rctx->rings.dma.cs); >> - >> - rctx->ws->buffer_wait(staging->buf, >> RADEON_USAGE_WRITE); >> - return r600_buffer_get_transfer(ctx, resource, >> level, > usage, box, >> - ptransfer, >> data, staging, offset); >> - } else { >> - pipe_resource_reference((struct >> pipe_resource**)&staging, > NULL); >> - } >> + /* Copy the VRAM buffer to the staging buffer. 
*/ >> + rctx->dma_copy(ctx, &staging->b.b, 0, >> + box->x % R600_MAP_BUFFER_ALIGNMENT, >> + 0, 0, resource, level, box); >> + >> + /* Just do the synchronization. The buffer is mapped >> already. */ >> + r600_buffer_map_sync_with_rings(rctx, staging, > PIPE_TRANSFER_READ); >> + >> + return r600_buffer_get_transfer(ctx, resource, level, >> usage, box, >> + ptransfer, data, >> staging, offset); >> } >> } >> >> @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct >> pipe_context *ctx, u_box_1d(soffset, size, &box); >> >> /* Copy the staging buffer into the original one. */ >> - if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && >> - rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, >> &box)) { >> - /* DONE. */ >> - } else { >> - ctx->resource_copy_region(ctx, dst, 0, >> doffset, 0, 0, src, > 0, &box); >> - } >> + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, >> &box); >> } >> pipe_resource_reference((struct >> pipe_resource**)&rtransfer->staging, >> NULL); } >> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h >> b/src/gallium/drivers/radeon/r600_pipe_common.h index 493a8fc..5e544c5 >> 100644 >> --- a/src/gallium/drivers/radeon/r600_pipe_common.h >> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h >> @@ -344,15 +344,14 @@ struct r600_common_context { >> boolean current_render_cond_cond; >> boolean predicate_drawing; >> >> - /* Copy one resource to another using async DMA. >> - * False is returned if the copy couldn't be done. */ >> - boolean (*dma_copy)(struct pipe_context *ctx, >> - struct pipe_resource *dst, >> - unsigned dst_level, >> - unsigned dst_x, unsigned dst_y, unsigned dst_z, >> - struct pipe_resource *src, >> - unsigned src_level, >> - const struct pipe_box *src_box); >> + /* Copy one resource to another using async DMA. 
*/ >> + void (*dma_copy)(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dst_x, unsigned dst_y, unsigned dst_z, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box); >> >> void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource >> *dst, >> unsigned offset, unsigned size, unsigned value); >> diff --git a/src/gallium/drivers/radeon/r600_texture.c >> b/src/gallium/drivers/radeon/r600_texture.c index e3b918e..5c32a84 100644 >> --- a/src/gallium/drivers/radeon/r600_texture.c >> +++ b/src/gallium/drivers/radeon/r600_texture.c >> @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct >> pipe_context *ctx, struct r600_t return; >> } >> >> - if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0, >> - src, transfer->level, >> - &transfer->box)) { >> - ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0, >> - src, transfer->level, >> &transfer->box); >> - } >> + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, >> + &transfer->box); >> } >> >> /* Copy from a transfer's staging texture to a full GPU one. 
*/ >> @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct >> pipe_context *ctx, struct r600 return; >> } >> >> - if (!rctx->dma_copy(ctx, dst, transfer->level, >> - transfer->box.x, transfer->box.y, >> transfer->box.z, >> - src, 0, &sbox)) { >> - ctx->resource_copy_region(ctx, dst, transfer->level, >> - transfer->box.x, transfer->box.y, >> transfer->box.z, >> - src, 0, &sbox); >> - } >> + rctx->dma_copy(ctx, dst, transfer->level, >> + transfer->box.x, transfer->box.y, transfer->box.z, >> + src, 0, &sbox); >> } >> >> static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned >> level, diff --git a/src/gallium/drivers/radeonsi/si_state.c >> b/src/gallium/drivers/radeonsi/si_state.c index 4094421..9210547 100644 >> --- a/src/gallium/drivers/radeonsi/si_state.c >> +++ b/src/gallium/drivers/radeonsi/si_state.c >> @@ -2919,16 +2919,19 @@ static void *si_create_blend_custom(struct >> si_context *sctx, unsigned mode) return >> si_create_blend_state_mode(&sctx->b.b, &blend, mode); >> } >> >> -static boolean si_dma_copy(struct pipe_context *ctx, >> - struct pipe_resource *dst, >> - unsigned dst_level, >> - unsigned dst_x, unsigned dst_y, unsigned dst_z, >> - struct pipe_resource *src, >> - unsigned src_level, >> - const struct pipe_box *src_box) >> +static void si_dma_copy(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dst_x, unsigned dst_y, unsigned dst_z, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box) >> { >> /* XXX implement this or share evergreen_dma_blit with r600g */ >> - return FALSE; >> + >> + /* Fallback: */ >> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z, >> + src, src_level, src_box); >> } >> >> static void si_set_occlusion_query_state(struct pipe_context *ctx, bool >> enable) -- >> 1.8.3.2 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev