For patches 1 & 2: Reviewed-by: Marek Olšák <marek.ol...@amd.com>
How was the DMA code tested? I think the best thing would be to switch resource_copy_region to dma_copy just for testing and run piglit. (you also probably want to avoid recursion between dma_copy and resource_copy_region) Marek On Tue, Aug 5, 2014 at 7:31 PM, Christian König <deathsim...@vodafone.de> wrote: > From: Christian König <christian.koe...@amd.com> > > v2: fix a couple of typos and bugs > > Signed-off-by: Christian König <christian.koe...@amd.com> > --- > src/gallium/drivers/radeonsi/si_dma.c | 85 > +++++++++++++++++++++++++++-------- > src/gallium/drivers/radeonsi/sid.h | 1 + > 2 files changed, 68 insertions(+), 18 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_dma.c > b/src/gallium/drivers/radeonsi/si_dma.c > index 26f1e1b..4d72f62 100644 > --- a/src/gallium/drivers/radeonsi/si_dma.c > +++ b/src/gallium/drivers/radeonsi/si_dma.c > @@ -111,6 +111,48 @@ static void si_dma_copy_buffer(struct si_context *ctx, > } > } > > +static void si_dma_copy_partial(struct si_context *ctx, > + struct pipe_resource *dst, > + uint64_t dst_offset, > + uint32_t dst_slice_size, > + uint32_t dst_pitch, > + struct pipe_resource *src, > + uint64_t src_offset, > + uint32_t src_slice_size, > + uint32_t src_pitch, > + uint32_t width, > + uint32_t height, > + uint32_t depth, > + unsigned bpp) > +{ > + struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs; > + struct r600_resource *rdst = (struct r600_resource*)dst; > + struct r600_resource *rsrc = (struct r600_resource*)src; > + > + dst_offset += r600_resource_va(&ctx->screen->b.b, dst); > + src_offset += r600_resource_va(&ctx->screen->b.b, src); > + > + r600_need_dma_space(&ctx->b, 9); > + > + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, > RADEON_USAGE_READ, > + RADEON_PRIO_MIN); > + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, > RADEON_USAGE_WRITE, > + RADEON_PRIO_MIN); > + > + radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, > SI_DMA_COPY_PARTIAL, 0x0)); > + > + radeon_emit(cs, src_offset & 
0xffffffff); > + radeon_emit(cs, ((src_offset >> 32UL) & 0xff) | (src_pitch << 13)); > + radeon_emit(cs, src_slice_size); > + > + radeon_emit(cs, dst_offset & 0xffffffff); > + radeon_emit(cs, ((dst_offset >> 32UL) & 0xff) | (dst_pitch << 13)); > + radeon_emit(cs, dst_slice_size); > + > + radeon_emit(cs, width | (height << 16)); > + radeon_emit(cs, depth | (util_logbase2(bpp) << 29)); > +} > + > static void si_dma_copy_tile(struct si_context *ctx, > struct pipe_resource *dst, > unsigned dst_level, > @@ -299,33 +341,40 @@ void si_dma_copy(struct pipe_context *ctx, > src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? > RADEON_SURF_MODE_LINEAR : src_mode; > dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? > RADEON_SURF_MODE_LINEAR : dst_mode; > > - if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { > - /* FIXME si can do partial blit */ > - goto fallback; > - } > - /* the x test here are currently useless (because we don't support > partial blit) > - * but keep them around so we don't forget about those > - */ > - if ((src_pitch % 8) || (src_box->x % 8) || (dst_x % 8) || (src_box->y > % 8) || (dst_y % 8)) { > + if (((src_pitch % 8) || (src_box->x % 8) || (dst_x % 8) || > (src_box->y % 8) || (dst_y % 8)) && > + ((src_mode != RADEON_SURF_MODE_LINEAR) || (dst_mode != > RADEON_SURF_MODE_LINEAR))) { > goto fallback; > } > > if (src_mode == dst_mode) { > uint64_t dst_offset, src_offset; > - /* simple dma blit would do NOTE code here assume : > - * src_box.x/y == 0 > - * dst_x/y == 0 > - * dst_pitch == src_pitch > - */ > - src_offset= rsrc->surface.level[src_level].offset; > - src_offset += rsrc->surface.level[src_level].slice_size * > src_box->z; > + uint32_t dst_slice_size, src_slice_size; > + > + src_slice_size = rsrc->surface.level[src_level].slice_size; > + src_offset = rsrc->surface.level[src_level].offset; > + src_offset += src_slice_size * src_box->z; > src_offset += src_y * src_pitch + src_x * bpp; > + > + dst_slice_size = 
rdst->surface.level[dst_level].slice_size; > dst_offset = rdst->surface.level[dst_level].offset; > - dst_offset += rdst->surface.level[dst_level].slice_size * > dst_z; > + dst_offset += dst_slice_size * dst_z; > dst_offset += dst_y * dst_pitch + dst_x * bpp; > - si_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, > - src_box->height * src_pitch); > + > + if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != > dst_w) { > + if (src_mode != RADEON_SURF_MODE_LINEAR) > + goto fallback; > + > + si_dma_copy_partial(sctx, dst, dst_offset, > dst_slice_size, dst_pitch, > + src, src_offset, src_slice_size, > src_pitch, > + src_box->width, src_box->height, > src_box->depth, bpp); > + } else { > + si_dma_copy_buffer(sctx, dst, src, dst_offset, > src_offset, > + src_box->height * src_pitch); > + } > } else { > + if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != > dst_w) > + goto fallback; > + > si_dma_copy_tile(sctx, dst, dst_level, dst_x, dst_y, dst_z, > src, src_level, src_x, src_y, src_box->z, > copy_height, dst_pitch, bpp); > diff --git a/src/gallium/drivers/radeonsi/sid.h > b/src/gallium/drivers/radeonsi/sid.h > index 3241725..2b2be15 100644 > --- a/src/gallium/drivers/radeonsi/sid.h > +++ b/src/gallium/drivers/radeonsi/sid.h > @@ -8656,6 +8656,7 @@ > #define SI_DMA_COPY_MAX_SIZE_DW 0xffff8 > #define SI_DMA_COPY_DWORD_ALIGNED 0x00 > #define SI_DMA_COPY_BYTE_ALIGNED 0x40 > +#define SI_DMA_COPY_PARTIAL 0x41 > #define SI_DMA_COPY_TILED 0x8 > #define SI_DMA_PACKET_INDIRECT_BUFFER 0x4 > #define SI_DMA_PACKET_SEMAPHORE 0x5 > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev