On Wed, Apr 24, 2013 at 6:52 PM, Marek Olšák <mar...@gmail.com> wrote: > Reviewed-by: Marek Olšák <mar...@gmail.com> > > I assume you have tested this (e.g. with the test code at the end of > r600_pipe.c).
No piglit regressions and the test in r600_pipe.c passes. Alex > > Marek > > On Wed, Apr 24, 2013 at 9:15 PM, <alexdeuc...@gmail.com> wrote: >> From: Alex Deucher <alexander.deuc...@amd.com> >> >> Lighter weight than using streamout. Only evergreen >> and newer asics support embedded data as src with >> CP DMA. >> >> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> >> --- >> src/gallium/drivers/r600/evergreen_hw_context.c | 66 >> +++++++++++++++++++++++ >> src/gallium/drivers/r600/evergreend.h | 42 ++++++++++++++ >> src/gallium/drivers/r600/r600_blit.c | 10 +++- >> src/gallium/drivers/r600/r600_pipe.h | 3 + >> 4 files changed, 119 insertions(+), 2 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c >> b/src/gallium/drivers/r600/evergreen_hw_context.c >> index d980c18..7cab879 100644 >> --- a/src/gallium/drivers/r600/evergreen_hw_context.c >> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c >> @@ -106,3 +106,69 @@ void evergreen_dma_copy(struct r600_context *rctx, >> util_range_add(&rdst->valid_buffer_range, dst_offset, >> dst_offset + size); >> } >> + >> +/* The max number of bytes to copy per packet. */ >> +#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) >> + >> +void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, >> + struct pipe_resource *dst, uint64_t >> offset, >> + unsigned size, uint32_t clear_value) >> +{ >> + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; >> + >> + assert(size); >> + assert(rctx->screen->has_cp_dma); >> + >> + offset += r600_resource_va(&rctx->screen->screen, dst); >> + >> + /* We flush the caches, because we might read from or write >> + * to resources which are bound right now. 
*/ >> + rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES | >> + R600_CONTEXT_FLUSH_AND_INV | >> + R600_CONTEXT_FLUSH_AND_INV_CB_META | >> + R600_CONTEXT_FLUSH_AND_INV_DB_META | >> + R600_CONTEXT_STREAMOUT_FLUSH | >> + R600_CONTEXT_WAIT_3D_IDLE; >> + >> + while (size) { >> + unsigned sync = 0; >> + unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); >> + unsigned reloc; >> + >> + r600_need_cs_space(rctx, 10 + (rctx->flags ? >> R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); >> + >> + /* Flush the caches for the first copy only. */ >> + if (rctx->flags) { >> + r600_flush_emit(rctx); >> + } >> + >> + /* Do the synchronization after the last copy, so that all >> data is written to memory. */ >> + if (size == byte_count) { >> + sync = PKT3_CP_DMA_CP_SYNC; >> + } >> + >> + /* This must be done after r600_need_cs_space. */ >> + reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, >> + (struct r600_resource*)dst, >> RADEON_USAGE_WRITE); >> + >> + r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0)); >> + r600_write_value(cs, clear_value); /* DATA [31:0] */ >> + r600_write_value(cs, sync | PKT3_CP_DMA_SRC_SEL(2)); /* >> CP_SYNC [31] | SRC_SEL[30:29] */ >> + r600_write_value(cs, offset); /* DST_ADDR_LO [31:0] */ >> + r600_write_value(cs, (offset >> 32) & 0xff); /* >> DST_ADDR_HI [7:0] */ >> + r600_write_value(cs, byte_count); /* COMMAND [29:22] | >> BYTE_COUNT [20:0] */ >> + >> + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); >> + r600_write_value(cs, reloc); >> + >> + size -= byte_count; >> + offset += byte_count; >> + } >> + >> + /* Invalidate the read caches. 
*/ >> + rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; >> + >> + util_range_add(&r600_resource(dst)->valid_buffer_range, offset, >> + offset + size); >> +} >> + >> diff --git a/src/gallium/drivers/r600/evergreend.h >> b/src/gallium/drivers/r600/evergreend.h >> index 53b68a4..5d72432 100644 >> --- a/src/gallium/drivers/r600/evergreend.h >> +++ b/src/gallium/drivers/r600/evergreend.h >> @@ -118,6 +118,48 @@ >> #define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) >> #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | >> PKT_COUNT_S(count)) >> >> +#define PKT3_CP_DMA 0x41 >> +/* 1. header >> + * 2. SRC_ADDR_LO [31:0] or DATA [31:0] >> + * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | >> SRC_ADDR_HI [7:0] >> + * 4. DST_ADDR_LO [31:0] >> + * 5. DST_ADDR_HI [7:0] >> + * 6. COMMAND [29:22] | BYTE_COUNT [20:0] >> + */ >> +#define PKT3_CP_DMA_CP_SYNC (1 << 31) >> +#define PKT3_CP_DMA_SRC_SEL(x) ((x) << 29) >> +/* 0 - SRC_ADDR >> + * 1 - GDS (program SAS to 1 as well) >> + * 2 - DATA >> + */ >> +#define PKT3_CP_DMA_DST_SEL(x) ((x) << 20) >> +/* 0 - DST_ADDR >> + * 1 - GDS (program DAS to 1 as well) >> + */ >> +/* COMMAND */ >> +#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) >> +/* 0 - none >> + * 1 - 8 in 16 >> + * 2 - 8 in 32 >> + * 3 - 8 in 64 >> + */ >> +#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) >> +/* 0 - none >> + * 1 - 8 in 16 >> + * 2 - 8 in 32 >> + * 3 - 8 in 64 >> + */ >> +#define PKT3_CP_DMA_CMD_SAS (1 << 26) >> +/* 0 - memory >> + * 1 - register >> + */ >> +#define PKT3_CP_DMA_CMD_DAS (1 << 27) >> +/* 0 - memory >> + * 1 - register >> + */ >> +#define PKT3_CP_DMA_CMD_SAIC (1 << 28) >> +#define PKT3_CP_DMA_CMD_DAIC (1 << 29) >> + >> /* Registers */ >> #define R_0084FC_CP_STRMOUT_CNTL 0x000084FC >> #define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0) >> diff --git a/src/gallium/drivers/r600/r600_blit.c >> b/src/gallium/drivers/r600/r600_blit.c >> index a0384bf..afe4389 100644 >> --- a/src/gallium/drivers/r600/r600_blit.c 
>> +++ b/src/gallium/drivers/r600/r600_blit.c >> @@ -526,10 +526,16 @@ static void r600_clear_buffer(struct pipe_context >> *ctx, struct pipe_resource *ds >> unsigned offset, unsigned size, unsigned char >> value) >> { >> struct r600_context *rctx = (struct r600_context*)ctx; >> + uint32_t v = value; >> >> - if (rctx->screen->has_streamout && offset % 4 == 0 && size % 4 == 0) >> { >> + if (rctx->screen->has_cp_dma && >> + rctx->chip_class >= EVERGREEN && >> + offset % 4 == 0 && size % 4 == 0) { >> + uint32_t clear_value = v | (v << 8) | (v << 16) | (v << 24); >> + >> + evergreen_cp_dma_clear_buffer(rctx, dst, offset, size, >> clear_value); >> + } else if (rctx->screen->has_streamout && offset % 4 == 0 && size % >> 4 == 0) { >> union pipe_color_union clear_value; >> - uint32_t v = value; >> >> clear_value.ui[0] = v | (v << 8) | (v << 16) | (v << 24); >> >> diff --git a/src/gallium/drivers/r600/r600_pipe.h >> b/src/gallium/drivers/r600/r600_pipe.h >> index 1dbed80..eb25e35 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.h >> +++ b/src/gallium/drivers/r600/r600_pipe.h >> @@ -829,6 +829,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, >> struct pipe_resource *dst, uint64_t dst_offset, >> struct pipe_resource *src, uint64_t src_offset, >> unsigned size); >> +void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, >> + struct pipe_resource *dst, uint64_t >> offset, >> + unsigned size, uint32_t clear_value); >> void r600_dma_copy(struct r600_context *rctx, >> struct pipe_resource *dst, >> struct pipe_resource *src, >> -- >> 1.7.7.5 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev