I'm wondering whether we need to do this more often in pipeline barriers? Also I'd really appreciate if you could add a check to see if there is any CP DMA activity in the first place.
On Thu, Jun 21, 2018 at 11:04 AM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Ported from RadeonSI. > This might fix some synchronization issues. > > I don't know if that will affect performance. > > CC: <mesa-sta...@lists.freedesktop.org> > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/vulkan/radv_cmd_buffer.c | 6 ++++++ > src/amd/vulkan/radv_private.h | 2 ++ > src/amd/vulkan/si_cmd_buffer.c | 12 +++++++++++- > 3 files changed, 19 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > b/src/amd/vulkan/radv_cmd_buffer.c > index 8bd41bc41ac..a8ab4d3b977 100644 > --- a/src/amd/vulkan/radv_cmd_buffer.c > +++ b/src/amd/vulkan/radv_cmd_buffer.c > @@ -2585,6 +2585,12 @@ VkResult radv_EndCommandBuffer( > si_emit_cache_flush(cmd_buffer); > } > > + /* Make sure CP DMA is idle at the end of IBs after L2 prefetches > + * because the kernel doesn't wait for it. > + */ > + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) > + si_cp_dma_wait_for_idle(cmd_buffer); > + > vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); > > if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index c77a8b297f8..b0ecb626d4c 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -1088,6 +1088,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer > *cmd_buffer, uint64_t va, > unsigned size); > void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, > uint64_t size, unsigned value); > +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); > + > void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); > bool > radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, > diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c > index e350bccae33..a5ee79a9e72 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -1034,7 +1034,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer > *cmd_buffer, > struct radeon_cmdbuf *cs = cmd_buffer->cs; > uint32_t header = 0, command = 0; > > - assert(size); > assert(size <= cp_dma_max_byte_count(cmd_buffer)); > > radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); > @@ -1237,6 +1236,17 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer > *cmd_buffer, uint64_t va, > } > } > > +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) > +{ > + /* Issue a dummy DMA that copies zero bytes. > + * > + * The DMA engine will see that there's no work to do and skip this > + * DMA request, however, the CP will see the sync flag and still wait > + * for all DMAs to complete. > + */ > + si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); > +} > + > /* For MSAA sample positions. */ > #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ > (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev