Replace WAIT_REG_MEM with EVENT_WRITE flushes for all shader types plus an ACQUIRE_MEM. That should accomplish the same thing and, by avoiding the wait on a fence, prevents any issues with pipeline syncs during queue resets.
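For reference, a minimal user-space sketch of the dword stream the reworked
gfx_v9_0_ring_emit_pipeline_sync() places on the ring, showing where the new
emit_frame_size values come from (13 dwords for GFX, 9 for compute).  The
PACKET3()/OP_* macros below are simplified stand-ins for illustration only,
not the driver's definitions:

/*
 * Illustrative sketch only; the macros are stand-ins, not the amdgpu
 * definitions.  Each EVENT_WRITE is 2 dwords, ACQUIRE_MEM is 1 header
 * plus 6 payload dwords.
 */
#include <stdint.h>
#include <stdio.h>

#define PACKET3(op, n)  ((3u << 30) | ((op) << 8) | (n))  /* stand-in */
#define OP_EVENT_WRITE  0x46                              /* stand-in */
#define OP_ACQUIRE_MEM  0x58                              /* stand-in */

static unsigned int emit_pipeline_sync(uint32_t *buf, int is_gfx)
{
	unsigned int n = 0;

	if (is_gfx) {  /* VS/PS partial flushes only on the GFX ring */
		buf[n++] = PACKET3(OP_EVENT_WRITE, 0);
		buf[n++] = 0; /* VS_PARTIAL_FLUSH | EVENT_INDEX(4) */
		buf[n++] = PACKET3(OP_EVENT_WRITE, 0);
		buf[n++] = 0; /* PS_PARTIAL_FLUSH | EVENT_INDEX(4) */
	}
	buf[n++] = PACKET3(OP_EVENT_WRITE, 0);
	buf[n++] = 0; /* CS_PARTIAL_FLUSH | EVENT_INDEX(4) */

	/* ACQUIRE_MEM: header + 6 payload dwords */
	buf[n++] = PACKET3(OP_ACQUIRE_MEM, 5);
	buf[n++] = 0;          /* CP_COHER_CNTL */
	buf[n++] = 0xffffffff; /* CP_COHER_SIZE */
	buf[n++] = 0xffffff;   /* CP_COHER_SIZE_HI */
	buf[n++] = 0;          /* CP_COHER_BASE */
	buf[n++] = 0;          /* CP_COHER_BASE_HI */
	buf[n++] = 0x0000000A; /* POLL_INTERVAL */

	return n;
}

int main(void)
{
	uint32_t buf[16];

	printf("gfx: %u dwords\n", emit_pipeline_sync(buf, 1));     /* 13 */
	printf("compute: %u dwords\n", emit_pipeline_sync(buf, 0)); /*  9 */
	return 0;
}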
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 67 +++++++++++++++------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7b012ca1153ea..0d8e797d59b8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5572,15 +5572,42 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring,
+					   uint32_t event_type,
+					   uint32_t event_index)
 {
-	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t seq = ring->fence_drv.sync_seq;
-	uint64_t addr = ring->fence_drv.gpu_addr;
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(event_type) |
+				EVENT_INDEX(event_index));
+}
+
+static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+	const unsigned int cp_coher_cntl =
+		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
 
-	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
-			      lower_32_bits(addr), upper_32_bits(addr),
-			      seq, 0xffffffff, 4);
+	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+		gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4);
+		gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
+	}
+	gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
+	gfx_v9_0_emit_mem_sync(ring);
 }
 
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
@@ -7071,25 +7098,6 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	gfx_v9_0_query_utc_edc_status(adev, err_data);
 }
 
-static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
-{
-	const unsigned int cp_coher_cntl =
-		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
-		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
-		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
-		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
-		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
-
-	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
-	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
-	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
-	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
-	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
-	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
-	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
-}
-
 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
 					uint32_t pipe, bool enable)
 {
@@ -7404,7 +7412,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
-		7 +  /* PIPELINE_SYNC */
+		13 + /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* VM_FLUSH */
@@ -7460,7 +7468,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
-		7 +  /* PIPELINE_SYNC */
+		13 + /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* VM_FLUSH */
@@ -7521,7 +7529,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
-		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+		9 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
@@ -7564,7 +7572,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
-		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-- 
2.52.0