There is a race condition which leads to dpm video power profile switch (disable and enable) during active video decode on multi-instance VCN hardware.
This patch aims to fix/skip step 3 in the below sequence: - inst_1 power_on - inst_0(idle) power_off - inst_0(idle) video_power_profile OFF (step 3) - inst_1 video_power_profile ON during next begin_use Add flags to track ON/OFF vcn instances and check if all instances are off before disabling video power profile. v2: Pg_lock is per instance it doesn't help solve the issue. (David Wu) Use existing global workload_profile_mutex. (Alex) v3: Use per instance dpg_lock to protect dpg state changes which need to be protected against concurrent use among users of one particular instance only, global workload_profile_mutex need not be used to safeguard this and drop pg_lock. Fixes: 3b669df92c85 ("drm/amdgpu/vcn: adjust workload profile handling") Signed-off-by: Sathishkumar S <sathishkumar.sundarar...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 37 +++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 6 +++- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 14 ++-------- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9a76e11d1c18..fc61d8875502 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -134,7 +134,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) int r; mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); - mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + mutex_init(&adev->vcn.inst[i].dpg_lock); mutex_init(&adev->vcn.inst[i].engine_reset_mutex); atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); @@ -290,7 +290,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) if (adev->vcn.reg_list) amdgpu_vcn_reg_dump_fini(adev); - mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); + mutex_destroy(&adev->vcn.inst[i].dpg_lock); mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); return 0; @@ -419,7 +419,7 @@ 
static void amdgpu_vcn_idle_work_handler(struct work_struct *work) unsigned int i = vcn_inst->inst, j; int r = 0; - if (adev->vcn.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << vcn_inst->inst)) return; for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) @@ -427,7 +427,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - !adev->vcn.inst[i].using_unified_queue) { + !vcn_inst->using_unified_queue) { struct dpg_pause_state new_state; if (fence[i] || @@ -436,18 +436,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state); + vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec); fences += fence[i]; if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { - mutex_lock(&vcn_inst->vcn_pg_lock); - vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); - mutex_unlock(&vcn_inst->vcn_pg_lock); mutex_lock(&adev->vcn.workload_profile_mutex); - if (adev->vcn.workload_profile_active) { + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); + adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst); + if (adev->vcn.workload_profile_active && + !(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, false); if (r) @@ -470,13 +470,6 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) cancel_delayed_work_sync(&vcn_inst->idle_work); - /* We can safely return early here because we've cancelled the - * the delayed work so there is no one else to set it to false - * and we don't care if someone else sets it to true. 
- */ - if (adev->vcn.workload_profile_active) - goto pg_lock; - mutex_lock(&adev->vcn.workload_profile_mutex); if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, @@ -485,12 +478,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); adev->vcn.workload_profile_active = true; } - mutex_unlock(&adev->vcn.workload_profile_mutex); -pg_lock: - mutex_lock(&vcn_inst->vcn_pg_lock); - vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); + if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) { + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); + adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst); + } + mutex_unlock(&adev->vcn.workload_profile_mutex); + mutex_lock(&vcn_inst->dpg_lock); /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && !vcn_inst->using_unified_queue) { @@ -514,7 +509,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } - mutex_unlock(&vcn_inst->vcn_pg_lock); + mutex_unlock(&vcn_inst->dpg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index b3fb1d0e43fc..54cc7646001c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -320,7 +320,7 @@ struct amdgpu_vcn_inst { uint8_t vcn_config; uint32_t vcn_codec_disable_mask; atomic_t total_submission_cnt; - struct mutex vcn_pg_lock; + struct mutex dpg_lock; enum amd_powergating_state cur_state; struct delayed_work idle_work; unsigned fw_version; @@ -366,6 +366,10 @@ struct amdgpu_vcn { struct mutex workload_profile_mutex; u32 reg_count; const struct amdgpu_hwip_reg_entry *reg_list; +#define AMDGPU_VCN_FLAG_VINST_MASK(n) (BIT(n+1) - 1) +#define 
AMDGPU_VCN_FLAG_VINST_ON(n) (BIT(n)) +#define AMDGPU_VCN_FLAG_VINST_OFF(n) (~BIT(n)) + u32 flags; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 3a7c137a83ef..4bd0ca26a5ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -147,9 +147,9 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) } if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { + mutex_lock(&adev->vcn.workload_profile_mutex); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - mutex_lock(&adev->vcn.workload_profile_mutex); if (adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, false); @@ -173,13 +173,6 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); - /* We can safely return early here because we've cancelled the - * the delayed work so there is no one else to set it to false - * and we don't care if someone else sets it to true. 
- */ - if (adev->vcn.workload_profile_active) - goto pg_lock; - mutex_lock(&adev->vcn.workload_profile_mutex); if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, @@ -188,10 +181,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); adev->vcn.workload_profile_active = true; } - mutex_unlock(&adev->vcn.workload_profile_mutex); -pg_lock: - mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); @@ -217,7 +207,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) } v->pause_dpg_mode(v, &new_state); } - mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); + mutex_unlock(&adev->vcn.workload_profile_mutex); } static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) -- 2.48.1