[AMD Official Use Only]


> -----Original Message-----
> From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of
> Michel Dänzer
> Sent: Monday, August 16, 2021 6:35 PM
> To: Deucher, Alexander <alexander.deuc...@amd.com>; Koenig, Christian
> <christian.koe...@amd.com>
> Cc: Liu, Leo <leo....@amd.com>; Zhu, James <james....@amd.com>; amd-
> g...@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Subject: [PATCH v3] drm/amdgpu: Cancel delayed work when GFXOFF is
> disabled
> 
> From: Michel Dänzer <mdaen...@redhat.com>
> 
> schedule_delayed_work does not push back the work if it was already
> scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
> after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
> was disabled and re-enabled again during those 100 ms.
> 
> This resulted in frame drops / stutter with the upcoming mutter 41
> release on Navi 14, due to constantly enabling GFXOFF in the HW and
> disabling it again (for getting the GPU clock counter).
> 
> To fix this, call cancel_delayed_work_sync when the disable count
> transitions from 0 to 1, and only schedule the delayed work on the
> reverse transition, not if the disable count was already 0. This makes
> sure the delayed work doesn't run at unexpected times, and allows it to
> be lock-free.
> 
> v2:
> * Use cancel_delayed_work_sync & mutex_trylock instead of
>   mod_delayed_work.
> v3:
> * Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)
> 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Michel Dänzer <mdaen...@redhat.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 22 +++++++++++++++++-
> ----
>  2 files changed, 22 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index f3fd5ec710b6..f944ed858f3e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2777,12 +2777,11 @@ static void
> amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
>       struct amdgpu_device *adev =
>               container_of(work, struct amdgpu_device,
> gfx.gfx_off_delay_work.work);
> 
> -     mutex_lock(&adev->gfx.gfx_off_mutex);
> -     if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
> -             if (!amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_GFX, true))
> -                     adev->gfx.gfx_off_state = true;
> -     }
> -     mutex_unlock(&adev->gfx.gfx_off_mutex);
> +     WARN_ON_ONCE(adev->gfx.gfx_off_state);
> +     WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
> +
> +     if (!amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_GFX, true))
> +             adev->gfx.gfx_off_state = true;
>  }
> 
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a0be0772c8b3..ca91aafcb32b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -563,15 +563,26 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
> *adev, bool enable)
> 
>       mutex_lock(&adev->gfx.gfx_off_mutex);
> 
> -     if (!enable)
> -             adev->gfx.gfx_off_req_count++;
> -     else if (adev->gfx.gfx_off_req_count > 0)
> +     if (enable) {
> +             /* If the count is already 0, it means there's an imbalance bug
> somewhere.
> +              * Note that the bug may be in a different caller than the one
> which triggers the
> +              * WARN_ON_ONCE.
> +              */
> +             if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
> +                     goto unlock;
> +
>               adev->gfx.gfx_off_req_count--;
> +     } else {
> +             adev->gfx.gfx_off_req_count++;
> +     }
> 
>       if (enable && !adev->gfx.gfx_off_state && !adev-
> >gfx.gfx_off_req_count) {
>               schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
> GFX_OFF_DELAY_ENABLE);
> -     } else if (!enable && adev->gfx.gfx_off_state) {
> -             if (!amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_GFX, false)) {
> +     } else if (!enable && adev->gfx.gfx_off_req_count == 1) {
[Quan, Evan] It seems here will leave a small time window for race condition. 
If amdgpu_device_delay_enable_gfx_off() happens to occur here, it will 
"WARN_ON_ONCE(adev->gfx.gfx_off_req_count);". How about something as below?
@@ -573,13 +573,11 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
                        goto unlock;

                adev->gfx.gfx_off_req_count--;
-       } else {
-               adev->gfx.gfx_off_req_count++;
        }

        if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) 
{
                schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-       } else if (!enable && adev->gfx.gfx_off_req_count == 1) {
+       } else if (!enable && adev->gfx.gfx_off_req_count == 0) {
                cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

                if (adev->gfx.gfx_off_state &&
@@ -593,6 +591,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
                }
        }

+       if (!enable)
+               adev->gfx.gfx_off_req_count++;
+
 unlock:

BR
Evan
> +             cancel_delayed_work_sync(&adev-
> >gfx.gfx_off_delay_work);
> +
> +             if (adev->gfx.gfx_off_state &&
> +                 !amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_GFX, false)) {
>                       adev->gfx.gfx_off_state = false;
> 
>                       if (adev->gfx.funcs->init_spm_golden) {
> @@ -581,6 +592,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
> *adev, bool enable)
>               }
>       }
> 
> +unlock:
>       mutex_unlock(&adev->gfx.gfx_off_mutex);
>  }
> 
> --
> 2.32.0

Reply via email to