It was observed on sc7180 (A618 GPU) that the GPU's votes for the GX rail
and the CNOC BCM nodes were not removed after GPU suspend. This was because,
in some cases, we skipped sending the 'prepare-slumber' request to the GMU
during the suspend sequence. So, make sure we always send the
'prepare-slumber' HFI during suspend. Also, sending 'prepare-slumber'
without a prior oob-gpu handshake messes up the GMU firmware's internal
state, so perform that handshake first when it is required.

Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Akhil P Oommen <quic_akhi...@quicinc.com>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 72 +++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 699b0dd34b18f0ec811e975779ba95991d485098..38c94915d4c9d6d33354502651a77c1f9e4648df 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1169,49 +1169,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
        struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
        struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
        u32 val;
+       int ret;
 
        /*
-        * The GMU may still be in slumber unless the GPU started so check and
-        * skip putting it back into slumber if so
+        * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when
+        * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when
+        * required
         */
-       val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
+       if (adreno_gpu->base.needs_hw_init) {
+               if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET))
+                       goto force_off;
 
-       if (val != 0xf) {
-               int ret = a6xx_gmu_wait_for_idle(gmu);
+               a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+       }
 
-               /* If the GMU isn't responding assume it is hung */
-               if (ret) {
-                       a6xx_gmu_force_off(gmu);
-                       return;
-               }
+       ret = a6xx_gmu_wait_for_idle(gmu);
 
-               a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
+       /* If the GMU isn't responding assume it is hung */
+       if (ret)
+               goto force_off;
 
-               /* tell the GMU we want to slumber */
-               ret = a6xx_gmu_notify_slumber(gmu);
-               if (ret) {
-                       a6xx_gmu_force_off(gmu);
-                       return;
-               }
+       a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
 
-               ret = gmu_poll_timeout(gmu,
-                       REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
-                       !(val & 
A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
-                       100, 10000);
+       /* tell the GMU we want to slumber */
+       ret = a6xx_gmu_notify_slumber(gmu);
+       if (ret)
+               goto force_off;
 
-               /*
-                * Let the user know we failed to slumber but don't worry too
-                * much because we are powering down anyway
-                */
+       ret = gmu_poll_timeout(gmu,
+               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
+               !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
+               100, 10000);
 
-               if (ret)
-                       DRM_DEV_ERROR(gmu->dev,
-                               "Unable to slumber GMU: status = 0%x/0%x\n",
-                               gmu_read(gmu,
-                                       REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
-                               gmu_read(gmu,
-                                       
REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
-       }
+       /*
+        * Let the user know we failed to slumber but don't worry too
+        * much because we are powering down anyway
+        */
+
+       if (ret)
+               DRM_DEV_ERROR(gmu->dev,
+                       "Unable to slumber GMU: status = 0%x/0%x\n",
+                       gmu_read(gmu,
+                               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
+                       gmu_read(gmu,
+                               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
 
        /* Turn off HFI */
        a6xx_hfi_stop(gmu);
@@ -1221,6 +1222,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
 
        /* Tell RPMh to power off the GPU */
        a6xx_rpmh_stop(gmu);
+
+       return;
+
+force_off:
+       a6xx_gmu_force_off(gmu);
 }
 
 

---
base-commit: 72d0af4accd965dc32f504440d74d0a4d18bf781
change-id: 20250110-adreno-sys-suspend-fix-c5bc7beea0c4

Best regards,
-- 
Akhil P Oommen <quic_akhi...@quicinc.com>

Reply via email to