Currently, if the GMU resume function fails, all we try to do is clear the
BOOT_SLUMBER OOB, which usually times out and ends up in a cycle of death.
If the resume function fails at any point, remove any RPMh votes that might
have been added and try to shut down the GMU hardware cleanly.

Signed-off-by: Jordan Crouse <jcro...@codeaurora.org>
---

 drivers/gpu/drm/msm/adreno/a6xx_gmu.c      | 82 +++++++++++++++++++-----------
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c      | 20 ++------
 drivers/gpu/drm/msm/adreno/adreno_device.c |  1 +
 3 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e16d55d..2e89ca3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -638,20 +638,6 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, 
unsigned int state)
         A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR | \
         A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR)
 
-static void a6xx_gmu_irq_enable(struct a6xx_gmu *gmu)
-{
-       gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0);
-       gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0);
-
-       gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK,
-               ~A6XX_GMU_IRQ_MASK);
-       gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK,
-               ~A6XX_HFI_IRQ_MASK);
-
-       enable_irq(gmu->gmu_irq);
-       enable_irq(gmu->hfi_irq);
-}
-
 static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu)
 {
        disable_irq(gmu->gmu_irq);
@@ -661,11 +647,24 @@ static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu)
        gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0);
 }
 
-/* Force the GMU off in case it isn't responsive */
-static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
+static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
 {
        u32 val;
 
+       /* Make sure there are no outstanding RPMh votes */
+       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val,
+               (val & 1), 100, 10000);
+       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val,
+               (val & 1), 100, 10000);
+       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val,
+               (val & 1), 100, 10000);
+       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val,
+               (val & 1), 100, 1000);
+}
+
+/* Force the GMU off in case it isn't responsive */
+static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
+{
        /* Flush all the queues */
        a6xx_hfi_stop(gmu);
 
@@ -676,14 +675,7 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
        a6xx_sptprac_disable(gmu);
 
        /* Make sure there are no outstanding RPMh votes */
-       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val,
-               (val & 1), 100, 10000);
-       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val,
-               (val & 1), 100, 10000);
-       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val,
-               (val & 1), 100, 10000);
-       gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val,
-               (val & 1), 100, 1000);
+       a6xx_gmu_rpmh_off(gmu);
 }
 
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
@@ -702,10 +694,15 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
        /* Use a known rate to bring up the GMU */
        clk_set_rate(gmu->core_clk, 200000000);
        ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
-       if (ret)
-               goto out;
+       if (ret) {
+               pm_runtime_put(gmu->dev);
+               return ret;
+       }
 
-       a6xx_gmu_irq_enable(gmu);
+       /* Enable the GMU interrupt */
+       gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0);
+       gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~A6XX_GMU_IRQ_MASK);
+       enable_irq(gmu->gmu_irq);
 
        /* Check to see if we are doing a cold or warm boot */
        status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ?
@@ -716,6 +713,16 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
                goto out;
 
        ret = a6xx_hfi_start(gmu, status);
+       if (ret)
+               goto out;
+
+       /*
+        * Turn on the GMU firmware fault interrupt after we know the boot
+        * sequence is successful
+        */
+       gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0);
+       gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
+       enable_irq(gmu->hfi_irq);
 
        /* Set the GPU to the highest power frequency */
        __a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
@@ -729,9 +736,12 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
                pm_runtime_get(gmu->gxpd);
 
 out:
-       /* Make sure to turn off the boot OOB request on error */
-       if (ret)
-               a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER);
+       /* On failure, shut down the GMU to leave it in a good state */
+       if (ret) {
+               disable_irq(gmu->gmu_irq);
+               a6xx_rpmh_stop(gmu);
+               pm_runtime_put(gmu->dev);
+       }
 
        return ret;
 }
@@ -754,6 +764,9 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
 /* Gracefully try to shut down the GMU and by extension the GPU */
 static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
 {
+       struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+       struct msm_gpu *gpu = &adreno_gpu->base;
        u32 val;
 
        /*
@@ -771,6 +784,12 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
                        return;
                }
 
+               /* Clear the VBIF pipe before shutting down */
+               gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
+               spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf)
+                       == 0xf);
+               gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+
                /* tell the GMU we want to slumber */
                a6xx_gmu_notify_slumber(gmu);
 
@@ -808,6 +827,9 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
 {
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 
+       if (!pm_runtime_active(gmu->dev))
+               return 0;
+
        /*
         * Force the GMU off if we detected a hang, otherwise try to shut it
         * down gracefully
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index f76d8cd..576559a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -678,13 +678,15 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        int ret;
 
-       ret = a6xx_gmu_resume(a6xx_gpu);
-
        gpu->needs_hw_init = true;
 
+       ret = a6xx_gmu_resume(a6xx_gpu);
+       if (ret)
+               return ret;
+
        msm_gpu_resume_devfreq(gpu);
 
-       return ret;
+       return 0;
 }
 
 static int a6xx_pm_suspend(struct msm_gpu *gpu)
@@ -694,18 +696,6 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
 
        devfreq_suspend_device(gpu->devfreq.devfreq);
 
-       /*
-        * Make sure the GMU is idle before continuing (because some transitions
-        * may use VBIF
-        */
-       a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu);
-
-       /* Clear the VBIF pipe before shutting down */
-       /* FIXME: This accesses the GPU - do we need to make sure it is on? */
-       gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
-       spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf);
-       gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
-
        return a6xx_gmu_stop(a6xx_gpu);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 714ed65..0d87db7 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -229,6 +229,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
 
        ret = pm_runtime_get_sync(&pdev->dev);
        if (ret < 0) {
+               pm_runtime_put_sync(&pdev->dev);
                DRM_DEV_ERROR(dev->dev, "Couldn't power up the GPU: %d\n", ret);
                return NULL;
        }
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to