-----Original Message-----
From: Kenneth Feng <kenneth.f...@amd.com> 
Sent: Thursday, October 26, 2023 11:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <alexander.deuc...@amd.com>; Wang, Yang(Kevin) 
<kevinyang.w...@amd.com>; Feng, Kenneth <kenneth.f...@amd.com>
Subject: [PATCH v2] drm/amd/pm: fix the high voltage and temperature issue

Fix the high voltage and temperature issue that occurs after the driver is
unloaded on SMU 13.0.0, SMU 13.0.7 and SMU 13.0.10.
v2 - fix the code format and make sure it is used in the unload case only.

Signed-off-by: Kenneth Feng <kenneth.f...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 25 ++++++++++----
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 33 +++++++++++++++++--
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  1 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  2 ++
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 13 ++++++++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 18 ++++++++--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 19 +++++++++--
 7 files changed, 96 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 31f8c3ead161..1ad0fc3f3861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3986,13 +3986,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                }
                        }
                } else {
-                       tmp = amdgpu_reset_method;
-                       /* It should do a default reset when loading or reloading the driver,
-                        * regardless of the module parameter reset_method.
-                        */
-                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
-                       r = amdgpu_asic_reset(adev);
-                       amdgpu_reset_method = tmp;
+                       switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+                       case IP_VERSION(13, 0, 0):
+                       case IP_VERSION(13, 0, 7):
+                       case IP_VERSION(13, 0, 10):
+                               r = psp_gpu_reset(adev);
+                               break;
+                       default:
+                               tmp = amdgpu_reset_method;
+                               /* It should do a default reset when loading or reloading the driver,
+                                * regardless of the module parameter reset_method.
+                                */
+                               amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+                               r = amdgpu_asic_reset(adev);
+                               amdgpu_reset_method = tmp;
+                               break;
+                       }
+
                        if (r) {
                                dev_err(adev->dev, "asic reset on init failed\n");
                                goto failed;
@@ -5945,6 +5955,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
                return -ENOTSUPP;
 
        ret = amdgpu_dpm_baco_exit(adev);
+
[kevin]:
This blank line is not needed.

        if (ret)
                return ret;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7c3356d6da5e..2e82172ba250 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -733,7 +733,7 @@ static int smu_early_init(void *handle)
        smu->adev = adev;
        smu->pm_enabled = !!amdgpu_dpm;
        smu->is_apu = false;
-       smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+       smu->smu_baco.state = SMU_BACO_STATE_NONE;
        smu->smu_baco.platform_support = false;
        smu->user_dpm_profile.fan_mode = -1;
 
@@ -1740,10 +1740,31 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
        return 0;
 }
 
+static int smu_reset_mp1_state(struct smu_context *smu) {
+       struct amdgpu_device *adev = smu->adev;
+       int ret = 0;
+
+       if ((!adev->in_runpm) && (!adev->in_suspend) &&
+               (!amdgpu_in_reset(adev)))
+               switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+                 case IP_VERSION(13, 0, 0):
+                 case IP_VERSION(13, 0, 7):
+                 case IP_VERSION(13, 0, 10):
+                       ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+                       break;
+                 default:
+                       break;
+               }
+
+       return ret;
+}
[kevin]:

Hi Kenneth,

I would prefer adding a callback function in the _ppt.c files to avoid the IP
version check in the SMU common file.
But either way is okay with me.

Reviewed-by: Yang Wang <kevinyang.w...@amd.com>

Thanks.

Best Regards,
Kevin
+
 static int smu_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct smu_context *smu = adev->powerplay.pp_handle;
+       int ret;
 
        if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
                return 0;
@@ -1761,7 +1782,15 @@ static int smu_hw_fini(void *handle)
 
        adev->pm.dpm_enabled = false;
 
-       return smu_smc_hw_cleanup(smu);
+       ret = smu_smc_hw_cleanup(smu);
+       if (ret)
+               return ret;
+
+       ret = smu_reset_mp1_state(smu);
+       if (ret)
+               return ret;
+
+       return 0;
 }
 
 static void smu_late_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 1454eed76604..9f2dbc90b606 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -419,6 +419,7 @@ enum smu_reset_mode {
 enum smu_baco_state {
        SMU_BACO_STATE_ENTER = 0,
        SMU_BACO_STATE_EXIT,
+       SMU_BACO_STATE_NONE,
 };
 
 struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index cc02f979e9e9..43c7ba68eb50 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -299,5 +299,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
                                     uint8_t pcie_gen_cap,
                                     uint8_t pcie_width_cap);
 
+int smu_v13_0_disable_pmfw_state(struct smu_context* smu);
+
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index bcb7ab9d2221..0724441e53ef 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2473,3 +2473,16 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 
        return 0;
 }
+
+int smu_v13_0_disable_pmfw_state(struct smu_context* smu) {
+       int ret;
+       struct amdgpu_device *adev = smu->adev;
+
+       WREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff), 0);
+
+       ret = RREG32_PCIE(MP1_Public |
+                                          (smnMP1_FIRMWARE_FLAGS & 
0xffffffff));
+
+       return ret == 0 ? 0 : -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 47d008cbc186..02c5e7b1e43b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2570,14 +2570,20 @@ static int smu_v13_0_0_baco_enter(struct smu_context *smu)
 static int smu_v13_0_0_baco_exit(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
+       int ret;
 
        if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
                /* Wait for PMFW handling for the Dstate change */
                usleep_range(10000, 11000);
-               return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+               ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
        } else {
-               return smu_v13_0_baco_exit(smu);
+               ret = smu_v13_0_baco_exit(smu);
        }
+
+       if (!ret)
+               adev->gfx.is_poweron = false;
+
+       return ret;
 }
 
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
@@ -2758,7 +2764,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
 
        switch (mp1_state) {
        case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_set_mp1_state(smu, mp1_state);
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                               
SMU_MSG_PrepareMp1ForUnload,
+                                                               0x55, NULL);
+
+               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+                       ret = smu_v13_0_disable_pmfw_state(smu);
+
                break;
        default:
                /* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index b8a7a1d853df..40e8d1767b71 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2429,7 +2429,13 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context 
*smu,
 
        switch (mp1_state) {
        case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_set_mp1_state(smu, mp1_state);
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                               
SMU_MSG_PrepareMp1ForUnload,
+                                                               0x55, NULL);
+
+               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+                       ret = smu_v13_0_disable_pmfw_state(smu);
+
                break;
        default:
                /* Ignore others */
@@ -2455,14 +2461,21 @@ static int smu_v13_0_7_baco_enter(struct smu_context *smu)
 static int smu_v13_0_7_baco_exit(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
+       int ret;
 
        if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
                /* Wait for PMFW handling for the Dstate change */
                usleep_range(10000, 11000);
-               return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+               ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
        } else {
-               return smu_v13_0_baco_exit(smu);
+               ret = smu_v13_0_baco_exit(smu);
        }
+
+       if (!ret)
+               adev->gfx.is_poweron = false;
+
+       return ret;
+
[kevin]:
Please remove this blank line too.
 }
 
 static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
--
2.34.1

Reply via email to