Why:
If the register mmMP1_SMN_C2PMSG_90 is written to before or during
amdgpu driver load or unload in the SR-IOV case, a subsequent amdgpu
driver load will fail at smu hw_init.
The default value of mmMP1_SMN_C2PMSG_90 in a clean environment is 0x1,
and if the value differs from 0x1, amdgpu driver load will fail.

How to fix:
This patch checks whether the SMU is idle by sending a test
message to it. If the SMU is idle, it will respond.
This avoids errors when mmMP1_SMN_C2PMSG_90 is not 0x1 even though
the SMU is idle.

Signed-off-by: Danijel Slivka <danijel.sli...@amd.com>
Signed-off-by: Jingwen Chen <jingwen.ch...@amd.com>
Signed-off-by: pengzhou <pengju.z...@amd.com>
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 17 ++++++--
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 42 +++++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  3 ++
 3 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index e17466cc1952..dafd91b352ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -231,6 +231,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
        uint32_t mp1_fw_flags;
+       int ret = 0;
 
        switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
        case IP_VERSION(13, 0, 4):
@@ -244,11 +245,19 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
                break;
        }
 
-       if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
-           MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
-               return 0;
+       if (!((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
+           MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT))
+               return -EIO;
+
+       if (amdgpu_sriov_vf(adev)) {
+               ret = smu_cmn_wait_smu_idle(smu);
+               if (ret) {
+                       dev_err(adev->dev, "SMU is not idle\n");
+                       return ret;
+               }
+       }
 
-       return -EIO;
+       return 0;
 }
 
 int smu_v13_0_check_fw_version(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 5592fd825aa3..de431c31ca7f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -359,6 +359,48 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
        return res;
 }
 
+/**
+ * smu_cmn_wait_smu_idle -- probe whether the SMU is idle
+ * @smu: pointer to an SMU context
+ *
+ * Send SMU_MSG_TestMessage with a fixed parameter and poll for the
+ * response. The SMU is treated as idle when the argument read back
+ * equals the parameter sent + 1, whatever the response status is.
+ *
+ * Return 0 if the echoed argument matches or if the message index
+ * lookup returns -EACCES; otherwise the lookup error or the result
+ * of __smu_cmn_reg2errno() for the polled response.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+       u32 reg;
+       u32 param = 0xff00011;
+       uint32_t read_arg;
+       int res, index;
+
+       index = smu_cmn_to_asic_specific_index(smu,
+                                              CMN2ASIC_MAPPING_MSG,
+                                              SMU_MSG_TestMessage);
+
+       if (index < 0)
+               return index == -EACCES ? 0 : index;
+
+       __smu_cmn_send_msg(smu, index, param);
+       reg = __smu_cmn_poll_stat(smu);
+       res = __smu_cmn_reg2errno(smu, reg);
+
+       if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+           res && (res != -ETIME)) {
+               amdgpu_device_halt(smu->adev);
+               WARN_ON(1);
+       }
+
+       smu_cmn_read_arg(smu, &read_arg);
+       if (read_arg == param + 1) /* echo matched: idle even if res != 0 */
+               return 0;
+       return res;
+}
+
 /**
  * smu_cmn_send_smc_msg_with_param -- send a message with parameter
  * @smu: pointer to an SMU context
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index 1de685defe85..486acfc956a5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -51,6 +51,9 @@ static inline int pcie_gen_to_speed(uint32_t gen)
 int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
                                     uint16_t msg_index,
                                     uint32_t param);
+
+int smu_cmn_wait_smu_idle(struct smu_context *smu);
+
 int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
                                    enum smu_message_type msg,
                                    uint32_t param,
-- 
2.34.1

Reply via email to