Add a message control block to abstract the PMFW message protocol. The
message control block primarily carries the message config, which is a
set of register addresses, and the message ops, which abstract the
protocol of sending messages.

Signed-off-by: Lijo Lazar <[email protected]>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  70 ++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 315 ++++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |   4 +
 3 files changed, 389 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 24cf25d0606b..41f27fb4af4b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -551,6 +551,73 @@ struct cmn2asic_mapping {
        int     map_to;
 };
 
+#define SMU_MSG_MAX_ARGS 4
+
+/* Message flags for smu_msg_args */
+#define SMU_MSG_FLAG_NO_WAIT BIT(0) /* Skip post-poll (for split send/wait) */
+
+struct smu_msg_ctl;
+/**
+ * struct smu_msg_config - IP-level register configuration
+ * @msg_reg: Message register offset
+ * @resp_reg: Response register offset
+ * @arg_regs: Argument register offsets (up to SMU_MSG_MAX_ARGS)
+ * @num_arg_regs: Number of argument registers available
+ */
+struct smu_msg_config {
+       u32 msg_reg;
+       u32 resp_reg;
+       u32 arg_regs[SMU_MSG_MAX_ARGS];
+       int num_arg_regs;
+};
+
+/**
+ * struct smu_msg_args - Per-call message arguments
+ * @msg: Common message type (enum smu_message_type)
+ * @args: Input arguments
+ * @num_args: Number of input arguments
+ * @out_args: Output arguments (filled after successful send)
+ * @num_out_args: Number of output arguments to read
+ * @flags: Message flags (SMU_MSG_FLAG_*)
+ * @timeout: Per-message timeout in us (0 = use default)
+ */
+struct smu_msg_args {
+       enum smu_message_type msg;
+       u32 args[SMU_MSG_MAX_ARGS];
+       int num_args;
+       u32 out_args[SMU_MSG_MAX_ARGS];
+       int num_out_args;
+       u32 flags;
+       u32 timeout;
+};
+
+/**
+ * struct smu_msg_ops - IP-level protocol operations
+ * @send_msg: send message protocol
+ * @wait_response: wait for response (for split send/wait cases)
+ * @decode_response: Convert response register value to errno
+ */
+struct smu_msg_ops {
+       int (*send_msg)(struct smu_msg_ctl *ctl, struct smu_msg_args *args);
+       int (*wait_response)(struct smu_msg_ctl *ctl, u32 timeout_us);
+       int (*decode_response)(u32 resp);
+};
+
+/**
+ * struct smu_msg_ctl - Per-device message control block
+ * This is a standalone control block that encapsulates everything
+ * needed for SMU messaging. The ops->send_msg implements the complete
+ * protocol including all filtering and error handling.
+ */
+struct smu_msg_ctl {
+       struct smu_context *smu;
+       struct mutex lock;
+       struct smu_msg_config config;
+       const struct smu_msg_ops *ops;
+       const struct cmn2asic_msg_mapping *message_map;
+       u32 default_timeout;
+};
+
 struct stb_context {
        uint32_t stb_buf_size;
        bool enabled;
@@ -691,6 +758,9 @@ struct smu_context {
        bool                            wbrf_supported;
        struct notifier_block           wbrf_notifier;
        struct delayed_work             wbrf_delayed_work;
+
+       /* SMU message control block */
+       struct smu_msg_ctl msg_ctl;
 };
 
 struct i2c_adapter;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 88e9699d9cc1..266aca5ed559 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -515,6 +515,321 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
        return __smu_cmn_send_debug_msg(smu, msg, param);
 }
 
+static int smu_msg_v1_decode_response(u32 resp)
+{
+       int res;
+
+       switch (resp) {
+       case SMU_RESP_NONE:
+               /* The SMU is busy--still executing your command.
+                */
+               res = -ETIME;
+               break;
+       case SMU_RESP_OK:
+               res = 0;
+               break;
+       case SMU_RESP_CMD_FAIL:
+               /* Command completed successfully, but the command
+                * status was failure.
+                */
+               res = -EIO;
+               break;
+       case SMU_RESP_CMD_UNKNOWN:
+               /* Unknown command--ignored by the SMU.
+                */
+               res = -EOPNOTSUPP;
+               break;
+       case SMU_RESP_CMD_BAD_PREREQ:
+               /* Valid command--bad prerequisites.
+                */
+               res = -EINVAL;
+               break;
+       case SMU_RESP_BUSY_OTHER:
+               /* The SMU is busy with other commands. The client
+                * should retry in 10 us.
+                */
+               res = -EBUSY;
+               break;
+       default:
+               /* Unknown or debug response from the SMU.
+                */
+               res = -EREMOTEIO;
+               break;
+       }
+
+       return res;
+}
+
+static u32 __smu_msg_v1_poll_stat(struct smu_msg_ctl *ctl, u32 timeout_us)
+{
+       struct amdgpu_device *adev = ctl->smu->adev;
+       struct smu_msg_config *cfg = &ctl->config;
+       u32 timeout = timeout_us ? timeout_us : ctl->default_timeout;
+       u32 reg = 0;
+
+       for (; timeout > 0; timeout--) {
+               reg = RREG32(cfg->resp_reg);
+               if ((reg & MP1_C2PMSG_90__CONTENT_MASK) != 0)
+                       break;
+               udelay(1);
+       }
+
+       return reg;
+}
+
+static void __smu_msg_v1_send(struct smu_msg_ctl *ctl, u16 index, u32 param)
+{
+       struct amdgpu_device *adev = ctl->smu->adev;
+       struct smu_msg_config *cfg = &ctl->config;
+
+       WREG32(cfg->resp_reg, 0);
+       WREG32(cfg->arg_regs[0], param);
+       WREG32(cfg->msg_reg, index);
+}
+
+static u32 __smu_msg_v1_read_arg(struct smu_msg_ctl *ctl)
+{
+       struct amdgpu_device *adev = ctl->smu->adev;
+
+       return RREG32(ctl->config.arg_regs[0]);
+}
+
+static void __smu_msg_v1_print_error(struct smu_msg_ctl *ctl,
+                                    u32 resp,
+                                    struct smu_msg_args *args)
+{
+       struct smu_context *smu = ctl->smu;
+       struct amdgpu_device *adev = smu->adev;
+       const char *message = smu_get_message_name(smu, args->msg);
+       int index = ctl->message_map[args->msg].map_to;
+       u32 param = args->args[0];
+       u32 msg_idx, prm;
+
+       switch (resp) {
+       case SMU_RESP_NONE:
+               msg_idx = RREG32(ctl->config.msg_reg);
+               prm = RREG32(ctl->config.arg_regs[0]);
+               dev_err_ratelimited(adev->dev,
+                       "SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x%08X SMN_C2PMSG_82:0x%08X",
+                       msg_idx, prm);
+               break;
+       case SMU_RESP_OK:
+               break;
+       case SMU_RESP_CMD_FAIL:
+               break;
+       case SMU_RESP_CMD_UNKNOWN:
+               dev_err_ratelimited(adev->dev,
+                       "SMU: unknown command: index:%d param:0x%08X message:%s",
+                       index, param, message);
+               break;
+       case SMU_RESP_CMD_BAD_PREREQ:
+               dev_err_ratelimited(adev->dev,
+                       "SMU: valid command, bad prerequisites: index:%d param:0x%08X message:%s",
+                       index, param, message);
+               break;
+       case SMU_RESP_BUSY_OTHER:
+               if (args->msg != SMU_MSG_GetBadPageCount)
+                       dev_err_ratelimited(adev->dev,
+                               "SMU: I'm very busy for your command: index:%d param:0x%08X message:%s",
+                               index, param, message);
+               break;
+       case SMU_RESP_DEBUG_END:
+               dev_err_ratelimited(adev->dev, "SMU: I'm debugging!");
+               break;
+       case SMU_RESP_UNEXP:
+               if (amdgpu_device_bus_status_check(adev)) {
+                       dev_err(adev->dev,
+                               "SMU: response:0x%08X for index:%d param:0x%08X message:%s?",
+                               resp, index, param, message);
+                       break;
+               }
+               fallthrough;
+       default:
+               dev_err_ratelimited(adev->dev,
+                       "SMU: response:0x%08X for index:%d param:0x%08X message:%s?",
+                       resp, index, param, message);
+               break;
+       }
+}
+
+static int __smu_msg_v1_ras_filter(struct smu_msg_ctl *ctl,
+                                  enum smu_message_type msg, u32 msg_flags,
+                                  bool *skip_pre_poll)
+{
+       struct smu_context *smu = ctl->smu;
+       struct amdgpu_device *adev = smu->adev;
+       bool fed_status;
+       u32 reg;
+
+       if (!(smu->smc_fw_caps & SMU_FW_CAP_RAS_PRI))
+               return 0;
+
+       fed_status = amdgpu_ras_get_fed_status(adev);
+
+       /* Block non-RAS-priority messages during RAS error */
+       if (fed_status && !(msg_flags & SMU_MSG_RAS_PRI)) {
+               dev_dbg(adev->dev, "RAS error detected, skip sending %s",
+                       smu_get_message_name(smu, msg));
+               return -EACCES;
+       }
+
+       /* Skip pre-poll for priority messages or during RAS error */
+       if ((msg_flags & SMU_MSG_NO_PRECHECK) || fed_status) {
+               reg = RREG32(ctl->config.resp_reg);
+               dev_dbg(adev->dev,
+                       "Sending priority message %s response status: %x",
+                       smu_get_message_name(smu, msg), reg);
+               if (reg == 0)
+                       *skip_pre_poll = true;
+       }
+
+       return 0;
+}
+
+/**
+ * smu_msg_v1_send_msg - Complete V1 protocol with all filtering
+ * @ctl: Message control block
+ * @args: Message arguments
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int smu_msg_v1_send_msg(struct smu_msg_ctl *ctl,
+                              struct smu_msg_args *args)
+{
+       struct smu_context *smu = ctl->smu;
+       struct amdgpu_device *adev = smu->adev;
+       const struct cmn2asic_msg_mapping *mapping;
+       u32 reg, msg_flags;
+       int ret, index;
+       bool skip_pre_poll = false;
+
+       /* Early exit if no HW access */
+       if (adev->no_hw_access)
+               return 0;
+
+       /* Message index translation */
+       if (args->msg >= SMU_MSG_MAX_COUNT || !ctl->message_map)
+               return -EINVAL;
+
+       mapping = &ctl->message_map[args->msg];
+       if (!mapping->valid_mapping)
+               return -EINVAL;
+
+       msg_flags = mapping->flags;
+       index = mapping->map_to;
+
+       /* VF filter - skip messages not valid for VF */
+       if (amdgpu_sriov_vf(adev) && !(msg_flags & SMU_MSG_VF_FLAG))
+               return 0;
+
+       mutex_lock(&ctl->lock);
+
+       /* RAS priority filter */
+       ret = __smu_msg_v1_ras_filter(ctl, args->msg, msg_flags,
+                                     &skip_pre_poll);
+       if (ret)
+               goto out;
+
+       /* FW state checks */
+       if (smu->smc_fw_state == SMU_FW_HANG) {
+               dev_err(adev->dev,
+                       "SMU is in hanged state, failed to send smu message!\n");
+               ret = -EREMOTEIO;
+               goto out;
+       } else if (smu->smc_fw_state == SMU_FW_INIT) {
+               skip_pre_poll = true;
+               smu->smc_fw_state = SMU_FW_RUNTIME;
+       }
+
+       /* Pre-poll: ensure previous message completed */
+       if (!skip_pre_poll) {
+               reg = __smu_msg_v1_poll_stat(ctl, args->timeout);
+               ret = smu_msg_v1_decode_response(reg);
+               if (reg == SMU_RESP_NONE || ret == -EREMOTEIO) {
+                       __smu_msg_v1_print_error(ctl, reg, args);
+                       goto out;
+               }
+       }
+
+       /* Send message */
+       __smu_msg_v1_send(ctl, (u16)index, args->args[0]);
+
+       /* Post-poll (skip if NO_WAIT) */
+       if (args->flags & SMU_MSG_FLAG_NO_WAIT) {
+               ret = 0;
+               goto out;
+       }
+
+       reg = __smu_msg_v1_poll_stat(ctl, args->timeout);
+       ret = smu_msg_v1_decode_response(reg);
+
+       /* FW state update on fatal error */
+       if (ret == -EREMOTEIO) {
+               smu->smc_fw_state = SMU_FW_HANG;
+               __smu_msg_v1_print_error(ctl, reg, args);
+       } else if (ret != 0) {
+               __smu_msg_v1_print_error(ctl, reg, args);
+       }
+
+       /* Read output args */
+       if (ret == 0 && args->num_out_args > 0) {
+               args->out_args[0] = __smu_msg_v1_read_arg(ctl);
+               dev_dbg(adev->dev,
+                       "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x, readval: 0x%08x\n",
+                       smu_get_message_name(smu, args->msg), index,
+                       args->args[0], reg, args->out_args[0]);
+       } else {
+               dev_dbg(adev->dev,
+                       "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x\n",
+                       smu_get_message_name(smu, args->msg), index,
+                       args->args[0], reg);
+       }
+
+out:
+       /* Debug halt on error */
+       if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+           ret) {
+               amdgpu_device_halt(adev);
+               WARN_ON(1);
+       }
+
+       mutex_unlock(&ctl->lock);
+       return ret;
+}
+
+static int smu_msg_v1_wait_response(struct smu_msg_ctl *ctl, u32 timeout_us)
+{
+       struct smu_context *smu = ctl->smu;
+       struct amdgpu_device *adev = smu->adev;
+       u32 reg;
+       int ret;
+
+       reg = __smu_msg_v1_poll_stat(ctl, timeout_us);
+       ret = smu_msg_v1_decode_response(reg);
+
+       if (ret == -EREMOTEIO)
+               smu->smc_fw_state = SMU_FW_HANG;
+
+       if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+           ret && (ret != -ETIME)) {
+               amdgpu_device_halt(adev);
+               WARN_ON(1);
+       }
+
+       return ret;
+}
+
+const struct smu_msg_ops smu_msg_v1_ops = {
+       .send_msg = smu_msg_v1_send_msg,
+       .wait_response = smu_msg_v1_wait_response,
+       .decode_response = smu_msg_v1_decode_response,
+};
+
+int smu_msg_wait_response(struct smu_msg_ctl *ctl, u32 timeout_us)
+{
+       return ctl->ops->wait_response(ctl, timeout_us);
+}
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
                                   enum smu_cmn2asic_mapping_type type,
                                   uint32_t index)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index f458125e8d4e..6ba62180e544 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -25,6 +25,10 @@
 
 #include "amdgpu_smu.h"
 
+extern const struct smu_msg_ops smu_msg_v1_ops;
+
+int smu_msg_wait_response(struct smu_msg_ctl *ctl, u32 timeout_us);
+
 #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4)
 
 #define FDO_PWM_MODE_STATIC  1
-- 
2.49.0

Reply via email to