On 11/30/ , Christian KKKnig wrote:
> Am 30.11.21 um 06:17 schrieb Lang Yu:
> > To maintain system error state when SMU errors occurred,
> > which will aid in debugging SMU firmware issues,
> > add SMU debug option support.
> > 
> > It can be enabled or disabled via amdgpu_smu_debug
> > debugfs file. When enabled, it makes SMU errors fatal.
> > It is disabled by default.
> > 
> > == Command Guide ==
> > 
> > 1, enable SMU debug option
> > 
> >   # echo 1 > /sys/kernel/debug/dri/0/amdgpu_smu_debug
> > 
> > 2, disable SMU debug option
> > 
> >   # echo 0 > /sys/kernel/debug/dri/0/amdgpu_smu_debug
> > 
> > v2:
> >   - Resend command when timeout.(Lijo)
> >   - Use debugfs file instead of module parameter.
> > 
> > Signed-off-by: Lang Yu <lang...@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 32 +++++++++++++++++
> >   drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c      | 39 +++++++++++++++++++--
> >   2 files changed, 69 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 164d6a9e9fbb..f9412de86599 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -39,6 +39,8 @@
> >   #if defined(CONFIG_DEBUG_FS)
> > +extern int amdgpu_smu_debug;
> > +
> >   /**
> >    * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
> >    *
> > @@ -1152,6 +1154,8 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file 
> > *f, char __user *buf,
> >     return result;
> >   }
> > +
> > +
> 
> Unrelated change.
Will remove it.

> >   static const struct file_operations amdgpu_debugfs_regs2_fops = {
> >     .owner = THIS_MODULE,
> >     .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
> > @@ -1609,6 +1613,26 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
> >   DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
> >                     amdgpu_debugfs_sclk_set, "%llu\n");
> > +static int amdgpu_debugfs_smu_debug_get(void *data, u64 *val)
> > +{
> > +   *val = amdgpu_smu_debug;
> > +   return 0;
> > +}
> > +
> > +static int amdgpu_debugfs_smu_debug_set(void *data, u64 val)
> > +{
> > +   if (val != 0 && val != 1)
> > +           return -EINVAL;
> > +
> > +   amdgpu_smu_debug = val;
> > +   return 0;
> > +}
> > +
> > +DEFINE_DEBUGFS_ATTRIBUTE(fops_smu_debug,
> > +                    amdgpu_debugfs_smu_debug_get,
> > +                    amdgpu_debugfs_smu_debug_set,
> > +                    "%llu\n");
> > +
> 
> That can be done much simpler. Take a look at the debugfs_create_bool()
> function.
Thanks for your advice. Will modify that.

> >   int amdgpu_debugfs_init(struct amdgpu_device *adev)
> >   {
> >     struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
> > @@ -1632,6 +1656,14 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> >             return PTR_ERR(ent);
> >     }
> > +   ent = debugfs_create_file("amdgpu_smu_debug", 0600, root, adev,
> > +                             &fops_smu_debug);
> > +   if (IS_ERR(ent)) {
> > +           DRM_ERROR("unable to create amdgpu_smu_debug debugsfs file\n");
> > +           return PTR_ERR(ent);
> > +   }
> > +
> > +
> >     /* Register debugfs entries for amdgpu_ttm */
> >     amdgpu_ttm_debugfs_init(adev);
> >     amdgpu_debugfs_pm_init(adev);
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
> > b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> > index 048ca1673863..b3969d7933d3 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> > @@ -55,6 +55,14 @@
> >   #undef __SMU_DUMMY_MAP
> >   #define __SMU_DUMMY_MAP(type)     #type
> > +
> > +/*
> > + * Used to enable SMU debug option. When enabled, it makes SMU errors 
> > fatal.
> > + * This will aid in debugging SMU firmware issues.
> > + * (0 = disabled (default), 1 = enabled)
> > + */
> > +int amdgpu_smu_debug;
> 
> Probably better to put that into amdgpu_device or similar structure.
Ok. Thanks for your advice.

Regards,
Lang

> Regards,
> Christian.
> 
> > +
> >   static const char * const __smu_message_names[] = {
> >     SMU_MESSAGE_TYPES
> >   };
> > @@ -272,6 +280,11 @@ int smu_cmn_send_msg_without_waiting(struct 
> > smu_context *smu,
> >     __smu_cmn_send_msg(smu, msg_index, param);
> >     res = 0;
> >   Out:
> > +   if (unlikely(amdgpu_smu_debug == 1) && res) {
> > +           mutex_unlock(&smu->message_lock);
> > +           BUG();
> > +   }
> > +
> >     return res;
> >   }
> > @@ -288,9 +301,17 @@ int smu_cmn_send_msg_without_waiting(struct 
> > smu_context *smu,
> >   int smu_cmn_wait_for_response(struct smu_context *smu)
> >   {
> >     u32 reg;
> > +   int res;
> >     reg = __smu_cmn_poll_stat(smu);
> > -   return __smu_cmn_reg2errno(smu, reg);
> > +   res = __smu_cmn_reg2errno(smu, reg);
> > +
> > +   if (unlikely(amdgpu_smu_debug == 1) && res) {
> > +           mutex_unlock(&smu->message_lock);
> > +           BUG();
> > +   }
> > +
> > +   return res;
> >   }
> >   /**
> > @@ -328,6 +349,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context 
> > *smu,
> >                                 uint32_t param,
> >                                 uint32_t *read_arg)
> >   {
> > +   int retry_count = 0;
> >     int res, index;
> >     u32 reg;
> > @@ -349,15 +371,28 @@ int smu_cmn_send_smc_msg_with_param(struct 
> > smu_context *smu,
> >             __smu_cmn_reg_print_error(smu, reg, index, param, msg);
> >             goto Out;
> >     }
> > +retry:
> >     __smu_cmn_send_msg(smu, (uint16_t) index, param);
> >     reg = __smu_cmn_poll_stat(smu);
> >     res = __smu_cmn_reg2errno(smu, reg);
> > -   if (res != 0)
> > +   if (res != 0) {
> >             __smu_cmn_reg_print_error(smu, reg, index, param, msg);
> > +           if ((res == -ETIME) && (retry_count++ < 1)) {
> > +                   usleep_range(500, 1000);
> > +                   dev_err(smu->adev->dev,
> > +                           "SMU: resend command: index:%d param:0x%08X 
> > message:%s",
> > +                           index, param, smu_get_message_name(smu, msg));
> > +                   goto retry;
> > +           }
> > +           goto Out;
> > +   }
> >     if (read_arg)
> >             smu_cmn_read_arg(smu, read_arg);
> >   Out:
> >     mutex_unlock(&smu->message_lock);
> > +
> > +   BUG_ON(unlikely(amdgpu_smu_debug == 1) && res);
> > +
> >     return res;
> >   }
> 

Reply via email to