[AMD Official Use Only - General]

Hi @Grodzovsky, Andrey,

Please help review the series, thanks a lot.

Hi @Koenig, Christian,

I thought a module parameter will be exposed to a common user, this control was 
added to help debug and test so I put it as a debugfs. I can make it module 
parameter if more appropriate.


Thanks,
Victor



-----Original Message-----
From: Koenig, Christian <christian.koe...@amd.com> 
Sent: Friday, July 22, 2022 4:20 PM
To: Zhao, Victor <victor.z...@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deng, Emily <emily.d...@amd.com>; Deucher, Alexander 
<alexander.deuc...@amd.com>
Subject: Re: [PATCH 2/5] drm/amdgpu: add debugfs amdgpu_reset_level

Well NAK to the debugfs approach, stuff like that is usually a module parameter.

Apart from that this series needs to be reviewed by Andrey.

Regards,
Christian.

Am 22.07.22 um 09:34 schrieb Victor Zhao:
> Introduce amdgpu_reset_level debugfs in order to help debug and test 
> specific type of reset. Also helps blocking unwanted type of resets.
>
> By default, mode2 reset will not be enabled
>
> Signed-off-by: Victor Zhao <victor.z...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 20 ++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  6 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c    |  3 +++
>   5 files changed, 34 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6cd1e0a6dffc..c661231a6a07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -238,6 +238,7 @@ extern int amdgpu_si_support;
>   extern int amdgpu_cik_support;
>   #endif
>   extern int amdgpu_num_kcq;
> +extern uint amdgpu_reset_level_mask;
>   
>   #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
>   extern int amdgpu_vcnfw_log;
> @@ -274,6 +275,9 @@ extern int amdgpu_vcnfw_log;
>   #define AMDGPU_RESET_VCE                    (1 << 13)
>   #define AMDGPU_RESET_VCE1                   (1 << 14)
>   
> +#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0) #define 
> +AMDGPU_RESET_LEVEL_MODE2 (1 << 1)
> +
>   /* max cursor sizes (in pixels) */
>   #define CIK_CURSOR_WIDTH 128
>   #define CIK_CURSOR_HEIGHT 128
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index e2eec985adb3..235c48e4ba4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1661,12 +1661,29 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 
> val)
>       return ret;
>   }
>   
> +static int amdgpu_debugfs_reset_level_get(void *data, u64 *val) {
> +     struct amdgpu_device *adev = (struct amdgpu_device *)data;
> +     *val = amdgpu_reset_level_mask;
> +     return 0;
> +}
> +
> +static int amdgpu_debugfs_reset_level_set(void *data, u64 val) {
> +     struct amdgpu_device *adev = (struct amdgpu_device *)data;
> +     amdgpu_reset_level_mask = val;
> +     return 0;
> +}
> +
>   DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
>                       amdgpu_debugfs_ib_preempt, "%llu\n");
>   
>   DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
>                       amdgpu_debugfs_sclk_set, "%llu\n");
>   
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_reset_level, amdgpu_debugfs_reset_level_get,
> +                     amdgpu_debugfs_reset_level_set, "%llu\n");
> +
>   static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
>                               char __user *buf, size_t size, loff_t *pos)
>   {
> @@ -1785,6 +1802,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
>               return PTR_ERR(ent);
>       }
>   
> +     debugfs_create_file("amdgpu_reset_level", 0200, root, adev,
> +                               &fops_reset_level);
> +
>       /* Register debugfs entries for amdgpu_ttm */
>       amdgpu_ttm_debugfs_init(adev);
>       amdgpu_debugfs_pm_init(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index e8c6c3fe9374..fb8f3cb853a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -198,6 +198,7 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
>       .timeout_fatal_disable = false,
>       .period = 0x0, /* default to 0x0 (timeout disable) */
>   };
> +uint amdgpu_reset_level_mask = 0x1;
>   
>   /**
>    * DOC: vramlimit (int)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> index 831fb222139c..f16ab1a54b70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -74,6 +74,9 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device 
> *adev,
>   {
>       struct amdgpu_reset_handler *reset_handler = NULL;
>   
> +     if (!(amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
> +             return -ENOSYS;
> +
>       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
>               return -ENOSYS;
>   
> @@ -93,6 +96,9 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
>       int ret;
>       struct amdgpu_reset_handler *reset_handler = NULL;
>   
> +     if (!(amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
> +             return -ENOSYS;
> +
>       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
>               return -ENOSYS;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index d3558c34d406..1ffdc050a077 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -405,6 +405,9 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
> unsigned int vmid,
>   {
>       ktime_t deadline = ktime_add_us(ktime_get(), 10000);
>   
> +     if (!(amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY))
> +             return false;
> +
>       if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || 
> !fence)
>               return false;
>   

Reply via email to