On Wed, Mar 11, 2026 at 03:59:16PM +0530, Riana Tauro wrote:
> Add support for clear-error-counter command in XE DRM RAS.
> This resets the counter value.
> 
> Usage:
> 
> $ sudo ynl --family drm_ras  --do clear-error-counter --json \
> '{"node-id":1, "error-id":1}'
> None
> 
> Signed-off-by: Riana Tauro <[email protected]>
> ---
>  drivers/gpu/drm/xe/xe_drm_ras.c | 35 +++++++++++++++++++++++++++++++--
>  1 file changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> index e07dc23a155e..c21c8b428de6 100644
> --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> @@ -27,6 +27,16 @@ static int hw_query_error_counter(struct xe_drm_ras_counter *info,
>       return 0;
>  }
>  
> +static int hw_clear_error_counter(struct xe_drm_ras_counter *info, u32 error_id)
> +{
> +     if (!info || !info[error_id].name)
> +             return -ENOENT;
> +
> +     atomic_set(&info[error_id].counter, 0);
> +
> +     return 0;
> +}
> +
>  static int query_uncorrectable_error_counter(struct drm_ras_node *ep, u32 error_id,
>                                            const char **name, u32 *val)
>  {
> @@ -37,6 +47,15 @@ static int query_uncorrectable_error_counter(struct drm_ras_node *ep, u32 error_
>       return hw_query_error_counter(info, error_id, name, val);
>  }
>  
> +static int clear_uncorrectable_error_counter(struct drm_ras_node *node, u32 error_id)
> +{
> +     struct xe_device *xe = node->priv;
> +     struct xe_drm_ras *ras = &xe->ras;
> +     struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE];
> +
> +     return hw_clear_error_counter(info, error_id);
> +}
> +
>  static int query_correctable_error_counter(struct drm_ras_node *ep, u32 error_id,
>                                          const char **name, u32 *val)
>  {
> @@ -47,6 +66,15 @@ static int query_correctable_error_counter(struct drm_ras_node *ep, u32 error_id
>       return hw_query_error_counter(info, error_id, name, val);
>  }
>  
> +static int clear_correctable_error_counter(struct drm_ras_node *node, u32 error_id)
> +{
> +     struct xe_device *xe = node->priv;
> +     struct xe_drm_ras *ras = &xe->ras;
> +     struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE];
> +
> +     return hw_clear_error_counter(info, error_id);
> +}

This would've been much simpler if we had per-node info, but for now:

Reviewed-by: Raag Jadav <[email protected]>

>  static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *xe)
>  {
>       struct xe_drm_ras_counter *counter;
> @@ -92,10 +120,13 @@ static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
>       if (IS_ERR(ras->info[severity]))
>               return PTR_ERR(ras->info[severity]);
>  
> -     if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE)
> +     if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) {
>               node->query_error_counter = query_correctable_error_counter;
> -     else
> +             node->clear_error_counter = clear_correctable_error_counter;
> +     } else {
>               node->query_error_counter = query_uncorrectable_error_counter;
> +             node->clear_error_counter = clear_uncorrectable_error_counter;
> +     }
>  
>       return 0;
>  }
> -- 
> 2.47.1
> 

Reply via email to