On 1/13/2025 7:12 AM, Jiang Liu wrote:
> Add a flag to track ras debugfs creation status, to avoid possible
> incorrect reference count management for ras block object in function
> amdgpu_ras_aca_is_supported().
> 
> Signed-off-by: Jiang Liu <ge...@linux.alibaba.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 +++++++--
>  2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f0f773659faf..09b63a622728 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -378,6 +378,8 @@ int amdgpu_ip_block_resume(struct amdgpu_ip_block 
> *ip_block);
>  #define AMDGPU_MAX_IP_NUM 16
>  
>  enum amdgpu_marker {
> +     // Markers for ras blocks.
> +     AMDGPU_MARKER_RAS_DEBUGFS,

As mentioned in patch 1, keeping a global tracker at this layer is not
the right solution. The expectation is for each object/IP block to keep
its own state info.

Thanks,
Lijo

>       // Markers for IRQs, used for both ip blocks and ras blocks.
>       AMDGPU_MARKER_IRQ0 = 32,
>       AMDGPU_MARKER_IRQ1,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 5e19d820ab34..c10ea3fd3e16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1997,7 +1997,8 @@ static void amdgpu_ras_debugfs_create(struct 
> amdgpu_device *adev,
>  {
>       struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
>  
> -     if (!obj || !dir)
> +     if (!obj || !dir ||
> +         amdgpu_ras_test_marker(adev, &head->head, 
> AMDGPU_MARKER_RAS_DEBUGFS))
>               return;
>  
>       get_obj(obj);
> @@ -2008,6 +2009,8 @@ static void amdgpu_ras_debugfs_create(struct 
> amdgpu_device *adev,
>  
>       debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
>                           obj, &amdgpu_ras_debugfs_ops);
> +
> +     amdgpu_ras_set_marker(adev, &head->head, AMDGPU_MARKER_RAS_DEBUGFS);
>  }
>  
>  static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
> @@ -2136,7 +2139,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device 
> *adev)
>       if (IS_ENABLED(CONFIG_DEBUG_FS)) {
>               list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
>                       ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
> -                     if (ip_obj)
> +                     if (ip_obj &&
> +                         amdgpu_ras_test_and_clear_marker(adev, 
> &ip_obj->head,
> +                                                          
> AMDGPU_MARKER_RAS_DEBUGFS))
>                               put_obj(ip_obj);
>               }
>       }

Reply via email to