On 11/25/2021 4:26 PM, yipechai wrote:
Define an unified ras function pointers for each ip block to adapt.

Signed-off-by: yipechai <yipeng.c...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 36 ++++++++++++-------------
  2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 90f0db3b4f65..dc6c8130e2d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2739,3 +2739,23 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
          }
  }
  #endif
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+               unsigned int block)
+{
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       if (block >= AMDGPU_RAS_BLOCK_COUNT)
+               return 0;
+       return ras && (adev->ras_enabled & (1 << block));
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
+               schedule_work(&ras->recovery_work);
+       return 0;
+}

These changes look unrelated. Maybe as another patch to move from .h file to .c file.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index cdd0010a5389..4b7da40dd837 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -469,6 +469,19 @@ struct ras_debug_if {
        };
        int op;
  };
+
+struct amdgpu_ras_block_ops {
+       int (*ras_late_init)(struct amdgpu_device *adev);
+       void (*ras_fini)(struct amdgpu_device *adev);
+       int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+       void  (*query_ras_error_count)(struct amdgpu_device *adev,void 
*ras_error_status);
+       void (*query_ras_error_status)(struct amdgpu_device *adev);
+       bool  (*query_ras_poison_mode)(struct amdgpu_device *adev);
+       void (*query_ras_error_address)(struct amdgpu_device *adev, void 
*ras_error_status);
+       void (*reset_ras_error_count)(struct amdgpu_device *adev);
+       void (*reset_ras_error_status)(struct amdgpu_device *adev);
+};
+

Generic comment - Since all the operations are consolidated under _ops, it makes sense to rename the <ip>_ras_funcs to <ip>_ras.

Ex: amdgpu_gfx_ras_funcs => amdgpu_gfx_ras, amdgpu_xgmi_ras_funcs => amdgpu_xgmi_ras and so forth.

In future, these ras blocks may have data members to keep IP specific ras data.

Thanks,
Lijo

  /* work flow
   * vbios
   * 1: ras feature enable (enabled by default)
@@ -486,16 +499,6 @@ struct ras_debug_if {
  #define amdgpu_ras_get_context(adev)          ((adev)->psp.ras_context.ras)
  #define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = 
(ras_con))
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
-               unsigned int block)
-{
-       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-       if (block >= AMDGPU_RAS_BLOCK_COUNT)
-               return 0;
-       return ras && (adev->ras_enabled & (1 << block));
-}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev); @@ -512,15 +515,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
-       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-       if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
-               schedule_work(&ras->recovery_work);
-       return 0;
-}
-
  static inline enum ta_ras_block
  amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
        switch (block) {
@@ -652,4 +646,8 @@ const char *get_ras_block_str(struct ras_common_if 
*ras_block);
bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); +int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
  #endif

Reply via email to