GFX v11_0_3 RAS needs to be enabled if poison mode
is supported. The driver doesn't need to issue a
feature enable call in the gfx_v11_0 late init phase.
The RAS late init call is already centralized in
amdgpu_ras_late_init.
In addition, move the poison_mode check out of common
helpers like amdgpu_ras_is_supported and
amdgpu_ras_is_feature_allowed to ensure only GFX RAS
is enabled when poison mode is supported.

Signed-off-by: Hawking Zhang <hawking.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++++++++-----------------
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 26 -------------
 2 files changed, 16 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dd7cdc234d7e..35e70860d628 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -126,6 +126,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
                                uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
                                uint64_t addr);
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev);
 #ifdef CONFIG_X86_MCE_AMD
 static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
 struct mce_notifier_adev_list {
@@ -757,16 +758,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        return 0;
 }
 
-static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev,
-               struct ras_common_if *head)
-{
-       if (amdgpu_ras_is_feature_allowed(adev, head) ||
-               amdgpu_ras_is_poison_mode_supported(adev))
-               return 1;
-       else
-               return 0;
-}
-
 /* wrapper of psp_ras_enable_features */
 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
                struct ras_common_if *head, bool enable)
@@ -797,7 +788,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        }
 
        /* Do not enable if it is not allowed. */
-       if (enable && !amdgpu_ras_check_feature_allowed(adev, head))
+       if (enable && !amdgpu_ras_is_feature_allowed(adev, head))
                goto out;
 
        /* Only enable ras feature operation handle on host side */
@@ -2420,9 +2411,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 }
 
 /*
- * this is workaround for vega20 workstation sku,
- * force enable gfx ras, ignore vbios gfx ras flag
- * due to GC EDC can not write
+ * Common helpers for device or IP specific RAS quirks including
+ * a). Enable gfx ras on D16406 or D36002 board
+ * b). Enable gfx ras in gfx_v11_0_3 if poison mode is supported
  */
 static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
 {
@@ -2431,10 +2422,16 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
        if (!ctx)
                return;
 
+       /* Enable gfx ras on specific board */
        if (strnstr(ctx->vbios_version, "D16406",
                    sizeof(ctx->vbios_version)) ||
-               strnstr(ctx->vbios_version, "D36002",
-                       sizeof(ctx->vbios_version)))
+           strnstr(ctx->vbios_version, "D36002",
+                   sizeof(ctx->vbios_version)))
+               adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+
+       /* Enable gfx ras on gfx_v11_0_3 if poison mode is supported */
+       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
+           amdgpu_ras_is_poison_mode_supported(adev))
                adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
 }
 
@@ -2502,6 +2499,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
                                           1 << AMDGPU_RAS_BLOCK__MMHUB);
        }
 
+       amdgpu_ras_query_poison_mode(adev);
+
        amdgpu_ras_get_quirks(adev);
 
        /* hw_supported needs to be aligned with RAS block mask. */
@@ -2659,8 +2658,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
                        goto release_con;
        }
 
-       amdgpu_ras_query_poison_mode(adev);
-
        if (amdgpu_ras_fs_init(adev)) {
                r = -EINVAL;
                goto release_con;
@@ -3115,26 +3112,12 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
 int amdgpu_ras_is_supported(struct amdgpu_device *adev,
                unsigned int block)
 {
-       int ret = 0;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
        if (block >= AMDGPU_RAS_BLOCK_COUNT)
                return 0;
 
-       ret = ras && (adev->ras_enabled & (1 << block));
-
-       /* For the special asic with mem ecc enabled but sram ecc
-        * not enabled, even if the ras block is not supported on
-        * .ras_enabled, if the asic supports poison mode and the
-        * ras block has ras configuration, it can be considered
-        * that the ras block supports ras function.
-        */
-       if (!ret &&
-           amdgpu_ras_is_poison_mode_supported(adev) &&
-           amdgpu_ras_get_ras_block(adev, block, 0))
-               ret = 1;
-
-       return ret;
+       return (ras && (adev->ras_enabled & (1 << block)));
 }
 
 int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 690e121d9dda..11e0c574b9f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4650,26 +4650,6 @@ static int gfx_v11_0_early_init(void *handle)
        return gfx_v11_0_init_microcode(adev);
 }
 
-static int gfx_v11_0_ras_late_init(void *handle)
-{
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct ras_common_if *gfx_common_if;
-       int ret;
-
-       gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
-       if (!gfx_common_if)
-               return -ENOMEM;
-
-       gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
-
-       ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
-       if (ret)
-               dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
-
-       kfree(gfx_common_if);
-       return 0;
-}
-
 static int gfx_v11_0_late_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -4683,12 +4663,6 @@ static int gfx_v11_0_late_init(void *handle)
        if (r)
                return r;
 
-       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
-               r = gfx_v11_0_ras_late_init(handle);
-               if (r)
-                       return r;
-       }
-
        return 0;
 }
 
-- 
2.17.1

Reply via email to