From: "jesse.zh...@amd.com" <jesse.zh...@amd.com>

- Introduce a new function `sdma_v4_4_2_init_sysfs_reset_mask` to initialize
  the sysfs reset mask for SDMA.
- Move the initialization of the sysfs reset mask to the `late_init` stage to
  ensure that SMU initialization and capability setup are completed before
  checking the SDMA reset capability.
- Consolidate the logic for setting the supported reset types and initializing
  the sysfs reset mask into the new function.
- For IP versions 9.4.3 and 9.4.4, enable per-queue reset only if the MEC
  firmware version is at least 0xb0 and the PMFW supports queue reset (see the
  illustrative sketch below).
- Add a TODO comment for future support of per-queue reset on IP version 9.4.5.

This change ensures that per-queue reset is only enabled when both the MEC and
the PMFW support it.
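
For reference only, here is a minimal standalone sketch of the firmware gating
rule described above (not part of the patch; the helper name and the constant
below are made up for illustration, while the real check uses
adev->gfx.mec_fw_version and amdgpu_dpm_reset_sdma_is_supported() as shown in
the diff):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the 0xb0 MEC firmware threshold used by the patch. */
    #define MEC_FW_MIN_FOR_SDMA_QUEUE_RESET 0xb0

    /* True only when both firmware-side conditions for per-queue reset hold. */
    static bool sdma_per_queue_reset_allowed(uint32_t mec_fw_version,
                                             bool pmfw_supports_queue_reset)
    {
            return mec_fw_version >= MEC_FW_MIN_FOR_SDMA_QUEUE_RESET &&
                   pmfw_supports_queue_reset;
    }

    int main(void)
    {
            /* MEC fw 0xb2 with PMFW support -> per-queue reset is enabled */
            printf("%d\n", sdma_per_queue_reset_allowed(0xb2, true));
            /* MEC fw 0xaf -> too old, per-queue reset stays disabled */
            printf("%d\n", sdma_per_queue_reset_allowed(0xaf, true));
            return 0;
    }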

Suggested-by: Jonathan Kim <jonathan....@amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.pros...@amd.com>
Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 55 ++++++++++++++++++++----
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 4fa688e00f5e..fd2884de2dc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -107,6 +107,7 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev);
+static int sdma_v4_4_2_init_sysfs_reset_mask(struct amdgpu_device *adev);
 
 static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
                u32 instance, u32 offset)
@@ -1366,6 +1367,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
 static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
 {
        struct amdgpu_device *adev = ip_block->adev;
+       int r;
 #if 0
        struct ras_ih_if ih_info = {
                .cb = sdma_v4_4_2_process_ras_data_cb,
@@ -1374,7 +1376,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
        if (!amdgpu_persistent_edc_harvesting_supported(adev))
                amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
 
-       return 0;
+       /* The initialization is done in the late_init stage to ensure that the SMU
+        * initialization and capability setup are completed before we check the SDMA
+        * reset capability.
+        */
+       r = sdma_v4_4_2_init_sysfs_reset_mask(adev);
+       return r;
 }
 
 static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
@@ -1481,10 +1488,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
                }
        }
 
-       /* TODO: Add queue reset mask when FW fully supports it */
-       adev->sdma.supported_reset =
-               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
-
        if (amdgpu_sdma_ras_sw_init(adev)) {
                dev_err(adev->dev, "fail to initialize sdma ras block\n");
                return -EINVAL;
@@ -1497,9 +1500,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
        else
                DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
-       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
-       if (r)
-               return r;
        /* Initialize guilty flags for GFX and PAGE queues */
        adev->sdma.gfx_guilty = false;
        adev->sdma.page_guilty = false;
@@ -2328,6 +2328,45 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
        adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
+/**
+ * sdma_v4_4_2_init_sysfs_reset_mask - Initialize sysfs reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function initializes the sysfs reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+static int sdma_v4_4_2_init_sysfs_reset_mask(struct amdgpu_device *adev)
+{
+       int r = 0;
+
+       /* Set the supported reset types */
+       adev->sdma.supported_reset =
+               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+       /*
+        * User queues rely on the MEC firmware and the PMFW when an SDMA queue
+        * is reset, so check both of them here to skip old MEC and PMFW versions.
+        */
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+       case IP_VERSION(9, 4, 3):
+       case IP_VERSION(9, 4, 4):
+               if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       case IP_VERSION(9, 4, 5):
+               /* TODO: enable the per-queue reset flag once the firmware supports it */
+       default:
+               break;
+       }
+
+       /* Initialize the sysfs reset mask */
+       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+
+       return r;
+
+}
+
 const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_SDMA,
        .major = 4,
-- 
2.25.1
