Since KFD no longer registers its own callbacks for SDMA resets, and only KGD 
uses the reset mechanism,
we can simplify the SDMA reset flow by directly calling the ring's `stop_queue` 
and `start_queue` functions.
This patch removes the dynamic callback mechanism and prepares for its eventual 
deprecation.

1. **Remove Dynamic Callbacks**:
   - The `pre_reset` and `post_reset` callback invocations in 
`amdgpu_sdma_reset_engine` are removed.
   - Instead, the ring's `stop_queue` and `start_queue` functions are called 
directly during the reset process.

2. **Prepare for Deprecation of Dynamic Mechanism**:
   - By removing the callback invocations, this patch prepares the codebase for 
the eventual removal of the dynamic callback registration mechanism.

v2: Update stop_queue/start_queue function parameters to use a ring pointer 
instead of device/instance (Christian)
v3: Follow the general coding style of declaring variables like "i" or "r" 
last, i.e. longest lines first and shortest last. (Christian)
v4: move stop_queue/start_queue to struct amdgpu_sdma_instance and rename them. 
(Alex)

Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 54 ++++++------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  7 ++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 22 ++++++----
 3 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 0a9893fee828..541b349e0310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -558,20 +558,14 @@ void amdgpu_sdma_register_on_reset_callbacks(struct 
amdgpu_device *adev, struct
  * @adev: Pointer to the AMDGPU device
  * @instance_id: ID of the SDMA engine instance to reset
  *
- * This function performs the following steps:
- * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save 
their state.
- * 2. Resets the specified SDMA engine instance.
- * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to 
restore their state.
- *
  * Returns: 0 on success, or a negative error code on failure.
  */
 int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
 {
-       struct sdma_on_reset_funcs *funcs;
        int ret = 0;
        struct amdgpu_sdma_instance *sdma_instance = 
&adev->sdma.instance[instance_id];
-       struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
-       struct amdgpu_ring *page_ring = &sdma_instance->page;
+       struct amdgpu_ring *sdma_gfx_ring = &sdma_instance->ring;
+       struct amdgpu_ring *sdma_page_ring = &sdma_instance->page;
        bool gfx_sched_stopped = false, page_sched_stopped = false;
 
        mutex_lock(&sdma_instance->engine_reset_mutex);
@@ -579,28 +573,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, 
uint32_t instance_id)
        * This ensures that no new tasks are submitted to the queues while
        * the reset is in progress.
        */
-       if (!amdgpu_ring_sched_ready(gfx_ring)) {
-               drm_sched_wqueue_stop(&gfx_ring->sched);
+       if (!amdgpu_ring_sched_ready(sdma_gfx_ring)) {
+               drm_sched_wqueue_stop(&sdma_gfx_ring->sched);
                gfx_sched_stopped = true;
        }
 
-       if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) {
-               drm_sched_wqueue_stop(&page_ring->sched);
+       if (adev->sdma.has_page_queue && 
!amdgpu_ring_sched_ready(sdma_page_ring)) {
+               drm_sched_wqueue_stop(&sdma_page_ring->sched);
                page_sched_stopped = true;
        }
 
-       /* Invoke all registered pre_reset callbacks */
-       list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
-               if (funcs->pre_reset) {
-                       ret = funcs->pre_reset(adev, instance_id);
-                       if (ret) {
-                               dev_err(adev->dev,
-                               "beforeReset callback failed for instance %u: 
%d\n",
-                                       instance_id, ret);
-                               goto exit;
-                       }
-               }
-       }
+       if (sdma_instance->funcs->stop_kernel_queue)
+               sdma_instance->funcs->stop_kernel_queue(sdma_gfx_ring);
 
        /* Perform the SDMA reset for the specified instance */
        ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id);
@@ -609,18 +593,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, 
uint32_t instance_id)
                goto exit;
        }
 
-       /* Invoke all registered post_reset callbacks */
-       list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
-               if (funcs->post_reset) {
-                       ret = funcs->post_reset(adev, instance_id);
-                       if (ret) {
-                               dev_err(adev->dev,
-                               "afterReset callback failed for instance %u: 
%d\n",
-                                       instance_id, ret);
-                               goto exit;
-                       }
-               }
-       }
+       if (sdma_instance->funcs->start_kernel_queue)
+               sdma_instance->funcs->start_kernel_queue(sdma_gfx_ring);
 
 exit:
        /* Restart the scheduler's work queue for the GFX and page rings
@@ -628,11 +602,11 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, 
uint32_t instance_id)
         * to be submitted to the queues after the reset is complete.
         */
        if (!ret) {
-               if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
-                       drm_sched_wqueue_start(&gfx_ring->sched);
+               if (gfx_sched_stopped && 
amdgpu_ring_sched_ready(sdma_gfx_ring)) {
+                       drm_sched_wqueue_start(&sdma_gfx_ring->sched);
                }
-               if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) {
-                       drm_sched_wqueue_start(&page_ring->sched);
+               if (page_sched_stopped && 
amdgpu_ring_sched_ready(sdma_page_ring)) {
+                       drm_sched_wqueue_start(&sdma_page_ring->sched);
                }
        }
        mutex_unlock(&sdma_instance->engine_reset_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 47d56fd0589f..620fd7663526 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,6 +50,11 @@ enum amdgpu_sdma_irq {
 
 #define NUM_SDMA(x) hweight32(x)
 
+struct amdgpu_sdma_funcs {
+       int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+       int (*start_kernel_queue)(struct amdgpu_ring *ring);
+};
+
 struct amdgpu_sdma_instance {
        /* SDMA firmware */
        const struct firmware   *fw;
@@ -68,7 +73,7 @@ struct amdgpu_sdma_instance {
        /* track guilty state of GFX and PAGE queues */
        bool                    gfx_guilty;
        bool                    page_guilty;
-
+       const struct amdgpu_sdma_funcs   *funcs;
 };
 
 enum amdgpu_sdma_ras_memory_id {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 688a720bbbbd..c663c63485f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -108,6 +108,8 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device 
*adev);
 static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
 
 static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
                u32 instance, u32 offset)
@@ -1333,6 +1335,11 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct 
amdgpu_device *adev)
        }
 }
 
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+       .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+       .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+};
+
 static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
 {
        struct amdgpu_device *adev = ip_block->adev;
@@ -1352,7 +1359,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block 
*ip_block)
        sdma_v4_4_2_set_irq_funcs(adev);
        sdma_v4_4_2_set_ras_funcs(adev);
        sdma_v4_4_2_set_engine_reset_funcs(adev);
-
        return 0;
 }
 
@@ -1447,6 +1453,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block 
*ip_block)
                /* Initialize guilty flags for GFX and PAGE queues */
                adev->sdma.instance[i].gfx_guilty = false;
                adev->sdma.instance[i].page_guilty = false;
+               adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
 
                ring = &adev->sdma.instance[i].ring;
                ring->ring_obj = NULL;
@@ -1678,11 +1685,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring 
*ring, unsigned int vmid)
        return r;
 }
 
-static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t 
instance_id)
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
 {
+       struct amdgpu_device *adev = ring->adev;
+       u32 instance_id = GET_INST(SDMA0, ring->me);
        u32 inst_mask;
        uint64_t rptr;
-       struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
 
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
@@ -1715,11 +1723,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device 
*adev, uint32_t instance_
        return 0;
 }
 
-static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t 
instance_id)
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
 {
-       int i;
+       struct amdgpu_device *adev = ring->adev;
        u32 inst_mask;
-       struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
+       int i;
 
        inst_mask = 1 << ring->me;
        udelay(50);
@@ -1740,8 +1748,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device 
*adev, uint32_t instan
 }
 
 static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = {
-       .pre_reset = sdma_v4_4_2_stop_queue,
-       .post_reset = sdma_v4_4_2_restore_queue,
 };
 
 static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev)
-- 
2.25.1

Reply via email to