Since KFD no longer registers its own callbacks for SDMA resets, and only KGD uses the reset mechanism, we can simplify the SDMA reset flow by directly calling the ring's `stop_queue` and `start_queue` functions. This patch removes the dynamic callback mechanism and prepares for its eventual deprecation.
1. **Remove Dynamic Callbacks**: - The `pre_reset` and `post_reset` callback invocations in `amdgpu_sdma_reset_engine` are removed. - Instead, the ring's `stop_queue` and `start_queue` functions are called directly during the reset process. 2. **Prepare for Deprecation of Dynamic Mechanism**: - By removing the callback invocations, this patch prepares the codebase for the eventual removal of the dynamic callback registration mechanism. v2: Update stop_queue/start_queue function paramters to use ring pointer instead of device/instance(Christian) v3: The general coding style is to declare variables like "i" or "r" last. E.g. longest lines first and short lasts. (Chritian) v4: move stop_queue/start_queue to struct amdgpu_sdma_instance and rename them. (Alex) Signed-off-by: Jesse Zhang <jesse.zh...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 54 ++++++------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 7 ++- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 22 ++++++---- 3 files changed, 34 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 0a9893fee828..541b349e0310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -558,20 +558,14 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * @adev: Pointer to the AMDGPU device * @instance_id: ID of the SDMA engine instance to reset * - * This function performs the following steps: - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. - * 2. Resets the specified SDMA engine instance. - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. - * * Returns: 0 on success, or a negative error code on failure. */ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) { - struct sdma_on_reset_funcs *funcs; int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; - struct amdgpu_ring *gfx_ring = &sdma_instance->ring; - struct amdgpu_ring *page_ring = &sdma_instance->page; + struct amdgpu_ring *sdma_gfx_ring = &sdma_instance->ring; + struct amdgpu_ring *sdma_page_ring = &sdma_instance->page; bool gfx_sched_stopped = false, page_sched_stopped = false; mutex_lock(&sdma_instance->engine_reset_mutex); @@ -579,28 +573,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) * This ensures that no new tasks are submitted to the queues while * the reset is in progress. */ - if (!amdgpu_ring_sched_ready(gfx_ring)) { - drm_sched_wqueue_stop(&gfx_ring->sched); + if (!amdgpu_ring_sched_ready(sdma_gfx_ring)) { + drm_sched_wqueue_stop(&sdma_gfx_ring->sched); gfx_sched_stopped = true; } - if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_stop(&page_ring->sched); + if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(sdma_page_ring)) { + drm_sched_wqueue_stop(&sdma_page_ring->sched); page_sched_stopped = true; } - /* Invoke all registered pre_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->pre_reset) { - ret = funcs->pre_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "beforeReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->stop_kernel_queue) + sdma_instance->funcs->stop_kernel_queue(sdma_gfx_ring); /* Perform the SDMA reset for the specified instance */ ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); @@ -609,18 +593,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) goto exit; } - /* Invoke all registered post_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->post_reset) { - ret = funcs->post_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "afterReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->start_kernel_queue) + sdma_instance->funcs->start_kernel_queue(sdma_gfx_ring); exit: /* Restart the scheduler's work queue for the GFX and page rings @@ -628,11 +602,11 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) * to be submitted to the queues after the reset is complete. */ if (!ret) { - if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { - drm_sched_wqueue_start(&gfx_ring->sched); + if (gfx_sched_stopped && amdgpu_ring_sched_ready(sdma_gfx_ring)) { + drm_sched_wqueue_start(&sdma_gfx_ring->sched); } - if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_start(&page_ring->sched); + if (page_sched_stopped && amdgpu_ring_sched_ready(sdma_page_ring)) { + drm_sched_wqueue_start(&sdma_page_ring->sched); } } mutex_unlock(&sdma_instance->engine_reset_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 47d56fd0589f..620fd7663526 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,11 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_funcs { + int (*stop_kernel_queue)(struct amdgpu_ring *ring); + int (*start_kernel_queue)(struct amdgpu_ring *ring); +}; + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -68,7 +73,7 @@ struct amdgpu_sdma_instance { /* track guilty state of GFX and PAGE queues */ bool gfx_guilty; bool page_guilty; - + const struct amdgpu_sdma_funcs *funcs; }; enum amdgpu_sdma_ras_memory_id { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 688a720bbbbd..c663c63485f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -108,6 +108,8 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -1333,6 +1335,11 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v4_4_2_stop_queue, + .start_kernel_queue = &sdma_v4_4_2_restore_queue, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1352,7 +1359,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); sdma_v4_4_2_set_engine_reset_funcs(adev); - return 0; } @@ -1447,6 +1453,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) /* Initialize guilty flags for GFX and PAGE queues */ adev->sdma.instance[i].gfx_guilty = false; adev->sdma.instance[i].page_guilty = false; + adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1678,11 +1685,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) return r; } -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + u32 instance_id = GET_INST(SDMA0, ring->me); u32 inst_mask; uint64_t rptr; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1715,11 +1723,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return 0; } -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { - int i; + struct amdgpu_device *adev = ring->adev; u32 inst_mask; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + int i; inst_mask = 1 << ring->me; udelay(50); @@ -1740,8 +1748,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instan } static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { - .pre_reset = sdma_v4_4_2_stop_queue, - .post_reset = sdma_v4_4_2_restore_queue, }; static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) -- 2.25.1