On Wed, Apr 2, 2025 at 5:28 AM jesse.zh...@amd.com <jesse.zh...@amd.com> wrote: > > Since KFD no longer registers its own callbacks for SDMA resets, and only KGD > uses the reset mechanism, > we can simplify the SDMA reset flow by directly calling the ring's > `stop_queue` and `start_queue` functions. > This patch removes the dynamic callback mechanism and prepares for its > eventual deprecation. > > 1. **Remove Dynamic Callbacks**: > - The `pre_reset` and `post_reset` callback invocations in > `amdgpu_sdma_reset_engine` are removed. > - Instead, the ring's `stop_queue` and `start_queue` functions are called > directly during the reset process. > > 2. **Prepare for Deprecation of Dynamic Mechanism**: > - By removing the callback invocations, this patch prepares the codebase > for the eventual removal of the dynamic callback registration mechanism. > > v2: Update stop_queue/start_queue function paramters to use ring pointer > instead of device/instance(Christian) > > Signed-off-by: Jesse Zhang <jesse.zh...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 34 +++--------------------- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 13 ++++----- > 3 files changed, 13 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 615c3d5c5a8d..23ea221e26de 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -237,6 +237,8 @@ struct amdgpu_ring_funcs { > void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); > void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); > int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); > + int (*stop_queue)(struct amdgpu_ring *ring); > + int (*start_queue)(struct amdgpu_ring *ring);
Since this is specific to SDMA, maybe it would be cleaner to add these to struct amdgpu_sdma_instance. And if so, maybe rename it since it's specific to kernel queues. E.g., int (*stop_kernel_queue)(struct amdgpu_ring *ring); int (*start_kernel_queue)(struct amdgpu_ring *ring); Unless you think we may need it for other engines in the future which only support engine level soft resets. Alex > void (*emit_cleaner_shader)(struct amdgpu_ring *ring); > bool (*is_guilty)(struct amdgpu_ring *ring); > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > index 0a9893fee828..b51fe644940f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > @@ -558,16 +558,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct > amdgpu_device *adev, struct > * @adev: Pointer to the AMDGPU device > * @instance_id: ID of the SDMA engine instance to reset > * > - * This function performs the following steps: > - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to > save their state. > - * 2. Resets the specified SDMA engine instance. > - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to > restore their state. > - * > * Returns: 0 on success, or a negative error code on failure. > */ > int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t > instance_id) > { > - struct sdma_on_reset_funcs *funcs; > int ret = 0; > struct amdgpu_sdma_instance *sdma_instance = > &adev->sdma.instance[instance_id]; > struct amdgpu_ring *gfx_ring = &sdma_instance->ring; > @@ -589,18 +583,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, > uint32_t instance_id) > page_sched_stopped = true; > } > > - /* Invoke all registered pre_reset callbacks */ > - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { > - if (funcs->pre_reset) { > - ret = funcs->pre_reset(adev, instance_id); > - if (ret) { > - dev_err(adev->dev, > - "beforeReset callback failed for instance %u: > %d\n", > - instance_id, ret); > - goto exit; > - } > - } > - } > + if (gfx_ring->funcs->stop_queue) > + gfx_ring->funcs->stop_queue(gfx_ring); > > /* Perform the SDMA reset for the specified instance */ > ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); > @@ -609,18 +593,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, > uint32_t instance_id) > goto exit; > } > > - /* Invoke all registered post_reset callbacks */ > - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { > - if (funcs->post_reset) { > - ret = funcs->post_reset(adev, instance_id); > - if (ret) { > - dev_err(adev->dev, > - "afterReset callback failed for instance %u: > %d\n", > - instance_id, ret); > - goto exit; > - } > - } > - } > + if (gfx_ring->funcs->start_queue) > + gfx_ring->funcs->start_queue(gfx_ring); > > exit: > /* Restart the scheduler's work queue for the GFX and page rings > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index 688a720bbbbd..a8330504692d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -1678,11 +1678,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring > *ring, unsigned int vmid) > return r; > } > > -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t > instance_id) > +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) > { > u32 inst_mask; > uint64_t rptr; > - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > + struct amdgpu_device *adev = ring->adev; > + u32 instance_id = GET_INST(SDMA0, ring->me); > > if (amdgpu_sriov_vf(adev)) > return -EINVAL; > @@ -1715,11 +1716,11 @@ static int sdma_v4_4_2_stop_queue(struct > amdgpu_device *adev, uint32_t instance_ > return 0; > } > > -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t > instance_id) > +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) > { > int i; > u32 inst_mask; > - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > + struct amdgpu_device *adev = ring->adev; > > inst_mask = 1 << ring->me; > udelay(50); > @@ -1740,8 +1741,6 @@ static int sdma_v4_4_2_restore_queue(struct > amdgpu_device *adev, uint32_t instan > } > > static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { > - .pre_reset = sdma_v4_4_2_stop_queue, > - .post_reset = sdma_v4_4_2_restore_queue, > }; > > static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) > @@ -2143,6 +2142,8 @@ static const struct amdgpu_ring_funcs > sdma_v4_4_2_ring_funcs = { > .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, > .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, > .reset = sdma_v4_4_2_reset_queue, > + .stop_queue = sdma_v4_4_2_stop_queue, > + .start_queue = sdma_v4_4_2_restore_queue, > .is_guilty = sdma_v4_4_2_ring_is_guilty, > }; > > -- > 2.25.1 >