Am 02.04.25 um 11:14 schrieb jesse.zh...@amd.com: > Since KFD no longer registers its own callbacks for SDMA resets, and only KGD > uses the reset mechanism, > we can simplify the SDMA reset flow by directly calling the ring's > `stop_queue` and `start_queue` functions. > This patch removes the dynamic callback mechanism and prepares for its > eventual deprecation. > > 1. **Remove Dynamic Callbacks**: > - The `pre_reset` and `post_reset` callback invocations in > `amdgpu_sdma_reset_engine` are removed. > - Instead, the ring's `stop_queue` and `start_queue` functions are called > directly during the reset process. > > 2. **Prepare for Deprecation of Dynamic Mechanism**: > - By removing the callback invocations, this patch prepares the codebase > for the eventual removal of the dynamic callback registration mechanism. > > v2: Update stop_queue/start_queue function paramters to use ring pointer > instead of device/instance(Christian) > > Signed-off-by: Jesse Zhang <jesse.zh...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 34 +++--------------------- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 13 ++++----- > 3 files changed, 13 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 615c3d5c5a8d..23ea221e26de 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -237,6 +237,8 @@ struct amdgpu_ring_funcs { > void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); > void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); > int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); > + int (*stop_queue)(struct amdgpu_ring *ring); > + int (*start_queue)(struct amdgpu_ring *ring); > void (*emit_cleaner_shader)(struct amdgpu_ring *ring); > bool (*is_guilty)(struct amdgpu_ring *ring); > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > index 0a9893fee828..b51fe644940f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > @@ -558,16 +558,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct > amdgpu_device *adev, struct > * @adev: Pointer to the AMDGPU device > * @instance_id: ID of the SDMA engine instance to reset > * > - * This function performs the following steps: > - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to > save their state. > - * 2. Resets the specified SDMA engine instance. > - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to > restore their state. > - * > * Returns: 0 on success, or a negative error code on failure. > */ > int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t > instance_id) > { > - struct sdma_on_reset_funcs *funcs; > int ret = 0; > struct amdgpu_sdma_instance *sdma_instance = > &adev->sdma.instance[instance_id]; > struct amdgpu_ring *gfx_ring = &sdma_instance->ring; > @@ -589,18 +583,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, > uint32_t instance_id) > page_sched_stopped = true; > } > > - /* Invoke all registered pre_reset callbacks */ > - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { > - if (funcs->pre_reset) { > - ret = funcs->pre_reset(adev, instance_id); > - if (ret) { > - dev_err(adev->dev, > - "beforeReset callback failed for instance %u: > %d\n", > - instance_id, ret); > - goto exit; > - } > - } > - } > + if (gfx_ring->funcs->stop_queue) > + gfx_ring->funcs->stop_queue(gfx_ring); > > /* Perform the SDMA reset for the specified instance */ > ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); > @@ -609,18 +593,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, > uint32_t instance_id) > goto exit; > } > > - /* Invoke all registered post_reset callbacks */ > - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { > - if (funcs->post_reset) { > - ret = funcs->post_reset(adev, instance_id); > - if (ret) { > - dev_err(adev->dev, > - "afterReset callback failed for instance %u: > %d\n", > - instance_id, ret); > - goto exit; > - } > - } > - } > + if (gfx_ring->funcs->start_queue) > + gfx_ring->funcs->start_queue(gfx_ring); > > exit: > /* Restart the scheduler's work queue for the GFX and page rings > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index 688a720bbbbd..a8330504692d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -1678,11 +1678,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring > *ring, unsigned int vmid) > return r; > } > > -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t > instance_id) > +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) > { > u32 inst_mask; > uint64_t rptr; > - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > + struct amdgpu_device *adev = ring->adev; > + u32 instance_id = GET_INST(SDMA0, ring->me); > > if (amdgpu_sriov_vf(adev)) > return -EINVAL; > @@ -1715,11 +1716,11 @@ static int sdma_v4_4_2_stop_queue(struct > amdgpu_device *adev, uint32_t instance_ > return 0; > } > > -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t > instance_id) > +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) > { > int i; > u32 inst_mask; > - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > + struct amdgpu_device *adev = ring->adev;
Just a nit pick for further code. The general coding style is to declare variables like "i" or "r" last. E.g. longest lines first and short lasts. It seems to be the exact opposite of what some people are used to. I honestly don't care much myself, but some upstream maintainers insist on that. Apart from that I only skimmed over the patches and Alex needs to take a closer look when he's back from vacation next week. Thanks, Christian. > > inst_mask = 1 << ring->me; > udelay(50); > @@ -1740,8 +1741,6 @@ static int sdma_v4_4_2_restore_queue(struct > amdgpu_device *adev, uint32_t instan > } > > static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { > - .pre_reset = sdma_v4_4_2_stop_queue, > - .post_reset = sdma_v4_4_2_restore_queue, > }; > > static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) > @@ -2143,6 +2142,8 @@ static const struct amdgpu_ring_funcs > sdma_v4_4_2_ring_funcs = { > .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, > .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, > .reset = sdma_v4_4_2_reset_queue, > + .stop_queue = sdma_v4_4_2_stop_queue, > + .start_queue = sdma_v4_4_2_restore_queue, > .is_guilty = sdma_v4_4_2_ring_is_guilty, > }; >