[Public]

> -----Original Message-----
> From: Lazar, Lijo <lijo.la...@amd.com>
> Sent: Monday, March 17, 2025 10:19 AM
> To: Deucher, Alexander <alexander.deuc...@amd.com>; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amdgpu/sdma: guilty tracking is per instance
>
>
>
> On 3/17/2025 7:17 PM, Alex Deucher wrote:
> > The gfx and page queues are per instance, so track them per instance.
> >
> > Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  7 ++++---
> > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 18 +++++++++++-------
> >  2 files changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > index 8e7e794ff136f..dc1a81c2f9af7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > @@ -65,6 +65,10 @@ struct amdgpu_sdma_instance {
> >     uint64_t                sdma_fw_gpu_addr;
> >     uint32_t                *sdma_fw_ptr;
> >     struct mutex            engine_reset_mutex;
> > +   /* track guilty state of GFX and PAGE queues */
> > +   bool                    gfx_guilty;
> > +   bool                    page_guilty;
> > +
> >  };
> >
> >  enum amdgpu_sdma_ras_memory_id {
> > @@ -127,9 +131,6 @@ struct amdgpu_sdma {
> >     uint32_t                *ip_dump;
> >     uint32_t                supported_reset;
> >     struct list_head        reset_callback_list;
> > -   /* track guilty state of GFX and PAGE queues */
> > -   bool gfx_guilty;
> > -   bool page_guilty;
> >  };
> >
> >  /*
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > index 927db7a080f0a..c485b582a20f6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > @@ -989,9 +989,10 @@ static int sdma_v4_4_2_inst_start(struct
> amdgpu_device *adev,
> >             uint32_t temp;
> >
> >             WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
> > -           sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty);
> > +           sdma_v4_4_2_gfx_resume(adev, i, restore,
> > +adev->sdma.instance[i].gfx_guilty);
> >             if (adev->sdma.has_page_queue)
> > -                   sdma_v4_4_2_page_resume(adev, i, restore, adev-
> >sdma.page_guilty);
> > +                   sdma_v4_4_2_page_resume(adev, i, restore,
> > +                                           
> > adev->sdma.instance[i].page_guilty);
>
> I think passing the flag is no longer be required as the instance id is 
> passed already.

We still need to determine which queue needs to be reset and restored vs.just 
reset.

Alex


>
> Thanks,
> Lijo
>
> >
> >             /* set utc l1 enable flag always to 1 */
> >             temp = RREG32_SDMA(i, regSDMA_CNTL); @@ -1446,6
> +1447,10 @@ static
> > int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
> >
> >     for (i = 0; i < adev->sdma.num_instances; i++) {
> >             mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
> > +           /* Initialize guilty flags for GFX and PAGE queues */
> > +           adev->sdma.instance[i].gfx_guilty = false;
> > +           adev->sdma.instance[i].page_guilty = false;
> > +
> >             ring = &adev->sdma.instance[i].ring;
> >             ring->ring_obj = NULL;
> >             ring->use_doorbell = true;
> > @@ -1507,9 +1512,6 @@ static int sdma_v4_4_2_sw_init(struct
> amdgpu_ip_block *ip_block)
> >     r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> >     if (r)
> >             return r;
> > -   /* Initialize guilty flags for GFX and PAGE queues */
> > -   adev->sdma.gfx_guilty = false;
> > -   adev->sdma.page_guilty = false;
> >
> >     return r;
> >  }
> > @@ -1686,9 +1688,11 @@ static int sdma_v4_4_2_stop_queue(struct
> amdgpu_device *adev, uint32_t instance_
> >             return -EINVAL;
> >
> >     /* Check if this queue is the guilty one */
> > -   adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev,
> instance_id, false);
> > +   adev->sdma.instance[instance_id].gfx_guilty =
> > +           sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
> >     if (adev->sdma.has_page_queue)
> > -           adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev,
> instance_id, true);
> > +           adev->sdma.instance[instance_id].page_guilty =
> > +                   sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
> >
> >     /* Cache the rptr before reset, after the reset,
> >     * all of the registers will be reset to 0

Reply via email to