On Mon, Mar 3, 2025 at 9:46 AM Alex Deucher <alexdeuc...@gmail.com> wrote:
>
> On Fri, Feb 21, 2025 at 8:38 AM Prike Liang <prike.li...@amd.com> wrote:
> >
> > Implement the kernel graphics queue pipe reset,and the driver
> > will fallback to pipe reset when the queue reset fails. However,
> > the ME FW hasn't fully supported pipe reset yet so disable the
> > KGQ pipe reset temporarily.
> >
> > Signed-off-by: Prike Liang <prike.li...@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  2 +
> >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 71 ++++++++++++++++++++++-
> >  2 files changed, 71 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > index 4eedd92f000b..06fe21e15ed6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > @@ -25,6 +25,8 @@
> >
> >  #include "amdgpu_socbb.h"
> >
> > +#define RS64_FW_UC_START_ADDR_LO 0x3000
> > +
> >  struct common_firmware_header {
> >         uint32_t size_bytes; /* size of the entire header+image(s) in bytes 
> > */
> >         uint32_t header_size_bytes; /* size of just the header in bytes */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > index 2c7f0bb242ff..7e53c0b63f88 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > @@ -6653,6 +6653,68 @@ static void gfx_v11_0_emit_mem_sync(struct 
> > amdgpu_ring *ring)
> >         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
> >  }
> >
> > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
> > +{
> > +       /* Disable the pipe reset until the CPFW fully support it.*/
> > +       dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset 
> > yet.\n");
>
> I'd drop these or make them debug only for now.  Same for gfx12.  With
> that fixed, the series is:
> Acked-by: Alex Deucher <alexander.deuc...@amd.com>

Just in case you missed my reply.  See above ^^^^

Alex

>
>
> > +       return false;
> > +}
> > +
> > +
> > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
> > +{
> > +       struct amdgpu_device *adev = ring->adev;
> > +       uint32_t reset_pipe = 0, clean_pipe = 0;
> > +       int r;
> > +
> > +       if (!gfx_v11_pipe_reset_support(adev))
> > +               return -EOPNOTSUPP;
> > +
> > +       gfx_v11_0_set_safe_mode(adev, 0);
> > +       mutex_lock(&adev->srbm_mutex);
> > +       soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > +       switch (ring->pipe) {
> > +       case 0:
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE0_RESET, 1);
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE0_RESET, 1);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE0_RESET, 0);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE0_RESET, 0);
> > +               break;
> > +       case 1:
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE1_RESET, 1);
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE1_RESET, 1);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE1_RESET, 0);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE1_RESET, 0);
> > +               break;
> > +       default:
> > +               break;
> > +       }
> > +
> > +       WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> > +       WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> > +
> > +       r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 
> > 2) - RS64_FW_UC_START_ADDR_LO;
> > +       soc21_grbm_select(adev, 0, 0, 0, 0);
> > +       mutex_unlock(&adev->srbm_mutex);
> > +       gfx_v11_0_unset_safe_mode(adev, 0);
> > +
> > +       dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start 
> > PC: %s\n", ring->name,
> > +                       r == 0 ? "successfuly" : "failed");
> > +       /* FIXME: Sometimes driver can't cache the ME firmware start PC 
> > correctly, so the pipe reset status
> > +        * relies on the later gfx ring test result.
> > +        */
> > +       return 0;
> > +}
> > +
> >  static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> >  {
> >         struct amdgpu_device *adev = ring->adev;
> > @@ -6662,8 +6724,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring 
> > *ring, unsigned int vmid)
> >                 return -EINVAL;
> >
> >         r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> > -       if (r)
> > -               return r;
> > +       if (r) {
> > +
> > +               dev_warn(adev->dev,"reset via MES failed and try pipe reset 
> > %d\n", r);
> > +               r = gfx_v11_reset_gfx_pipe(ring);
> > +               if (r)
> > +                       return r;
> > +       }
> >
> >         r = amdgpu_bo_reserve(ring->mqd_obj, false);
> >         if (unlikely(r != 0)) {
> > --
> > 2.34.1
> >

Reply via email to