On Mon, Mar 3, 2025 at 9:46 AM Alex Deucher <alexdeuc...@gmail.com> wrote: > > On Fri, Feb 21, 2025 at 8:38 AM Prike Liang <prike.li...@amd.com> wrote: > > > > Implement the kernel graphics queue pipe reset,and the driver > > will fallback to pipe reset when the queue reset fails. However, > > the ME FW hasn't fully supported pipe reset yet so disable the > > KGQ pipe reset temporarily. > > > > Signed-off-by: Prike Liang <prike.li...@amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 + > > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 71 ++++++++++++++++++++++- > > 2 files changed, 71 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > > index 4eedd92f000b..06fe21e15ed6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > > @@ -25,6 +25,8 @@ > > > > #include "amdgpu_socbb.h" > > > > +#define RS64_FW_UC_START_ADDR_LO 0x3000 > > + > > struct common_firmware_header { > > uint32_t size_bytes; /* size of the entire header+image(s) in bytes > > */ > > uint32_t header_size_bytes; /* size of just the header in bytes */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > index 2c7f0bb242ff..7e53c0b63f88 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > @@ -6653,6 +6653,68 @@ static void gfx_v11_0_emit_mem_sync(struct > > amdgpu_ring *ring) > > amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ > > } > > > > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) > > +{ > > + /* Disable the pipe reset until the CPFW fully support it.*/ > > + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset > > yet.\n"); > > I'd drop these or make them debug only for now. Same for gfx12. With > that fixed, the series is: > Acked-by: Alex Deucher <alexander.deuc...@amd.com>
Just in case you missed my reply. See above ^^^^ Alex > > > > + return false; > > +} > > + > > + > > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) > > +{ > > + struct amdgpu_device *adev = ring->adev; > > + uint32_t reset_pipe = 0, clean_pipe = 0; > > + int r; > > + > > + if (!gfx_v11_pipe_reset_support(adev)) > > + return -EOPNOTSUPP; > > + > > + gfx_v11_0_set_safe_mode(adev, 0); > > + mutex_lock(&adev->srbm_mutex); > > + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > > + > > + switch (ring->pipe) { > > + case 0: > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + PFP_PIPE0_RESET, 1); > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + ME_PIPE0_RESET, 1); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + PFP_PIPE0_RESET, 0); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + ME_PIPE0_RESET, 0); > > + break; > > + case 1: > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + PFP_PIPE1_RESET, 1); > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + ME_PIPE1_RESET, 1); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + PFP_PIPE1_RESET, 0); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + ME_PIPE1_RESET, 0); > > + break; > > + default: > > + break; > > + } > > + > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); > > + > > + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << > > 2) - RS64_FW_UC_START_ADDR_LO; > > + soc21_grbm_select(adev, 0, 0, 0, 0); > > + mutex_unlock(&adev->srbm_mutex); > > + gfx_v11_0_unset_safe_mode(adev, 0); > > + > > + dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start > > PC: %s\n", ring->name, > > + r == 0 ? "successfuly" : "failed"); > > + /* FIXME: Sometimes driver can't cache the ME firmware start PC > > correctly, so the pipe reset status > > + * relies on the later gfx ring test result. > > + */ > > + return 0; > > +} > > + > > static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) > > { > > struct amdgpu_device *adev = ring->adev; > > @@ -6662,8 +6724,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring > > *ring, unsigned int vmid) > > return -EINVAL; > > > > r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); > > - if (r) > > - return r; > > + if (r) { > > + > > + dev_warn(adev->dev,"reset via MES failed and try pipe reset > > %d\n", r); > > + r = gfx_v11_reset_gfx_pipe(ring); > > + if (r) > > + return r; > > + } > > > > r = amdgpu_bo_reserve(ring->mqd_obj, false); > > if (unlikely(r != 0)) { > > -- > > 2.34.1 > >