On Fri, Apr 11, 2025 at 4:07 AM Emily Deng <emily.d...@amd.com> wrote:
>
> For VF, it doesn't have the permission to clear overflow, clear the bit
> by reset.
>
> Signed-off-by: Emily Deng <emily.d...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 10 ++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c   |  6 +++++-
>  drivers/gpu/drm/amd/amdgpu/vega20_ih.c |  6 +++++-
>  4 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index 901f8b12c672..1c8a40d579c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -227,13 +227,19 @@ int amdgpu_ih_process(struct amdgpu_device *adev,
> struct amdgpu_ih_ring *ih)
>  			ih->rptr &= ih->ptr_mask;
>  	}
>
> -	amdgpu_ih_set_rptr(adev, ih);
> +	if (!ih->overflow)
> +		amdgpu_ih_set_rptr(adev, ih);
> +
>  	wake_up_all(&ih->wait_process);
>
>  	/* make sure wptr hasn't changed while processing */
>  	wptr = amdgpu_ih_get_wptr(adev, ih);
>  	if (wptr != ih->rptr)
> -		goto restart_ih;
> +		if (!ih->overflow)
> +			goto restart_ih;
> +
> +	if (ih->overflow)
> +		amdgpu_amdkfd_gpu_reset(adev);

This won't work if KFD is not compiled in your kernel config.

Alex

>
>  	return IRQ_HANDLED;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 7d4395a5d8ac..ff76f02d3e96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -72,6 +72,7 @@ struct amdgpu_ih_ring {
>  	/* For waiting on IH processing at checkpoint. */
>  	wait_queue_head_t wait_process;
>  	uint64_t		processed_timestamp;
> +	bool overflow;
>  };
>
>  /* return true if time stamp t2 is after t1 with 48bit wrap around */
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index f8a485164437..8d3ae88b96a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
>  			if (ret)
>  				return ret;
>  		}
> +		ih[i]->overflow = false;
>  	}
>
>  	/* update doorbell range for ih ring 0 */
> @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
>  	wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
>  	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
>  		goto out;
> -	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +	if (!amdgpu_sriov_vf(adev))
> +		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +	else
> +		ih->overflow = true;
>
>  	/* When a ring buffer overflow happen start parsing interrupt
>  	 * from the last not overwritten vector (wptr + 32).  Hopefully
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index e9e3b2ed4b7b..2ad209406d17 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
>  			if (ret)
>  				return ret;
>  		}
> +		ih[i]->overflow = false;
>  	}
>
>  	if (!amdgpu_sriov_vf(adev))
> @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
>  	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
>  		goto out;
>
> -	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +	if (!amdgpu_sriov_vf(adev))
> +		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +	else
> +		ih->overflow = true;
>
>  	/* When a ring buffer overflow happen start parsing interrupt
>  	 * from the last not overwritten vector (wptr + 32).  Hopefully
> --
> 2.34.1
>