[AMD Official Use Only - General] > -----Original Message----- > From: Yang, Philip <philip.y...@amd.com> > Sent: Friday, July 7, 2023 10:15 AM > To: amd-gfx@lists.freedesktop.org > Cc: Kuehling, Felix <felix.kuehl...@amd.com>; Joshi, Mukul > <mukul.jo...@amd.com>; Yang, Philip <philip.y...@amd.com> > Subject: [PATCH] drm/amdgpu: Increase IH soft ring size > > Retry faults are delegated to IH soft ring and then processed by deferred > worker. Current IH soft ring size PAGE_SIZE can store 128 entries, which may > overflow and drop retry faults, causes HW stucks because the retry fault is > not > recovered. > > Increase IH soft ring size to the same size as IH ring, define macro > IH_RING_SIZE to remove duplicate constant. > > Show warning message if IH soft ring overflows because this should not > happen any more. > > Signed-off-by: Philip Yang <philip.y...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 8 ++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 4 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- > drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 5 +++-- > 7 files changed, 21 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > index fceb3b384955..51a0dbd2358a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device > *adev, struct amdgpu_ih_ring *ih) > /** > * amdgpu_ih_ring_write - write IV to the ring buffer > * > + * @adev: amdgpu_device pointer > * @ih: ih ring to write to > * @iv: the iv to write > * @num_dw: size of the iv in dw > @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device > *adev, struct amdgpu_ih_ring *ih) > * Writes an IV to the ring buffer using the CPU and increment the wptr. > * Used for testing and delegating IVs to a software ring. > */ > -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, > - unsigned int num_dw) > +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct > amdgpu_ih_ring *ih, > + const uint32_t *iv, unsigned int num_dw) > { > uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; > unsigned int i; > @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring > *ih, const uint32_t *iv, > if (wptr != READ_ONCE(ih->rptr)) { > wmb(); > WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); > + } else { > + dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, > 0x%X\n", > + wptr, ih->rptr); > } > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > index dd1c2eded6b9..a8cf67f1f011 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > @@ -97,8 +97,8 @@ struct amdgpu_ih_funcs { int > amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, > unsigned ring_size, bool use_bus_addr); void > amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring > *ih); -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t > *iv, > - unsigned int num_dw); > +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct > amdgpu_ih_ring *ih, > + const uint32_t *iv, unsigned int num_dw); > int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, > struct amdgpu_ih_ring *ih); > int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring > *ih); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > index 5273decc5753..fa6d0adcec20 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device > *adev, > struct amdgpu_iv_entry *entry, > unsigned int num_dw) > { > - amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw); > + amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, > +num_dw); > schedule_work(&adev->irq.ih_soft_work); > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c > index b02e1cef78a7..21d2e57cffe7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c > @@ -32,6 +32,7 @@ > #include "soc15_common.h" > #include "ih_v6_0.h" > > +#define IH_RING_SIZE (256 * 1024)
I would recommend moving IH_RING_SIZE to amdgpu_ih.h instead of duplicating in the .c files. The rest looks good to me. Regards, Mukul > #define MAX_REARM_RETRY 10 > > static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev); @@ - > 535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle) > * use bus address for ih ring by psp bl */ > use_bus_addr = > (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? > false : true; > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, > use_bus_addr); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, > +use_bus_addr); > if (r) > return r; > > @@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle) > /* initialize ih control register offset */ > ih_v6_0_init_register_offset(adev); > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, > true); > if (r) > return r; > > diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > index eec13cb5bf75..df33db6fd07b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > @@ -32,6 +32,7 @@ > #include "soc15_common.h" > #include "navi10_ih.h" > > +#define IH_RING_SIZE (256 * 1024) > #define MAX_REARM_RETRY 10 > > #define mmIH_CHICKEN_Sienna_Cichlid 0x018d > @@ -565,7 +566,7 @@ static int navi10_ih_sw_init(void *handle) > use_bus_addr = false; > else > use_bus_addr = true; > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, > use_bus_addr); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, > +use_bus_addr); > if (r) > return r; > > @@ -578,7 +579,7 @@ static int navi10_ih_sw_init(void *handle) > /* initialize ih control registers offset */ > navi10_ih_init_register_offset(adev); > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, > true); > if (r) > return r; > > diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > index 1e83db0c5438..c9b37983a18d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > @@ -33,6 +33,7 @@ > #include "soc15_common.h" > #include "vega10_ih.h" > > +#define IH_RING_SIZE (256 * 1024) > #define MAX_REARM_RETRY 10 > > static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); @@ > -485,7 +486,7 @@ static int vega10_ih_sw_init(void *handle) > if (r) > return r; > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true); > if (r) > return r; > > @@ -510,7 +511,7 @@ static int vega10_ih_sw_init(void *handle) > /* initialize ih control registers offset */ > vega10_ih_init_register_offset(adev); > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, > true); > if (r) > return r; > > diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > index 4d719df376a7..06d4176e4c68 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > @@ -33,6 +33,7 @@ > #include "soc15_common.h" > #include "vega20_ih.h" > > +#define IH_RING_SIZE (256 * 1024) > #define MAX_REARM_RETRY 10 > > #define mmIH_CHICKEN_ALDEBARAN 0x18d > @@ -539,7 +540,7 @@ static int vega20_ih_sw_init(void *handle) > (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) > use_bus_addr = false; > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, > use_bus_addr); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, > +use_bus_addr); > if (r) > return r; > > @@ -565,7 +566,7 @@ static int vega20_ih_sw_init(void *handle) > /* initialize ih control registers offset */ > vega20_ih_init_register_offset(adev); > > - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, > use_bus_addr); > + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, > +use_bus_addr); > if (r) > return r; > > -- > 2.35.1