If the IH ring buffer overflows, fence signal interrupts may have been
lost. Process the fences of every ring after an overflow so that fences
whose work has already completed do not stay unsignaled and cause job
timeouts or GPU hangs.

Cc: Joshua Ashton <jos...@froggi.es>
Cc: Alex Deucher <alexander.deuc...@amd.com>
Cc: sta...@vger.kernel.org

Signed-off-by: Friedrich Vock <friedrich.v...@gmx.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3b0aaf3ebc6..2a246db1d3a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -209,6 +209,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
        unsigned int count;
        u32 wptr;
+       int i;

        if (!ih->enabled || adev->shutdown)
                return IRQ_NONE;
@@ -227,6 +228,20 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
                ih->rptr &= ih->ptr_mask;
        }

+       /* If the ring buffer overflowed, we might have lost some fence
+        * signal interrupts. Check if there was any activity so the signal
+        * doesn't get lost.
+        */
+       if (ih->overflow) {
+               for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+                       struct amdgpu_ring *ring = adev->rings[i];
+
+                       if (!ring || !ring->fence_drv.initialized)
+                               continue;
+                       amdgpu_fence_process(ring);
+               }
+       }
+
        amdgpu_ih_set_rptr(adev, ih);
        wake_up_all(&ih->wait_process);

--
2.43.0

Reply via email to