On 1/5/2024 4:22 AM, Jacek Lawrynowicz wrote:
Limit number of iterations in ivpu_mmu_irq_evtq_handler() and
ivpu_ipc_irq_handler().

"potential infinite loops" sounds like something that has not been observed. Has a problem actually occurred?

Are you concerned that the FW is broken and spamming Linux with events?

Why a limit of 100 events?  Seems arbitrary.

I suspect threaded irqs might be useful here, but it is hard to tell.


Signed-off-by: Jacek Lawrynowicz <jacek.lawrynow...@linux.intel.com>
---
  drivers/accel/ivpu/ivpu_ipc.c |  6 ++++++
  drivers/accel/ivpu/ivpu_mmu.c | 21 +++++++++++++--------
  2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index e86621f16f85..f69780248803 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -389,12 +389,18 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
        unsigned long flags;
        bool dispatched;
        u32 vpu_addr;
+       int msg_count = 0;
        /*
         * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt.
         * Without purge IPC FIFO to 0 next IPC interrupts won't be generated.
         */
        while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
+               if (++msg_count > IPC_MAX_RX_MSG) {
+                       ivpu_pm_schedule_recovery(vdev);
+                       return;
+               }
+
                vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
                if (vpu_addr == REG_IO_ERROR) {
                        ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 1f813625aab3..c82929b0ae9d 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -236,6 +236,8 @@
  #define IVPU_MMU_CERROR_ABT          0x2
  #define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
+#define IVPU_MMU_MAX_EVENT_COUNT 100
+
  static const char *ivpu_mmu_event_to_str(u32 cmd)
  {
        switch (cmd) {
@@ -887,7 +889,7 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
  {
-       bool schedule_recovery = false;
+       int event_count = 0;
        u32 *event;
        u32 ssid;
@@ -895,16 +897,19 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
        while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
                ivpu_mmu_dump_event(vdev, event);
+               if (++event_count > IVPU_MMU_MAX_EVENT_COUNT) {
+                       ivpu_pm_schedule_recovery(vdev);
+                       return;
+               }
ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
-               if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
-                       schedule_recovery = true;
-               else
-                       ivpu_mmu_user_context_mark_invalid(vdev, ssid);
-       }
+               if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+                       ivpu_pm_schedule_recovery(vdev);
+                       return;
+               }
- if (schedule_recovery)
-               ivpu_pm_schedule_recovery(vdev);
+               ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+       }
  }
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)

Reply via email to