On 2025/04/11 13:31, Nicholas Piggin wrote:
Interrupt throttling is broken in several ways:
- Timer expiry sends an interrupt even if there is no cause.
- (e1000e) Mitigated interrupts still auto-clear cause bits.
- Timer expiry that results in an interrupt does not re-arm the timer so
an interrupt can appear immediately after the timer expiry interrupt.
To fix:
- When the throttle timer expires, check the cause bits corresponding to
the msix vector before sending an irq.
- (e1000e) Skip the auto-clear logic if an interrupt is delayed, and
send delayed irqs using e1000e_msix_notify() to perform auto-clear.
- Re-load the throttle timer when a delayed interrupt is signaled. e1000e
gets this by signaling delayed interrupts with e1000e_msix_notify(); igb
calls igb_intrmgr_rearm_timer() directly.
Please split this patch into independent changes.
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
hw/net/e1000e_core.c | 59 +++++++++++++++++++++++++++++++++++++++-----
hw/net/igb_core.c | 50 ++++++++++++++++++++++++-------------
2 files changed, 86 insertions(+), 23 deletions(-)
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8e6522f810..6fb8da32e4d 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -178,16 +178,62 @@ e1000e_intrmgr_on_throttling_timer(void *opaque)
}
}
+static uint32_t find_msix_causes(E1000ECore *core, int vec)
+{
+ uint32_t causes = 0;
+ uint32_t int_cfg;
+
+ int_cfg = E1000_IVAR_RXQ0(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_RXQ0;
+ }
+
+ int_cfg = E1000_IVAR_RXQ1(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_RXQ1;
+ }
+
+ int_cfg = E1000_IVAR_TXQ0(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_TXQ0;
+ }
+
+ int_cfg = E1000_IVAR_TXQ1(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_TXQ1;
+ }
+
+ int_cfg = E1000_IVAR_OTHER(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_OTHER;
+ }
+
+ return causes;
+}
+
+static void
+e1000e_msix_notify(E1000ECore *core, uint32_t causes);
+
static void
e1000e_intrmgr_on_msix_throttling_timer(void *opaque)
{
E1000IntrDelayTimer *timer = opaque;
- int idx = timer - &timer->core->eitr[0];
+ E1000ECore *core = timer->core;
+ int idx = timer - &core->eitr[0];
+ uint32_t causes;
timer->running = false;
- trace_e1000e_irq_msix_notify_postponed_vec(idx);
- msix_notify(timer->core->owner, idx);
+ causes = find_msix_causes(core, idx) & core->mac[IMS] & core->mac[ICR];
+ if (causes) {
+ trace_e1000e_irq_msix_notify_postponed_vec(idx);
+ e1000e_msix_notify(core, causes);
+ }
}
static void
@@ -1992,10 +2038,11 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
if (vec < E1000E_MSIX_VEC_NUM) {
- if (!e1000e_eitr_should_postpone(core, vec)) {
- trace_e1000e_irq_msix_notify_vec(vec);
- msix_notify(core->owner, vec);
+ if (e1000e_eitr_should_postpone(core, vec)) {
+ return;
}
+ trace_e1000e_irq_msix_notify_vec(vec);
+ msix_notify(core->owner, vec);
} else {
trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
}
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 3ae3e53530b..cc25a1d5baa 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -152,11 +152,14 @@ igb_intrmgr_arm_timer(IGBIntrDelayTimer *timer, int64_t delay_ns)
static inline void
igb_intrmgr_rearm_timer(IGBIntrDelayTimer *timer)
{
- uint32_t interval = (timer->core->mac[timer->delay_reg] &
- E1000_EITR_INTERVAL) >> 2;
- int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
+ uint32_t eitr = timer->core->mac[timer->delay_reg];
- igb_intrmgr_arm_timer(timer, delay_ns);
+ if (eitr != 0) {
+ uint32_t interval = (eitr & E1000_EITR_INTERVAL) >> 2;
+ int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
+
+ igb_intrmgr_arm_timer(timer, delay_ns);
+ }
}
static void
@@ -168,16 +171,7 @@ igb_intmgr_timer_resume(IGBIntrDelayTimer *timer)
}
static void
-igb_intrmgr_on_msix_throttling_timer(void *opaque)
-{
- IGBIntrDelayTimer *timer = opaque;
- int idx = timer - &timer->core->eitr[0];
-
- timer->running = false;
-
- trace_e1000e_irq_msix_notify_postponed_vec(idx);
- igb_msix_notify(timer->core, idx);
-}
+igb_intrmgr_on_msix_throttling_timer(void *opaque);
static void
igb_intrmgr_initialize_all_timers(IGBCore *core, bool create)
@@ -2253,9 +2247,7 @@ igb_postpone_interrupt(IGBIntrDelayTimer *timer)
return true;
}
- if (timer->core->mac[timer->delay_reg] != 0) {
- igb_intrmgr_rearm_timer(timer);
- }
+ igb_intrmgr_rearm_timer(timer);
return false;
}
@@ -2279,6 +2271,30 @@ static void igb_send_msix(IGBCore *core, uint32_t causes)
}
}
+static void
+igb_intrmgr_on_msix_throttling_timer(void *opaque)
+{
+ IGBIntrDelayTimer *timer = opaque;
+ IGBCore *core = timer->core;
+ int vector = timer - &core->eitr[0];
+ uint32_t causes;
+
+ timer->running = false;
+
+ causes = core->mac[EICR] & core->mac[EIMS];
+ if (causes & BIT(vector)) {
+ /*
+ * The moderation counter is loaded with interval value whenever the
+ * interrupt is signaled. This includes when the interrupt is signaled
+ * by the counter reaching 0.
+ */
+ igb_intrmgr_rearm_timer(timer);
+
+ trace_e1000e_irq_msix_notify_postponed_vec(vector);
+ igb_msix_notify(core, vector);
+ }
+}
+
static inline void
igb_fix_icr_asserted(IGBCore *core)
{