On Mon, Nov 23, 2020 at 12:32:41PM +0100, Manuel Bouyer wrote:
> On Mon, Nov 23, 2020 at 10:57:13AM +0100, Roger Pau Monné wrote:
> > > [...]
> > > OK.
> > > I finally found where the EOI occurs (it's within a macro so s simple grep
> > > didn't show it).
> > > 
> > > When interrupts are not masked (e.g. via cli), the ioapic halder is 
> > > called.
> > > From here, 2 paths can happen:
> > > a) the software interrupt priority level (called spl in BSD world) allows 
> > > the
> > >   driver's handler to run. In this case it's called, then the interrupt
> > >   is unmasked at ioapic level, and EOI'd at lapic.
> > > b) the software interrupt priority level doesn't allow this driver's 
> > > handler to
> > >   run. In this case, the interrupt is marked as pending in software,
> > >   explicitely masked at the iopic level and EOI'd at lapic.
> > >   Later, when the spl is lowered, the driver's interrupt handler is run,
> > >   then the interrupt is unmasked at ioapic level, and EOI'd at lapic
> > >   (this is the same path as a)). here we may EOI the lapic twice, and the
> > >   second time when there's no hardware interrupt pending any more.
> > > 
> > > I suspect it's case b) which causes the problem with Xen.
> > 
> > Case b) should be handled fine AFAICT. If there's no interrupt pending
> > in the lapic ISR the EOI is just a noop. Iff there's somehow another
> > vector pending in ISR you might actually be EOIing the wrong vector,
> > and thus this would be a bug in NetBSD. I certainly don't know much of
> > NetBSD interrupt model in order to know whether this second EOI is just
> > not necessary or whether it could cause issues.
> > 
> > Can you actually assert that disabling this second unneeded EOI does
> > solve the problem?
> 
> I tried this, and it didn't change anything ...
> 
> I'm out of idea to try.

Hm, yes, it's quite weird. Do you know whether a NetBSD kernel can be
multibooted from pxelinux with Xen? I would like to see if I can
reproduce this myself.

I have the following patch also which will print a warning message
when GSI 34 is injected from hardware or when Xen performs an EOI
(either from a time out or when reacting to a guest one). I would
expect at least the interrupt injection one to trigger together with
the existing message.

Thanks, Roger.
---8<---
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 67d4a6237f..bbd141a3d9 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -43,7 +43,12 @@
 /* HACK: Route IRQ0 only to VCPU0 to prevent time jumps. */
 #define IRQ0_SPECIAL_ROUTING 1
 
-static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int irq);
+static void _vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int irq);
+
+bool __read_mostly irqprint;
+#define vioapic_deliver(vioapic, irq) ({\
+    WARN_ON(irqprint && vioapic->base_gsi + irq == 34); \
+    _vioapic_deliver(vioapic, irq); })
 
 static struct hvm_vioapic *addr_vioapic(const struct domain *d,
                                         unsigned long addr)
@@ -119,6 +124,16 @@ static uint32_t vioapic_read_indirect(const struct 
hvm_vioapic *vioapic)
 
         if ( redir_index >= vioapic->nr_pins )
         {
+            switch ( vioapic->ioregsel )
+            {
+            case 3:
+                irqprint = true;
+                break;
+
+            case 0xf:
+                irqprint = false;
+                break;
+            }
             gdprintk(XENLOG_WARNING, "apic_mem_readl:undefined ioregsel %x\n",
                      vioapic->ioregsel);
             break;
@@ -391,7 +406,7 @@ static void ioapic_inj_irq(
     vlapic_set_irq(target, vector, trig_mode);
 }
 
-static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
+static void _vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
 {
     uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
     uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode;
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 8d1f9a9fc6..91fb99d271 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1388,10 +1388,12 @@ static void set_eoi_ready(void *data)
     flush_ready_eoi();
 }
 
+extern bool irqprint;
 void pirq_guest_eoi(struct pirq *pirq)
 {
     struct irq_desc *desc;
 
+    WARN_ON(irqprint && pirq->pirq == 34);
     ASSERT(local_irq_is_enabled());
     desc = pirq_spin_lock_irq_desc(pirq, NULL);
     if ( desc )
@@ -1837,6 +1839,8 @@ static void do_IRQ_guest(struct irq_desc *desc, unsigned 
int vector)
     unsigned int        i;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
 
+    WARN_ON(irqprint && desc->irq == 34);
+
     if ( unlikely(!action->nr_guests) )
     {
         /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */


Reply via email to