ohci1394 has basically no protection against concurrent access to registers. I suspect this as a reason for bug "Host disappears on 1394 bus reset". http://bugzilla.kernel.org/show_bug.cgi?id=7569
The following changes are done here: - Use the ohci->event_lock to serialize the trigger for software- enforced bus resets against a large portion of the IRQ handler. - To accomplish this, cover much more regions of ohci_irq_handler by event_lock. Needless to say, this has to be tested for a lot of scenarios. - Use spin_lock instead of spin_lock_irqsave in ohci_irq_handler. It is somehow disturbing to see how the event_lock was originally taken and released without discernable reason. Todo: - Test. - Audit *all* of ohci1394's register accesses for the need of serialization by spinlocks + mmiowb. - Look into unifying ohci1394's internal locking to a single lock per host. - Audit the selfID complete event handling, including what happens higher up in ieee1394 core, for the possibility to move workload into tasklets or workqueue jobs. Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> --- Robert, does this do anything for your setup? So far I only did a quick test with a UP PC (kernel configured for SMP PREEMPT) and a FireWire HDD and at least it didn't lock up... drivers/ieee1394/ohci1394.c | 51 ++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 28 deletions(-) Index: linux-2.6.20-rc6/drivers/ieee1394/ohci1394.c =================================================================== --- linux-2.6.20-rc6.orig/drivers/ieee1394/ohci1394.c 2007-01-26 16:59:45.000000000 +0100 +++ linux-2.6.20-rc6/drivers/ieee1394/ohci1394.c 2007-01-26 18:21:01.000000000 +0100 @@ -943,6 +943,7 @@ static int ohci_devctl(struct hpsb_host switch (cmd) { case RESET_BUS: + spin_lock_irqsave(&ohci->event_lock, flags); switch (arg) { case SHORT_RESET: phy_reg = get_phy_reg(ohci, 5); @@ -990,6 +991,8 @@ static int ohci_devctl(struct hpsb_host default: retval = -1; } + mmiowb(); + spin_unlock_irqrestore(&ohci->event_lock, flags); break; case GET_CYCLE_COUNTER: @@ -2304,27 +2307,21 @@ static irqreturn_t ohci_irq_handler(int quadlet_t event, node_id; struct ti_ohci *ohci = (struct ti_ohci *)dev_id; struct hpsb_host *host = ohci->host; - int phyid = -1, isroot = 0; - unsigned long flags; + int phyid = -1, isroot = 0, call_selfid_complete = 0; - /* Read and clear the interrupt event register. Don't clear - * the busReset event, though. This is done when we get the - * selfIDComplete interrupt. */ - spin_lock_irqsave(&ohci->event_lock, flags); - event = reg_read(ohci, OHCI1394_IntEventClear); - reg_write(ohci, OHCI1394_IntEventClear, event & ~OHCI1394_busReset); - spin_unlock_irqrestore(&ohci->event_lock, flags); - - if (!event) - return IRQ_NONE; + spin_lock(&ohci->event_lock); - /* If event is ~(u32)0 cardbus card was ejected. In this case - * we just return, and clean up in the ohci1394_pci_remove - * function. */ - if (event == ~(u32) 0) { - DBGMSG("Device removed."); + /* Read and clear the interrupt event register. Don't clear the + * busReset event though. This is done when we get the selfIDComplete + * interrupt. + * If event is ~0, the CardBus card was ejected. In this case we just + * return and clean up in ohci1394_pci_remove. */ + event = reg_read(ohci, OHCI1394_IntEventClear); + if (!event || !~event) { + spin_unlock(&ohci->event_lock); return IRQ_NONE; } + reg_write(ohci, OHCI1394_IntEventClear, event & ~OHCI1394_busReset); DBGMSG("IntEvent: %08x", event); @@ -2399,20 +2396,17 @@ static irqreturn_t ohci_irq_handler(int /* The busReset event bit can't be cleared during the * selfID phase, so we disable busReset interrupts, to * avoid burying the cpu in interrupt requests. */ - spin_lock_irqsave(&ohci->event_lock, flags); reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset); if (ohci->check_busreset) { int loop_count = 0; + /* Remember kids: Don't do this at home. */ + spin_unlock(&ohci->event_lock); udelay(10); - while (reg_read(ohci, OHCI1394_IntEventSet) & OHCI1394_busReset) { reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); - - spin_unlock_irqrestore(&ohci->event_lock, flags); udelay(10); - spin_lock_irqsave(&ohci->event_lock, flags); /* The loop counter check is to prevent the driver * from remaining in this state forever. For the @@ -2428,8 +2422,8 @@ static irqreturn_t ohci_irq_handler(int loop_count++; } + spin_lock(&ohci->event_lock); } - spin_unlock_irqrestore(&ohci->event_lock, flags); if (!host->in_bus_reset) { DBGMSG("irq_handler: Bus reset requested"); @@ -2520,10 +2514,8 @@ static irqreturn_t ohci_irq_handler(int /* Clear the bus reset event and re-enable the * busReset interrupt. */ - spin_lock_irqsave(&ohci->event_lock, flags); reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); - spin_unlock_irqrestore(&ohci->event_lock, flags); /* Turn on phys dma reception. * @@ -2540,7 +2532,7 @@ static irqreturn_t ohci_irq_handler(int reg_read(ohci, OHCI1394_PhyReqFilterHiSet), reg_read(ohci, OHCI1394_PhyReqFilterLoSet)); - hpsb_selfid_complete(host, phyid, isroot); + call_selfid_complete = 1; } else PRINT(KERN_ERR, "SelfID received outside of bus reset sequence"); @@ -2552,9 +2544,12 @@ selfid_not_valid: /* Make sure we handle everything, just in case we accidentally * enabled an interrupt that we didn't write a handler for. */ if (event) - PRINT(KERN_ERR, "Unhandled interrupt(s) 0x%08x", - event); + PRINT(KERN_ERR, "Unhandled interrupt(s) 0x%08x", event); + mmiowb(); + spin_unlock(&ohci->event_lock); + if (call_selfid_complete) + hpsb_selfid_complete(host, phyid, isroot); return IRQ_HANDLED; } -- Stefan Richter -=====-=-=== ---= ==-=- http://arcgraph.de/sr/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/