Currently, with a single event queue, EEH recovery is entirely serialized and takes place within a single kernel thread. This can cause recovery to take a long time when there are many devices.
Have the recovery event queue per PHB and allow the recovery to happen in parallel for all the PHBs. Signed-off-by: Ganesh Goudar <ganes...@linux.ibm.com> --- v2: Include missing hunk, which modifies __eeh_send_failure_event. --- arch/powerpc/include/asm/eeh_event.h | 7 ++++ arch/powerpc/include/asm/pci-bridge.h | 4 ++ arch/powerpc/kernel/eeh_driver.c | 27 +++++++++++- arch/powerpc/kernel/eeh_event.c | 59 +++++++++++++++++---------- arch/powerpc/kernel/eeh_pe.c | 4 ++ 5 files changed, 78 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index dadde7d52f46..6af1b5bb6103 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -8,6 +8,8 @@ #define ASM_POWERPC_EEH_EVENT_H #ifdef __KERNEL__ +#include <linux/workqueue.h> + /* * structure holding pci controller data that describes a * change in the isolation status of a PCI slot. A pointer @@ -15,15 +17,20 @@ * callback. */ struct eeh_event { + struct work_struct work; struct list_head list; /* to form event queue */ struct eeh_pe *pe; /* EEH PE */ }; +extern spinlock_t eeh_eventlist_lock; + int eeh_event_init(void); +int eeh_phb_event(struct eeh_pe *pe); int eeh_send_failure_event(struct eeh_pe *pe); int __eeh_send_failure_event(struct eeh_pe *pe); void eeh_remove_event(struct eeh_pe *pe, bool force); void eeh_handle_normal_event(struct eeh_pe *pe); +void eeh_handle_normal_event_work(struct work_struct *work); void eeh_handle_special_event(void); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 2aa3a091ef20..61884d9398bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -138,6 +138,10 @@ struct pci_controller { /* iommu_ops support */ struct iommu_device iommu; + + bool eeh_in_progress; + struct list_head eeh_eventlist; + spinlock_t eeh_eventlist_lock; }; /* These are used for config access 
before all the PCI probing diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7efe04c68f0f..4cf5fd409369 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1116,6 +1116,30 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); } +void eeh_handle_normal_event_work(struct work_struct *work) +{ + unsigned long flags; + struct eeh_event *event = container_of(work, struct eeh_event, work); + struct pci_controller *phb = event->pe->phb; + + eeh_handle_normal_event(event->pe); + + kfree(event); + spin_lock_irqsave(&phb->eeh_eventlist_lock, flags); + WARN_ON_ONCE(!phb->eeh_in_progress); + if (list_empty(&phb->eeh_eventlist)) { + phb->eeh_in_progress = false; + pr_debug("EEH: No more work to do\n"); + } else { + pr_warn("EEH: More work to do\n"); + event = list_entry(phb->eeh_eventlist.next, + struct eeh_event, list); + list_del(&event->list); + queue_work(system_unbound_wq, &event->work); + } + spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags); +} + /** * eeh_handle_special_event - Handle EEH events without a specific failing PE * @@ -1185,8 +1209,7 @@ void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - eeh_handle_normal_event(pe); + eeh_phb_event(pe); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index c23a454af08a..8a9d6358d39f 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -22,7 +22,7 @@ * work-queue, where a worker thread can drive recovery. 
*/ -static DEFINE_SPINLOCK(eeh_eventlist_lock); +DEFINE_SPINLOCK(eeh_eventlist_lock); static DECLARE_COMPLETION(eeh_eventlist_event); static LIST_HEAD(eeh_eventlist); @@ -91,6 +91,42 @@ int eeh_event_init(void) return 0; } +int eeh_phb_event(struct eeh_pe *pe) +{ + struct eeh_event *event; + unsigned long flags; + struct pci_controller *phb; + + event = kzalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return -ENOMEM; + + if (pe) { + phb = pe->phb; + event->pe = pe; + INIT_WORK(&event->work, eeh_handle_normal_event_work); + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pr_err("EEH: EVENT=ERROR_DETECTED PHB=%#x PE=%#x\n", + phb->global_number, pe->addr); + spin_lock_irqsave(&phb->eeh_eventlist_lock, flags); + if (phb->eeh_in_progress) { + pr_info("EEH: EEH already in progress on this PHB, queueing.\n"); + list_add(&event->list, &phb->eeh_eventlist); + } else { + pr_info("EEH: Beginning recovery on this PHB.\n"); + WARN_ON_ONCE(!list_empty(&phb->eeh_eventlist)); + phb->eeh_in_progress = true; + queue_work(system_unbound_wq, &event->work); + } + spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags); + } else { + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + complete(&eeh_eventlist_event); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + } + return 0; +} /** * eeh_send_failure_event - Generate a PCI error event * @pe: EEH PE @@ -101,16 +137,6 @@ int eeh_event_init(void) */ int __eeh_send_failure_event(struct eeh_pe *pe) { - unsigned long flags; - struct eeh_event *event; - - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (!event) { - pr_err("EEH: out of memory, event not handled\n"); - return -ENOMEM; - } - event->pe = pe; - /* * Mark the PE as recovering before inserting it in the queue. 
* This prevents the PE from being free()ed by a hotplug driver @@ -128,16 +154,7 @@ int __eeh_send_failure_event(struct eeh_pe *pe) eeh_pe_state_mark(pe, EEH_PE_RECOVERING); } - - /* We may or may not be called in an interrupt context */ - spin_lock_irqsave(&eeh_eventlist_lock, flags); - list_add(&event->list, &eeh_eventlist); - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - /* For EEH deamon to knick in */ - complete(&eeh_eventlist_event); - - return 0; + return eeh_phb_event(pe); } int eeh_send_failure_event(struct eeh_pe *pe) diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d1030bc52564..67c6e6621b0d 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -81,6 +81,10 @@ int eeh_phb_pe_create(struct pci_controller *phb) { struct eeh_pe *pe; + phb->eeh_in_progress = false; + INIT_LIST_HEAD(&phb->eeh_eventlist); + spin_lock_init(&phb->eeh_eventlist_lock); + /* Allocate PHB PE */ pe = eeh_pe_alloc(phb, EEH_PE_PHB); if (!pe) { -- 2.44.0