On 19/3/18 1:46 pm, Sam Bobroff wrote: > Currently the EEH_PE_RECOVERING flag for a PE is managed by both the > caller and callee of eeh_handle_normal_event() (among other places not > considered here). This is complicated by the fact that the PE may > or may not have been invalidated by the call. > > So move the callee's handling into eeh_handle_normal_event(), which > clarifies it and allows the return type to be changed to void (because > it no longer needs to indicate at the PE has been invalidated). > > This should not change behaviour except in eeh_event_handler() where > it was previously possible to cause eeh_pe_state_clear() to be called > on an invalid PE, which is now avoided. > > Signed-off-by: Sam Bobroff <sam.bobr...@au1.ibm.com>
Reviewed-by: Alexey Kardashevskiy <a...@ozlabs.ru> > --- > arch/powerpc/include/asm/eeh_event.h | 2 +- > arch/powerpc/kernel/eeh_driver.c | 29 +++++++++++------------------ > arch/powerpc/kernel/eeh_event.c | 2 -- > 3 files changed, 12 insertions(+), 21 deletions(-) > > diff --git a/arch/powerpc/include/asm/eeh_event.h > b/arch/powerpc/include/asm/eeh_event.h > index 0a168038882d..9884e872686f 100644 > --- a/arch/powerpc/include/asm/eeh_event.h > +++ b/arch/powerpc/include/asm/eeh_event.h > @@ -34,7 +34,7 @@ struct eeh_event { > int eeh_event_init(void); > int eeh_send_failure_event(struct eeh_pe *pe); > void eeh_remove_event(struct eeh_pe *pe, bool force); > -bool eeh_handle_normal_event(struct eeh_pe *pe); > +void eeh_handle_normal_event(struct eeh_pe *pe); > void eeh_handle_special_event(void); > > #endif /* __KERNEL__ */ > diff --git a/arch/powerpc/kernel/eeh_driver.c > b/arch/powerpc/kernel/eeh_driver.c > index 51b21c97910f..5b7a5ed4db4d 100644 > --- a/arch/powerpc/kernel/eeh_driver.c > +++ b/arch/powerpc/kernel/eeh_driver.c > @@ -733,7 +733,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct > pci_bus *bus, > > /** > * eeh_handle_normal_event - Handle EEH events on a specific PE > - * @pe: EEH PE > + * @pe: EEH PE - which should not be used after we return, as it may > + * have been invalidated. > * > * Attempts to recover the given PE. If recovery fails or the PE has failed > * too many times, remove the PE. > @@ -750,10 +751,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct > pci_bus *bus, > * & devices under this slot, and then finally restarting the device > * drivers (which cause a second set of hotplug events to go out to > * userspace). > - * > - * Returns true if @pe should no longer be used, else false. > */ > -bool eeh_handle_normal_event(struct eeh_pe *pe) > +void eeh_handle_normal_event(struct eeh_pe *pe) > { > struct pci_bus *frozen_bus; > struct eeh_dev *edev, *tmp; > @@ -765,9 +764,11 @@ bool eeh_handle_normal_event(struct eeh_pe *pe) > if (!frozen_bus) { > pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", > __func__, pe->phb->global_number, pe->addr); > - return false; > + return; > } > > + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); > + > eeh_pe_update_time_stamp(pe); > pe->freeze_count++; > if (pe->freeze_count > eeh_max_freezes) { > @@ -904,7 +905,7 @@ bool eeh_handle_normal_event(struct eeh_pe *pe) > pr_info("EEH: Notify device driver to resume\n"); > eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); > > - return false; > + goto final; > > hard_fail: > /* > @@ -940,12 +941,12 @@ bool eeh_handle_normal_event(struct eeh_pe *pe) > pci_lock_rescan_remove(); > pci_hp_remove_devices(frozen_bus); > pci_unlock_rescan_remove(); > - > /* The passed PE should no longer be used */ > - return true; > + return; > } > } > - return false; > +final: > + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); > } > > /** > @@ -1018,15 +1019,7 @@ void eeh_handle_special_event(void) > */ > if (rc == EEH_NEXT_ERR_FROZEN_PE || > rc == EEH_NEXT_ERR_FENCED_PHB) { > - /* > - * eeh_handle_normal_event() can make the PE stale if it > - * determines that the PE cannot possibly be recovered. > - * Don't modify the PE state if that's the case. > - */ > - if (eeh_handle_normal_event(pe)) > - continue; > - > - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); > + eeh_handle_normal_event(pe); > } else { > pci_lock_rescan_remove(); > list_for_each_entry(hose, &hose_list, list_node) { > diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c > index 872bcfe8f90e..61c9356bf9c9 100644 > --- a/arch/powerpc/kernel/eeh_event.c > +++ b/arch/powerpc/kernel/eeh_event.c > @@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy) > /* We might have event without binding PE */ > pe = event->pe; > if (pe) { > - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); > if (pe->type & EEH_PE_PHB) > pr_info("EEH: Detected error on PHB#%x\n", > pe->phb->global_number); > @@ -82,7 +81,6 @@ static int eeh_event_handler(void * dummy) > "PHB#%x-PE#%x\n", > pe->phb->global_number, pe->addr); > eeh_handle_normal_event(pe); > - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); > } else { > eeh_handle_special_event(); > } > -- Alexey