On Fri, May 8, 2020 at 12:36 AM <wenxi...@linux.vnet.ibm.com> wrote: > > From: Wen Xiong <wenxi...@linux.vnet.ibm.com> > > Several device drivers hit EEH(Extended Error handling) when triggering > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > in pci general code. PHB reset stop all PCI transactions from previous > kernel. We have tested the patch in several enviroments: > - direct slot adapters > - adapters under the switch > - a VF adapter in PowerVM > - a VF adapter/adapter in KVM guest. > > Signed-off-by: Wen Xiong <wenxi...@linux.vnet.ibm.com> > --- > arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++ > 1 file changed, 153 insertions(+) > > diff --git a/arch/powerpc/platforms/pseries/pci.c > b/arch/powerpc/platforms/pseries/pci.c > index 911534b89c85..aac7f00696d2 100644 > --- a/arch/powerpc/platforms/pseries/pci.c > +++ b/arch/powerpc/platforms/pseries/pci.c > @@ -11,6 +11,8 @@ > #include <linux/kernel.h> > #include <linux/pci.h> > #include <linux/string.h> > +#include <linux/crash_dump.h> > +#include <linux/delay.h> > > #include <asm/eeh.h> > #include <asm/pci-bridge.h> > @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge > *bridge) > > return 0; > } > + > +/** > + * pseries_get_pdn_addr - Retrieve PHB address > + * @pe: EEH PE > + * > + * Retrieve the assocated PHB address. Actually, there're 2 RTAS > + * function calls dedicated for the purpose. We need implement > + * it through the new function and then the old one. Besides, > + * you should make sure the config address is figured out from > + * FDT node before calling the function. > + * > + */ > +static int pseries_get_pdn_addr(struct pci_controller *phb) > +{ > + int ret = -1; > + int rets[3]; > + int ibm_get_config_addr_info; > + int ibm_get_config_addr_info2; > + int config_addr = 0; > + struct pci_dn *root_pdn, *pdn; > + > + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); > + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); > + > + root_pdn = PCI_DN(phb->dn); > + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list); > + config_addr = (pdn->busno << 16) | (pdn->devfn << 8); > + > + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { > + /* > + * First of all, we need to make sure there has one PE > + * associated with the device. Otherwise, PE address is > + * meaningless. > + */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 1); > + if (ret || (rets[0] == 0)) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# " > + "option=%d config_addr=%x\n", > + __func__, pdn->phb->global_number, 1, > rets[0]); > + return -1; > + } > + > + /* Retrieve the associated PE config address */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# " > + "option=%d config_addr=%x\n", > + __func__, pdn->phb->global_number, 0, > rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { > + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret || rets[0]) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# " > + "config_addr=%x\n", > + __func__, pdn->phb->global_number, rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + return ret; > +}
I'd be nice if we could reduce the amount of duplication between this function and eeh_pseries_get_pe_addr(). I was planning to re-working the EEH version though so that this is fine for now. > + > +static int __init pseries_phb_reset(void) > +{ > + struct pci_controller *phb; > + int config_addr; > + int ibm_set_slot_reset; > + int ibm_configure_pe; > + int ret; > + > + if (is_kdump_kernel() || reset_devices) { > + pr_info("Issue PHB reset ...\n"); > + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); > + ibm_configure_pe = rtas_token("ibm,configure-pe"); > + > + if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || > + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { > + pr_info("%s: EEH functionality not supported\n", > + __func__); > + } > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; Considering we already cache the buid in the pci_controller we could also cache the config_addr. That said, considering all this runs precisely once at boot I'm not that bothered by calling pseries_get_pdn_addr() again in each loop. > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); > + > + /* If fundamental-reset not supported, try hot-reset > */ > + if (ret == -8) > + ret = rtas_call(ibm_set_slot_reset, 4, 1, > NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_HOT); > + > + if (ret) { > + pr_err("%s: fail with rtas_call fundamental > reset=%d\n", > + __func__, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_DEACTIVATE); > + if (ret) { > + pr_err("%s: fail with rtas_call > deactive=%d\n", > + __func__, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid)); > + if (ret) { > + pr_err("%s: fail with rtas_call configure_pe > =%d\n", > + __func__, ret); > + continue; > + } > + } > + } > + > + return 0; > +} > +postcore_initcall(pseries_phb_reset); You probably should use machine_postcore_initcall(pseries, pseries_phb_reset); so that this only gets run on pseries. Without the machine type specifier it'll run on PowerNV too. > + > -- > 2.18.1 >