On Wed, Jun 03, 2015 at 09:37:53AM +0800, Wei Yang wrote:
>On Mon, Jun 01, 2015 at 07:01:36PM -0500, Bjorn Helgaas wrote:
>>On Tue, May 19, 2015 at 06:50:10PM +0800, Wei Yang wrote:
>>> After PE reset, OPAL API opal_pci_reinit() is called on all devices
>>> contained in the PE to reinitialize them. However, VFs can't be seen
>>> from skiboot firmware. We have to implement the functions, similar
>>> those in skiboot firmware, to reinitialize VFs after reset on PE
>>> for VFs.
>>> 
>>> [gwshan: changelog and code refactoring]
>>> Signed-off-by: Wei Yang <weiy...@linux.vnet.ibm.com>
>>> Acked-by: Gavin Shan <gws...@linux.vnet.ibm.com>
>>> ---
>>>  arch/powerpc/include/asm/pci-bridge.h        |    1 +
>>>  arch/powerpc/platforms/powernv/eeh-powernv.c |   70 
>>> +++++++++++++++++++++++++-
>>>  arch/powerpc/platforms/powernv/pci.c         |   18 +++++++
>>>  3 files changed, 88 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/pci-bridge.h 
>>> b/arch/powerpc/include/asm/pci-bridge.h
>>> index c324882..ad60263 100644
>>> --- a/arch/powerpc/include/asm/pci-bridge.h
>>> +++ b/arch/powerpc/include/asm/pci-bridge.h
>>> @@ -206,6 +206,7 @@ struct pci_dn {
>>>  #define IODA_INVALID_M64        (-1)
>>>     int     m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
>>>  #endif /* CONFIG_PCI_IOV */
>>> +   int     mps;
>>>  #endif
>>>     struct list_head child_list;
>>>     struct list_head list;
>>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
>>> b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> index 7af3c1e..33deb78 100644
>>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> @@ -1612,6 +1612,67 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
>>>     return ret;
>>>  }
>>>  
>>> +static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
>>> +{
>>> +   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>> +   u32 devctl, cmd, cap2, aer_capctl;
>>> +   int old_mps;
>>> +
>>> +   /* Restore MPS */
>>> +   if (edev->pcie_cap) {
>>> +           old_mps = (ffs(pdn->mps) - 8) << 5;
>>> +           eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>> +                                2, &devctl);
>>> +           devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
>>> +           devctl |= old_mps;
>>> +           eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>> +                                 2, devctl);
>>> +   }
>>> +
>>> +   /* Disable Completion Timeout */
>>> +   if (edev->pcie_cap) {
>>> +           eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
>>> +                                4, &cap2);
>>> +           if (cap2 & 0x10) {
>>> +                   eeh_ops->read_config(pdn,
>>> +                                   edev->pcie_cap + PCI_EXP_DEVCTL2,
>>> +                                   4, &cap2);
>>> +                   cap2 |= 0x10;
>>> +                   eeh_ops->write_config(pdn,
>>> +                                   edev->pcie_cap + PCI_EXP_DEVCTL2,
>>> +                                   4, cap2);
>>> +           }
>>> +   }
>>> +
>>> +   /* Enable SERR and parity checking */
>>> +   eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
>>> +   cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
>>> +   eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
>>> +
>>> +   /* Enable report various errors */
>>> +   if (edev->pcie_cap) {
>>> +           eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>> +                           2, &devctl);
>>> +           devctl &= ~PCI_EXP_DEVCTL_CERE;
>>> +           devctl |= (PCI_EXP_DEVCTL_NFERE |
>>> +                      PCI_EXP_DEVCTL_FERE |
>>> +                      PCI_EXP_DEVCTL_URRE);
>>> +           eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>> +                           2, devctl);
>>> +   }
>>> +
>>> +   /* Enable ECRC generation and check */
>>> +   if (edev->pcie_cap && edev->aer_cap) {
>>> +           eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
>>> +                           4, &aer_capctl);
>>> +           aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
>>> +           eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
>>> +                           4, aer_capctl);
>>> +   }
>>
>>Where is all this stuff set up the first time?  It'd be nice if we could
>>use the same path that did it the first time.
>>
>
>Those steps in this function are called to setup the pci_dev. For those PFs,
>they are done in the skiboot firmware, invoked by opal_pci_reinit(). For VFs,
>since skiboot firmware is not aware of those VFs, we need to setup it in
>kernel.
>
>This means originally, those stuffs are in skiboot firmware. This is the first
>time appears in kernel.
>
>>Are we setting up a PF or a VF here?  The function is called
>>pnv_eeh_restore_vf_config(), but it's called when "edev->physfn", so it's a
>>little confusing.
>>
>
>Yes, this is called for VFs. "edev->physfn" means the edev has a PF, so that
>this edev is a VF.
>
>>> +
>>> +   return 0;
>>> +}
>>> +
>>>  static int pnv_eeh_restore_config(struct pci_dn *pdn)
>>>  {
>>>     struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>> @@ -1622,7 +1683,14 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
>>>             return -EEXIST;
>>>  
>>>     phb = edev->phb->private_data;
>>> -   ret = opal_pci_reinit(phb->opal_id,
>>> +   /*
>>> +    * We have to restore the PCI config space after reset since the
>>> +    * firmware can't see SRIOV VFs.
>>> +    */
>>> +   if (edev->physfn)
>>> +           ret = pnv_eeh_restore_vf_config(pdn);
>>> +   else
>>> +           ret = opal_pci_reinit(phb->opal_id,
>>>                           OPAL_REINIT_PCI_DEV, edev->config_addr);
>>>     if (ret) {
>>>             pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
>>> diff --git a/arch/powerpc/platforms/powernv/pci.c 
>>> b/arch/powerpc/platforms/powernv/pci.c
>>> index bca2aeb..10bc8c3 100644
>>> --- a/arch/powerpc/platforms/powernv/pci.c
>>> +++ b/arch/powerpc/platforms/powernv/pci.c
>>> @@ -729,6 +729,24 @@ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
>>>  }
>>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
>>>  
>>> +#ifdef CONFIG_PCI_IOV
>>> +static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
>>> +{
>>> +   struct pci_dn *pdn = pci_get_pdn(pdev);
>>> +   int parent_mps;
>>> +
>>> +   if (!pdev->is_virtfn)
>>> +           return;
>>> +
>>> +   /* Synchronize MPS for VF and PF */
>>> +   parent_mps = pcie_get_mps(pdev->physfn);
>>> +   if ((128 << pdev->pcie_mpss) >= parent_mps)
>>> +           pcie_set_mps(pdev, parent_mps);
>>> +   pdn->mps = pcie_get_mps(pdev);
>>> +}
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
>>
>>Same comment as before -- I don't like this usage of fixups.  Would it work
>>to do this in pcibios_add_device()?
>>
>>I assume you need this to happen when you hot-remove and hot-add a VF
>>during EEH recovery?  Where does this happen in the normal hotplug path,
>>e.g., pciehp, and can you do it in a corresponding place for EEH hotplug?
>>
>
>Yes, this is called in the EEH recovery. While not only in the hot-add case,
>when we do the normal EEH reset, 
>
>eeh_reset_device()
>    eeh_pe_restore_bars()
>        eeh_restore_one_device_bars()
>           eeh_ops->restore_config()
>               pnv_eeh_restore_config()
>
>eeh_reset_device() would handle both hot-add and non-hot-add cases. So this is
>not proper to move it to the hot-plug path.
>

If I don't miss anything here. The code does have the problem as Bjorn raised:
When VF is brought up for the first time (without EEH involved yet), the AER
and PCIe timeout setting are not initialized to the expected values.

For non-VFs, the device has been initialize properly before it's exposed to
OS from firmware.

Thanks,
Gavin

>>
>>> +#endif /* CONFIG_PCI_IOV */
>>> +
>>>  void __init pnv_pci_init(void)
>>>  {
>>>     struct device_node *np;
>>> -- 
>>> 1.7.9.5
>>> 
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>>> the body of a message to majord...@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>-- 
>Richard Yang
>Help you, Help me

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to