On Mon, Aug 10, 2015 at 07:21:12PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>For P7IOC, the whole available DMA32 space, which is below the
>>MEM32 space, is divided evenly into 256MB segments. The number
>>of continuous segments assigned to one particular PE depends on
>>the PE's DMA weight that is calculated based on the type of each
>>PCI devices contained in the PE, and PHB's DMA weight which is
>>accumulative DMA weight of PEs contained in the PHB. It means
>>that the PHB's DMA weight calculation depends on existing PEs,
>>which works perfectly now, but not hotplug friendly. As the
>>whole available DMA32 space can be assigned to one PE on PHB3,
>>so we don't have the issue on PHB3.
>>
>>The patch calculates PHB's DMA weight based on the PCI devices
>>contained in the PHB dynamically so that it's hotplug friendly.
>>
>>Signed-off-by: Gavin Shan <gws...@linux.vnet.ibm.com>
>>---
>>  arch/powerpc/platforms/powernv/pci-ioda.c | 88 
>> +++++++++++++++----------------
>>  arch/powerpc/platforms/powernv/pci.h      |  6 ---
>>  2 files changed, 43 insertions(+), 51 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
>>b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 713f4b4..7342cfd 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb 
>>*phb,
>>
>>  static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>>  {
>>+     struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>+     struct pnv_phb *phb = hose->private_data;
>>+
>>      /* This is quite simplistic. The "base" weight of a device
>>       * is 10. 0 means no DMA is to be accounted for it.
>>       */
>>@@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev 
>>*dev)
>>      if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
>>          dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
>>          dev->class == PCI_CLASS_SERIAL_USB_EHCI)
>>-             return 3;
>>+             return 3 * phb->ioda.tce32_count;
>>
>>      /* Increase the weight of RAID (includes Obsidian) */
>>      if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
>>-             return 15;
>>+             return 15 * phb->ioda.tce32_count;
>>
>>      /* Default */
>>-     return 10;
>>+     return 10 * phb->ioda.tce32_count;
>>+}
>>+
>>+static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
>>+{
>>+     unsigned int *dma_weight = data;
>>+
>>+     *dma_weight += pnv_ioda_dma_weight(pdev);
>>+     return 0;
>>+}
>>+
>>+static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb)
>>+{
>>+     unsigned int dma_weight = 0;
>>+
>>+     if (!phb->hose->bus)
>>+             return 0;
>>+
>>+     pci_walk_bus(phb->hose->bus,
>>+                  __pnv_ioda_phb_dma_weight, &dma_weight);
>>+     return dma_weight;
>>  }
>>
>>  #ifdef CONFIG_PCI_IOV
>>@@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
>>bool all)
>>      /* Put PE to the list */
>>      list_add_tail(&pe->list, &phb->ioda.pe_list);
>>
>>-     /* Account for one DMA PE if at least one DMA capable device exist
>>-      * below the bridge
>>-      */
>>-     if (pe->dma_weight != 0) {
>>-             phb->ioda.dma_weight += pe->dma_weight;
>>-             phb->ioda.dma_pe_count++;
>>-     }
>>-
>>      /* Link the PE */
>>      pnv_ioda_link_pe_by_weight(phb, pe);
>>  }
>>@@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
>>*phb,
>>  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>  {
>>      struct pci_controller *hose = phb->hose;
>>-     unsigned int residual, remaining, segs, tw, base;
>>      struct pnv_ioda_pe *pe;
>>+     unsigned int dma_weight;
>>
>>-     /* If we have more PE# than segments available, hand out one
>>-      * per PE until we run out and let the rest fail. If not,
>>-      * then we assign at least one segment per PE, plus more based
>>-      * on the amount of devices under that PE
>>-      */
>>-     if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
>>-             residual = 0;
>>-     else
>>-             residual = phb->ioda.tce32_count -
>>-                     phb->ioda.dma_pe_count;
>>-
>>-     pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
>>-             hose->global_number, phb->ioda.tce32_count);
>>-     pr_info("PCI: %d PE# for a total weight of %d\n",
>>-             phb->ioda.dma_pe_count, phb->ioda.dma_weight);
>>+     /* Calculate the PHB's DMA weight */
>>+     dma_weight = pnv_ioda_phb_dma_weight(phb);
>>+     pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n",
>>+             hose->global_number, phb->ioda.tce32_count, dma_weight);
>>
>>      pnv_pci_ioda_setup_opal_tce_kill(phb);
>>
>>@@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>       * out one base segment plus any residual segments based on
>>       * weight
>>       */
>>-     remaining = phb->ioda.tce32_count;
>>-     tw = phb->ioda.dma_weight;
>>-     base = 0;
>>      list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
>>              if (!pe->dma_weight)
>>                      continue;
>>-             if (!remaining) {
>>-                     pe_warn(pe, "No DMA32 resources available\n");
>>-                     continue;
>>-             }
>>-             segs = 1;
>>-             if (residual) {
>>-                     segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
>>-                     if (segs > remaining)
>>-                             segs = remaining;
>>-             }
>>
>>              /*
>>               * For IODA2 compliant PHB3, we needn't care about the weight.
>>@@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>               * the specific PE.
>>               */
>>              if (phb->type == PNV_PHB_IODA1) {
>>-                     pe_info(pe, "DMA weight %d, assigned %d DMA32 
>>segments\n",
>>+                     unsigned int segs, base = 0;
>>+
>>+                     if (pe->dma_weight <
>>+                         dma_weight / phb->ioda.tce32_count)
>>+                             segs = 1;
>>+                     else
>>+                             segs = (pe->dma_weight *
>>+                                     phb->ioda.tce32_count) / dma_weight;
>>+
>>+                     pe_info(pe, "DMA32 weight %d, assigned %d segments\n",
>>                              pe->dma_weight, segs);
>>                      pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
>>+
>>+                     base += segs;
>
>
>This is not right. @base here is a local variable in the scope,
>pnv_pci_ioda_setup_dma_pe() will always be called with base==0.
>
>
>Sorry for commenting the same patch twice.
>

That's ok to comment twice on the same patch. But I don't see
how it's wrong. The function (pnv_ioda_setup_dma()) is called
as below, and it iterates over all PEs in the PHB's DMA32 list.
That means the function operates at the PHB level, not per PE,
so there is no problem with "base=0".

pnv_pci_ioda_fixup
  pnv_pci_ioda_setup_DMA
    pnv_ioda_setup_dma
>
>>              } else {
>>                      pe_info(pe, "Assign DMA32 space\n");
>>-                     segs = 0;
>>                      pnv_pci_ioda2_setup_dma_pe(phb, pe);
>>              }
>>-
>>-             remaining -= segs;
>>-             base += segs;
>>      }
>>  }
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci.h 
>>b/arch/powerpc/platforms/powernv/pci.h
>>index 08a4e57..addd3f7 100644
>>--- a/arch/powerpc/platforms/powernv/pci.h
>>+++ b/arch/powerpc/platforms/powernv/pci.h
>>@@ -183,12 +183,6 @@ struct pnv_phb {
>>                      /* 32-bit TCE tables allocation */
>>                      unsigned long           tce32_count;
>>
>>-                     /* Total "weight" for the sake of DMA resources
>>-                      * allocation
>>-                      */
>>-                     unsigned int            dma_weight;
>>-                     unsigned int            dma_pe_count;
>>-
>>                      /* Sorted list of used PE's, sorted at
>>                       * boot for resource allocation purposes
>>                       */
>>

Thanks,
Gavin

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to