From: Robert Jennings <[EMAIL PROTECTED]>

To support Cooperative Memory Overcommitment (CMO), we need to check
for failure and busy responses from some of the tce hcalls.

These changes for the pseries platform alter the powerpc architecture's
common IOMMU interface; the matching changes for the other affected
platforms are included in this patch.

pSeries platform IOMMU code changes:
 * platform TCE functions must handle H_NOT_ENOUGH_RESOURCES errors.
 * platform TCE functions must retry when H_LONG_BUSY_* is returned.
 * platform TCE functions must return an error when H_NOT_ENOUGH_RESOURCES
   is encountered.

Architecture IOMMU code changes:
 * Calls to ppc_md.tce_build need to check the return value and report
   DMA_ERROR_CODE on failure

Architecture changes:
 * the tce_build callback in struct machdep_calls, and the
   tce_build*_pSeriesLP functions that implement it, now return int so
   that they can indicate failure
 * all other platforms need updates to their iommu functions to match the new
   calling semantics; they return 0 on success.  The other platforms'
   default configs have been built, but no further testing was performed.

Signed-off-by: Robert Jennings <[EMAIL PROTECTED]>

---
 arch/powerpc/kernel/iommu.c            |   71 +++++++++++++++++++++++++++++--
 arch/powerpc/platforms/cell/iommu.c    |    3 +
 arch/powerpc/platforms/iseries/iommu.c |    3 +
 arch/powerpc/platforms/pasemi/iommu.c  |    3 +
 arch/powerpc/platforms/pseries/iommu.c |   76 ++++++++++++++++++++++++++++-----
 arch/powerpc/sysdev/dart_iommu.c       |    3 +
 include/asm-powerpc/machdep.h          |    2 
 7 files changed, 139 insertions(+), 22 deletions(-)

Index: b/arch/powerpc/kernel/iommu.c
===================================================================
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -183,6 +183,49 @@ static unsigned long iommu_range_alloc(s
        return n;
 }
 
+/**
+ * iommu_undo - Clear iommu_table bits without calling platform tce_free.
+ * @tbl: struct iommu_table to alter
+ * @dma_addr: DMA address to free entries for
+ * @npages: number of pages to free entries for
+ *
+ * This is the same as __iommu_free without the call to ppc_md.tce_free().
+ * A range that falls outside @tbl is logged (rate-limited) and ignored.
+ *
+ * To clean up after ppc_md.tce_build() errors we need to clear bits
+ * in the table without calling the ppc_md.tce_free() method; calling
+ * ppc_md.tce_free() could alter entries that were not touched due to a
+ * premature failure in ppc_md.tce_build().  The ppc_md.tce_build()
+ * implementation needs to perform its own clean up prior to returning
+ * its error.
+ */
+static void iommu_undo(struct iommu_table *tbl, dma_addr_t dma_addr,
+                        unsigned int npages)
+{
+       unsigned long entry, free_entry;
+
+       entry = dma_addr >> IOMMU_PAGE_SHIFT;   /* DMA address -> page number */
+       free_entry = entry - tbl->it_offset;    /* index relative to it_offset */
+
+       if (((free_entry + npages) > tbl->it_size) ||
+           (entry < tbl->it_offset)) {
+               if (printk_ratelimit()) {
+                       printk(KERN_INFO "iommu_undo: invalid entry\n");
+                       printk(KERN_INFO "\tentry    = 0x%lx\n", entry);
+                       printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
+                       printk(KERN_INFO "\tTable    = 0x%lx\n", (u64)tbl);
+                       printk(KERN_INFO "\tbus#     = 0x%lx\n", tbl->it_busno);
+                       printk(KERN_INFO "\tsize     = 0x%lx\n", tbl->it_size);
+                       printk(KERN_INFO "\tstartOff = 0x%lx\n", 
tbl->it_offset);
+                       printk(KERN_INFO "\tindex    = 0x%lx\n", tbl->it_index);
+                       WARN_ON(1);
+               }
+               return;
+       }
+
+       iommu_area_free(tbl->it_map, free_entry, npages);
+}
+
 static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
                              void *page, unsigned int npages,
                              enum dma_data_direction direction,
@@ -190,6 +233,7 @@ static dma_addr_t iommu_alloc(struct dev
 {
        unsigned long entry, flags;
        dma_addr_t ret = DMA_ERROR_CODE;
+       int rc;
 
        spin_lock_irqsave(&(tbl->it_lock), flags);
 
@@ -204,9 +248,20 @@ static dma_addr_t iommu_alloc(struct dev
        ret = entry << IOMMU_PAGE_SHIFT;        /* Set the return dma address */
 
        /* Put the TCEs in the HW table */
-       ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & 
IOMMU_PAGE_MASK,
-                        direction);
+       rc = ppc_md.tce_build(tbl, entry, npages,
+                             (unsigned long)page & IOMMU_PAGE_MASK, direction);
 
+       /* ppc_md.tce_build() only returns non-zero for transient errors.
+        * Clean up the table bitmap in this case and return
+        * DMA_ERROR_CODE. For all other errors the functionality is
+        * not altered.
+        */
+       if (unlikely(rc)) {
+               iommu_undo(tbl, ret, npages);
+
+               spin_unlock_irqrestore(&(tbl->it_lock), flags);
+               return DMA_ERROR_CODE;
+       }
 
        /* Flush/invalidate TLB caches if necessary */
        if (ppc_md.tce_flush)
@@ -275,7 +330,7 @@ int iommu_map_sg(struct device *dev, str
        dma_addr_t dma_next = 0, dma_addr;
        unsigned long flags;
        struct scatterlist *s, *outs, *segstart;
-       int outcount, incount, i;
+       int outcount, incount, i, rc = 0;
        unsigned int align;
        unsigned long handle;
        unsigned int max_seg_size;
@@ -336,7 +391,10 @@ int iommu_map_sg(struct device *dev, str
                            npages, entry, dma_addr);
 
                /* Insert into HW table */
-               ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, 
direction);
+               rc = ppc_md.tce_build(tbl, entry, npages,
+                                     vaddr & IOMMU_PAGE_MASK, direction);
+               if(unlikely(rc))
+                       goto failure;
 
                /* If we are in an open segment, try merging */
                if (segstart != s) {
@@ -399,7 +457,10 @@ int iommu_map_sg(struct device *dev, str
 
                        vaddr = s->dma_address & IOMMU_PAGE_MASK;
                        npages = iommu_num_pages(s->dma_address, s->dma_length);
-                       __iommu_free(tbl, vaddr, npages);
+                       if (!rc)
+                               __iommu_free(tbl, vaddr, npages);
+                       else
+                               iommu_undo(tbl, vaddr, npages);
                        s->dma_address = DMA_ERROR_CODE;
                        s->dma_length = 0;
                }
Index: b/arch/powerpc/platforms/cell/iommu.c
===================================================================
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct 
        }
 }
 
-static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
                unsigned long uaddr, enum dma_data_direction direction)
 {
        int i;
@@ -210,6 +210,7 @@ static void tce_build_cell(struct iommu_
 
        pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
                 index, npages, direction, base_pte);
+       return 0;
 }
 
 static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
Index: b/arch/powerpc/platforms/iseries/iommu.c
===================================================================
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/iommu.h>
 
-static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
+static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
                unsigned long uaddr, enum dma_data_direction direction)
 {
        u64 rc;
@@ -70,6 +70,7 @@ static void tce_build_iSeries(struct iom
                index++;
                uaddr += TCE_PAGE_SIZE;
        }
+       return 0;
 }
 
 static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
Index: b/arch/powerpc/platforms/pasemi/iommu.c
===================================================================
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
 static struct iommu_table iommu_table_iobmap;
 static int iommu_table_iobmap_inited;
 
-static void iobmap_build(struct iommu_table *tbl, long index,
+static int iobmap_build(struct iommu_table *tbl, long index,
                         long npages, unsigned long uaddr,
                         enum dma_data_direction direction)
 {
@@ -107,6 +107,7 @@ static void iobmap_build(struct iommu_ta
                uaddr += IOBMAP_PAGE_SIZE;
                bus_addr += IOBMAP_PAGE_SIZE;
        }
+       return 0;
 }
 
 
Index: b/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -48,7 +49,7 @@
 #include "plpar_wrappers.h"
 
 
-static void tce_build_pSeries(struct iommu_table *tbl, long index,
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
                              long npages, unsigned long uaddr,
                              enum dma_data_direction direction)
 {
@@ -71,6 +72,7 @@ static void tce_build_pSeries(struct iom
                uaddr += TCE_PAGE_SIZE;
                tcep++;
        }
+       return 0;
 }
 
 
@@ -93,13 +95,18 @@ static unsigned long tce_get_pseries(str
        return *tcep;
 }
 
-static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                long npages, unsigned long uaddr,
                                enum dma_data_direction direction)
 {
-       u64 rc;
+       u64 rc = 0;
        u64 proto_tce, tce;
        u64 rpn;
+       int sleep_msecs, ret = 0;
+       long tcenum_start = tcenum, npages_start = npages;
 
        rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
        proto_tce = TCE_PCI_READ;
@@ -108,7 +115,21 @@ static void tce_build_pSeriesLP(struct i
 
        while (npages--) {
                tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-               rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+               do {
+                       rc = plpar_tce_put((u64)tbl->it_index,
+                                          (u64)tcenum << 12, tce);
+                       if (unlikely(H_IS_LONG_BUSY(rc))) {
+                               sleep_msecs = plpar_get_longbusy_msecs(rc);
+                               mdelay(sleep_msecs);
+                       }
+               } while (unlikely(H_IS_LONG_BUSY(rc)));
+
+               if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+                       ret = (int)rc;
+                       tce_free_pSeriesLP(tbl, tcenum_start,
+                                          (npages_start - (npages + 1)));
+                       break;
+               }
 
                if (rc && printk_ratelimit()) {
                        printk("tce_build_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc);
@@ -121,19 +142,22 @@ static void tce_build_pSeriesLP(struct i
                tcenum++;
                rpn++;
        }
+       return ret;
 }
 
 static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
 
-static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                     long npages, unsigned long uaddr,
                                     enum dma_data_direction direction)
 {
-       u64 rc;
+       u64 rc = 0;
        u64 proto_tce;
        u64 *tcep;
        u64 rpn;
        long l, limit;
+       long tcenum_start = tcenum, npages_start = npages;
+       int sleep_msecs, ret = 0;
 
        if (npages == 1)
                return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
@@ -171,15 +195,26 @@ static void tce_buildmulti_pSeriesLP(str
                        rpn++;
                }
 
-               rc = plpar_tce_put_indirect((u64)tbl->it_index,
-                                           (u64)tcenum << 12,
-                                           (u64)virt_to_abs(tcep),
-                                           limit);
+               do {
+                       rc = plpar_tce_put_indirect(tbl->it_index, tcenum << 12,
+                                                   virt_to_abs(tcep), limit);
+                       if (unlikely(H_IS_LONG_BUSY(rc))) {
+                               sleep_msecs = plpar_get_longbusy_msecs(rc);
+                               mdelay(sleep_msecs);
+                       }
+               } while (unlikely(H_IS_LONG_BUSY(rc)));
 
                npages -= limit;
                tcenum += limit;
        } while (npages > 0 && !rc);
 
+       if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+               ret = (int)rc;
+               tce_freemulti_pSeriesLP(tbl, tcenum_start,
+                                       (npages_start - (npages + limit)));
+               return ret;
+       }
+
        if (rc && printk_ratelimit()) {
                printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc);
                printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
@@ -187,14 +222,23 @@ static void tce_buildmulti_pSeriesLP(str
                printk("\ttce[0] val = 0x%lx\n", tcep[0]);
                show_stack(current, (unsigned long *)__get_SP());
        }
+       return ret;
 }
 
 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long 
npages)
 {
+       int sleep_msecs;
        u64 rc;
 
        while (npages--) {
-               rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
+               do {
+                       rc = plpar_tce_put((u64)tbl->it_index,
+                                          (u64)tcenum << 12, 0);
+                       if (unlikely(H_IS_LONG_BUSY(rc))) {
+                               sleep_msecs = plpar_get_longbusy_msecs(rc);
+                               mdelay(sleep_msecs);
+                       }
+               } while (unlikely(H_IS_LONG_BUSY(rc)));
 
                if (rc && printk_ratelimit()) {
                        printk("tce_free_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc);
@@ -210,9 +254,17 @@ static void tce_free_pSeriesLP(struct io
 
 static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long 
npages)
 {
+       int sleep_msecs;
        u64 rc;
 
-       rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
+       do {
+               rc = plpar_tce_stuff((u64)tbl->it_index,
+                                    (u64)tcenum << 12, 0, npages);
+               if (unlikely(H_IS_LONG_BUSY(rc))) {
+                       sleep_msecs = plpar_get_longbusy_msecs(rc);
+                       mdelay(sleep_msecs);
+               }
+       } while (unlikely(H_IS_LONG_BUSY(rc)));
 
        if (rc && printk_ratelimit()) {
                printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
Index: b/arch/powerpc/sysdev/dart_iommu.c
===================================================================
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_tabl
        }
 }
 
-static void dart_build(struct iommu_table *tbl, long index,
+static int dart_build(struct iommu_table *tbl, long index,
                       long npages, unsigned long uaddr,
                       enum dma_data_direction direction)
 {
@@ -183,6 +183,7 @@ static void dart_build(struct iommu_tabl
        } else {
                dart_dirty = 1;
        }
+       return 0;
 }
 
 
Index: b/include/asm-powerpc/machdep.h
===================================================================
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
         * destroyed as well */
        void            (*hpte_clear_all)(void);
 
-       void            (*tce_build)(struct iommu_table * tbl,
+       int             (*tce_build)(struct iommu_table * tbl,
                                     long index,
                                     long npages,
                                     unsigned long uaddr,
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to