From: Robert Jennings <[EMAIL PROTECTED]> To support Cooperative Memory Overcommitment (CMO), we need to check for failure and busy responses from some of the TCE hcalls.
These changes for the pseries platform affect the powerpc architecture; the matching changes for the other affected platforms are included in this patch. pSeries platform IOMMU code changes: * platform TCE functions must handle H_NOT_ENOUGH_RESOURCES errors. * platform TCE functions must retry when H_LONG_BUSY_* is returned. * platform TCE functions must return an error when H_NOT_ENOUGH_RESOURCES is encountered. Architecture IOMMU code changes: * Calls to ppc_md.tce_build need to check return values and return DMA_ERROR_CODE on failure. Architecture changes: * the tce_build hook in struct machdep_calls needs to return int so that the tce_build*_pSeriesLP functions can indicate failure * all other platforms will need updates to iommu functions to match the new calling semantics; they will return 0 on success. The other platforms' default configs have been built, but no further testing was performed. Signed-off-by: Robert Jennings <[EMAIL PROTECTED]> --- arch/powerpc/kernel/iommu.c | 71 +++++++++++++++++++++++++++++-- arch/powerpc/platforms/cell/iommu.c | 3 + arch/powerpc/platforms/iseries/iommu.c | 3 + arch/powerpc/platforms/pasemi/iommu.c | 3 + arch/powerpc/platforms/pseries/iommu.c | 76 ++++++++++++++++++++++++++++----- arch/powerpc/sysdev/dart_iommu.c | 3 + include/asm-powerpc/machdep.h | 2 7 files changed, 139 insertions(+), 22 deletions(-) Index: b/arch/powerpc/kernel/iommu.c =================================================================== --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -183,6 +183,49 @@ static unsigned long iommu_range_alloc(s return n; } +/** iommu_undo - Clear iommu_table bits without calling platform tce_free. 
+ * + * @tbl - struct iommu_table to alter + * @dma_addr - DMA address to free entries for + * @npages - number of pages to free entries for + * + * This is the same as __iommu_free without the call to ppc_md.tce_free(); + * + * To clean up after ppc_md.tce_build() errors we need to clear bits + * in the table without calling the ppc_md.tce_free() method; calling + * ppc_md.tce_free() could alter entries that were not touched due to a + * premature failure in ppc_md.tce_build(). + * + * The ppc_md.tce_build() needs to perform its own clean up prior to + * returning its error. + */ +static void iommu_undo(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + + entry = dma_addr >> IOMMU_PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + if (((free_entry + npages) > tbl->it_size) || + (entry < tbl->it_offset)) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_undo: invalid entry\n"); + printk(KERN_INFO "\tentry = 0x%lx\n", entry); + printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); + printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); + printk(KERN_INFO "\tbus# = 0x%lx\n", tbl->it_busno); + printk(KERN_INFO "\tsize = 0x%lx\n", tbl->it_size); + printk(KERN_INFO "\tstartOff = 0x%lx\n", tbl->it_offset); + printk(KERN_INFO "\tindex = 0x%lx\n", tbl->it_index); + WARN_ON(1); + } + return; + } + + iommu_area_free(tbl->it_map, free_entry, npages); +} + static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *page, unsigned int npages, enum dma_data_direction direction, @@ -190,6 +233,7 @@ static dma_addr_t iommu_alloc(struct dev { unsigned long entry, flags; dma_addr_t ret = DMA_ERROR_CODE; + int rc; spin_lock_irqsave(&(tbl->it_lock), flags); @@ -204,9 +248,20 @@ static dma_addr_t iommu_alloc(struct dev ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ /* Put the TCEs in the HW table */ - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, - 
direction); + rc = ppc_md.tce_build(tbl, entry, npages, + (unsigned long)page & IOMMU_PAGE_MASK, direction); + /* ppc_md.tce_build() only returns non-zero for transient errors. + * Clean up the table bitmap in this case and return + * DMA_ERROR_CODE. For all other errors the functionality is + * not altered. + */ + if (unlikely(rc)) { + iommu_undo(tbl, ret, npages); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } /* Flush/invalidate TLB caches if necessary */ if (ppc_md.tce_flush) @@ -275,7 +330,7 @@ int iommu_map_sg(struct device *dev, str dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; - int outcount, incount, i; + int outcount, incount, i, rc = 0; unsigned int align; unsigned long handle; unsigned int max_seg_size; @@ -336,7 +391,10 @@ int iommu_map_sg(struct device *dev, str npages, entry, dma_addr); /* Insert into HW table */ - ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, direction); + rc = ppc_md.tce_build(tbl, entry, npages, + vaddr & IOMMU_PAGE_MASK, direction); + if(unlikely(rc)) + goto failure; /* If we are in an open segment, try merging */ if (segstart != s) { @@ -399,7 +457,10 @@ int iommu_map_sg(struct device *dev, str vaddr = s->dma_address & IOMMU_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length); - __iommu_free(tbl, vaddr, npages); + if (!rc) + __iommu_free(tbl, vaddr, npages); + else + iommu_undo(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; } Index: b/arch/powerpc/platforms/cell/iommu.c =================================================================== --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct } } -static void tce_build_cell(struct iommu_table *tbl, long index, long npages, +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction) 
{ int i; @@ -210,6 +210,7 @@ static void tce_build_cell(struct iommu_ pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", index, npages, direction, base_pte); + return 0; } static void tce_free_cell(struct iommu_table *tbl, long index, long npages) Index: b/arch/powerpc/platforms/iseries/iommu.c =================================================================== --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -41,7 +41,7 @@ #include <asm/iseries/hv_call_event.h> #include <asm/iseries/iommu.h> -static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, +static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction) { u64 rc; @@ -70,6 +70,7 @@ static void tce_build_iSeries(struct iom index++; uaddr += TCE_PAGE_SIZE; } + return 0; } static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) Index: b/arch/powerpc/platforms/pasemi/iommu.c =================================================================== --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -83,7 +83,7 @@ static u32 *iob_l2_base; static struct iommu_table iommu_table_iobmap; static int iommu_table_iobmap_inited; -static void iobmap_build(struct iommu_table *tbl, long index, +static int iobmap_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction) { @@ -107,6 +107,7 @@ static void iobmap_build(struct iommu_ta uaddr += IOBMAP_PAGE_SIZE; bus_addr += IOBMAP_PAGE_SIZE; } + return 0; } Index: b/arch/powerpc/platforms/pseries/iommu.c =================================================================== --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -25,6 +25,7 @@ */ #include <linux/init.h> +#include <linux/delay.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/mm.h> @@ -48,7 +49,7 @@ 
#include "plpar_wrappers.h" -static void tce_build_pSeries(struct iommu_table *tbl, long index, +static int tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction) { @@ -71,6 +72,7 @@ static void tce_build_pSeries(struct iom uaddr += TCE_PAGE_SIZE; tcep++; } + return 0; } @@ -93,13 +95,18 @@ static unsigned long tce_get_pseries(str return *tcep; } -static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); + +static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction) { - u64 rc; + u64 rc = 0; u64 proto_tce, tce; u64 rpn; + int sleep_msecs, ret = 0; + long tcenum_start = tcenum, npages_start = npages; rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; @@ -108,7 +115,21 @@ static void tce_build_pSeriesLP(struct i while (npages--) { tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + do { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, tce); + if (unlikely(H_IS_LONG_BUSY(rc))) { + sleep_msecs = plpar_get_longbusy_msecs(rc); + mdelay(sleep_msecs); + } + } while (unlikely(H_IS_LONG_BUSY(rc))); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_free_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + 1))); + break; + } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc); @@ -121,19 +142,22 @@ static void tce_build_pSeriesLP(struct i tcenum++; rpn++; } + return ret; } static DEFINE_PER_CPU(u64 *, tce_page) = NULL; -static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction) { - u64 rc; + u64 rc = 0; u64 proto_tce; u64 *tcep; u64 rpn; long l, limit; + long tcenum_start = tcenum, npages_start = npages; + int sleep_msecs, ret = 0; if (npages == 1) return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, @@ -171,15 +195,26 @@ static void tce_buildmulti_pSeriesLP(str rpn++; } - rc = plpar_tce_put_indirect((u64)tbl->it_index, - (u64)tcenum << 12, - (u64)virt_to_abs(tcep), - limit); + do { + rc = plpar_tce_put_indirect(tbl->it_index, tcenum << 12, + virt_to_abs(tcep), limit); + if (unlikely(H_IS_LONG_BUSY(rc))) { + sleep_msecs = plpar_get_longbusy_msecs(rc); + mdelay(sleep_msecs); + } + } while (unlikely(H_IS_LONG_BUSY(rc))); npages -= limit; tcenum += limit; } while (npages > 0 && !rc); + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_freemulti_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + limit))); + return ret; + } + if (rc && printk_ratelimit()) { printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); @@ -187,14 +222,23 @@ static void tce_buildmulti_pSeriesLP(str printk("\ttce[0] val = 0x%lx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } + return ret; } static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { + int sleep_msecs; u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + do { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, 0); + if (unlikely(H_IS_LONG_BUSY(rc))) { + sleep_msecs = plpar_get_longbusy_msecs(rc); + mdelay(sleep_msecs); + } + } while (unlikely(H_IS_LONG_BUSY(rc))); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); @@ -210,9 +254,17 @@ static void tce_free_pSeriesLP(struct io static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { + int sleep_msecs; u64 rc; - rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); + do { + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << 12, 0, npages); + if (unlikely(H_IS_LONG_BUSY(rc))) { + sleep_msecs = plpar_get_longbusy_msecs(rc); + mdelay(sleep_msecs); + } + } while (unlikely(H_IS_LONG_BUSY(rc))); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); Index: b/arch/powerpc/sysdev/dart_iommu.c =================================================================== --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -147,7 +147,7 @@ static void dart_flush(struct iommu_tabl } } -static void dart_build(struct iommu_table *tbl, long index, +static int dart_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction) { @@ -183,6 +183,7 @@ static void dart_build(struct iommu_tabl } else { dart_dirty = 1; } + return 0; } Index: b/include/asm-powerpc/machdep.h =================================================================== --- 
a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -76,7 +76,7 @@ struct machdep_calls { * destroyed as well */ void (*hpte_clear_all)(void); - void (*tce_build)(struct iommu_table * tbl, + int (*tce_build)(struct iommu_table * tbl, long index, long npages, unsigned long uaddr, _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev