Hi,

Some comments and questions below.


-Olof

On Thu, Jun 12, 2008 at 05:19:36PM -0500, Robert Jennings wrote:
> Index: b/arch/powerpc/kernel/iommu.c
> ===================================================================
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -183,6 +183,49 @@ static unsigned long iommu_range_alloc(s
>       return n;
>  }
>  
> +/** iommu_undo - Clear iommu_table bits without calling platform tce_free.
> + *
> + * @tbl - struct iommu_table to alter
> + * @dma_addr - DMA address to free entries for
> + * @npages - number of pages to free entries for
> + *
> + * This is the same as __iommu_free without the call to ppc_md.tce_free();

__iommu_free has the __ prepended to indicate that it's not locking.
Since this does the same, please keep the __. Also see comments below.

> + *
> + * To clean up after ppc_md.tce_build() errors we need to clear bits
> + * in the table without calling the ppc_md.tce_free() method; calling
> + * ppc_md.tce_free() could alter entries that were not touched due to a
> + * premature failure in ppc_md.tce_build().
> + *
> + * The ppc_md.tce_build() needs to perform its own clean up prior to
> + * returning its error.
> + */
> +static void iommu_undo(struct iommu_table *tbl, dma_addr_t dma_addr,
> +                      unsigned int npages)
> +{
> +     unsigned long entry, free_entry;
> +
> +     entry = dma_addr >> IOMMU_PAGE_SHIFT;
> +     free_entry = entry - tbl->it_offset;
> +
> +     if (((free_entry + npages) > tbl->it_size) ||
> +         (entry < tbl->it_offset)) {
> +             if (printk_ratelimit()) {
> +                     printk(KERN_INFO "iommu_undo: invalid entry\n");
> +                     printk(KERN_INFO "\tentry    = 0x%lx\n", entry);
> +                     printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
> +                     printk(KERN_INFO "\tTable    = 0x%lx\n", (u64)tbl);
> +                     printk(KERN_INFO "\tbus#     = 0x%lx\n", tbl->it_busno);
> +                     printk(KERN_INFO "\tsize     = 0x%lx\n", tbl->it_size);
> +                     printk(KERN_INFO "\tstartOff = 0x%lx\n", 
> tbl->it_offset);
> +                     printk(KERN_INFO "\tindex    = 0x%lx\n", tbl->it_index);
> +                     WARN_ON(1);
> +             }
> +             return;
> +     }
> +
> +     iommu_area_free(tbl->it_map, free_entry, npages);
> +}

Ick, This should just be refactored to reuse code together with
iommu_free() instead of duplicating it. Also, the error checking
shouldn't be needed here.

Actually, is there harm in calling tce_free for these cases anyway? I'm
guessing it's not a performance critical path.

> @@ -275,7 +330,7 @@ int iommu_map_sg(struct device *dev, str
>       dma_addr_t dma_next = 0, dma_addr;
>       unsigned long flags;
>       struct scatterlist *s, *outs, *segstart;
> -     int outcount, incount, i;
> +     int outcount, incount, i, rc = 0;
>       unsigned int align;
>       unsigned long handle;
>       unsigned int max_seg_size;
> @@ -336,7 +391,10 @@ int iommu_map_sg(struct device *dev, str
>                           npages, entry, dma_addr);
>  
>               /* Insert into HW table */
> -             ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, 
> direction);
> +             rc = ppc_md.tce_build(tbl, entry, npages,
> +                                   vaddr & IOMMU_PAGE_MASK, direction);
> +             if(unlikely(rc))
> +                     goto failure;
>  
>               /* If we are in an open segment, try merging */
>               if (segstart != s) {
> @@ -399,7 +457,10 @@ int iommu_map_sg(struct device *dev, str
>  
>                       vaddr = s->dma_address & IOMMU_PAGE_MASK;
>                       npages = iommu_num_pages(s->dma_address, s->dma_length);
> -                     __iommu_free(tbl, vaddr, npages);
> +                     if (!rc)
> +                             __iommu_free(tbl, vaddr, npages);
> +                     else
> +                             iommu_undo(tbl, vaddr, npages);

'rc' is a quite generic name to carry state this far away from where
it's set. Either a more descriptive name (build_fail, whatever), or if
the above is true, just call __iommu_free here as well.

> -static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
> +static void tce_free_pSeriesLP(struct iommu_table*, long, long);
> +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
> +
> +static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
>                               long npages, unsigned long uaddr,
>                               enum dma_data_direction direction)
>  {
> -     u64 rc;
> +     u64 rc = 0;
>       u64 proto_tce, tce;
>       u64 rpn;
> +     int sleep_msecs, ret = 0;
> +     long tcenum_start = tcenum, npages_start = npages;
>  
>       rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
>       proto_tce = TCE_PCI_READ;
> @@ -108,7 +115,21 @@ static void tce_build_pSeriesLP(struct i
>  
>       while (npages--) {
>               tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
> -             rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
> +             do {
> +                     rc = plpar_tce_put((u64)tbl->it_index,
> +                                        (u64)tcenum << 12, tce);
> +                     if (unlikely(H_IS_LONG_BUSY(rc))) {
> +                             sleep_msecs = plpar_get_longbusy_msecs(rc);
> +                             mdelay(sleep_msecs);

Ouch! You're holding locks and stuff here. Do you really want this right
here?

> +                     }
> +             } while (unlikely(H_IS_LONG_BUSY(rc)));

Do you also want to keep doing this forever, or eventually just fail
instead?

> +             if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
> +                     ret = (int)rc;
> +                     tce_free_pSeriesLP(tbl, tcenum_start,
> +                                        (npages_start - (npages + 1)));
> +                     break;
> +             }
>  
>               if (rc && printk_ratelimit()) {
>                       printk("tce_build_pSeriesLP: plpar_tce_put failed. 
> rc=%ld\n", rc);
> @@ -121,19 +142,22 @@ static void tce_build_pSeriesLP(struct i
>               tcenum++;
>               rpn++;
>       }
> +     return ret;
>  }
>  
>  static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
>  
> -static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
> +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>                                    long npages, unsigned long uaddr,
>                                    enum dma_data_direction direction)
>  {
> -     u64 rc;
> +     u64 rc = 0;
>       u64 proto_tce;
>       u64 *tcep;
>       u64 rpn;
>       long l, limit;
> +     long tcenum_start = tcenum, npages_start = npages;
> +     int sleep_msecs, ret = 0;
>  
>       if (npages == 1)
>               return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> @@ -171,15 +195,26 @@ static void tce_buildmulti_pSeriesLP(str
>                       rpn++;
>               }
>  
> -             rc = plpar_tce_put_indirect((u64)tbl->it_index,
> -                                         (u64)tcenum << 12,
> -                                         (u64)virt_to_abs(tcep),
> -                                         limit);
> +             do {
> +                     rc = plpar_tce_put_indirect(tbl->it_index, tcenum << 12,
> +                                                 virt_to_abs(tcep), limit);
> +                     if (unlikely(H_IS_LONG_BUSY(rc))) {
> +                             sleep_msecs = plpar_get_longbusy_msecs(rc);
> +                             mdelay(sleep_msecs);
> +                     }
> +             } while (unlikely(H_IS_LONG_BUSY(rc)));
>  
>               npages -= limit;
>               tcenum += limit;
>       } while (npages > 0 && !rc);
>  
> +     if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
> +             ret = (int)rc;
> +             tce_freemulti_pSeriesLP(tbl, tcenum_start,
> +                                     (npages_start - (npages + limit)));
> +             return ret;
> +     }
> +
>       if (rc && printk_ratelimit()) {
>               printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. 
> rc=%ld\n", rc);
>               printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
> @@ -187,14 +222,23 @@ static void tce_buildmulti_pSeriesLP(str
>               printk("\ttce[0] val = 0x%lx\n", tcep[0]);
>               show_stack(current, (unsigned long *)__get_SP());
>       }
> +     return ret;
>  }
>  
>  static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long 
> npages)
>  {
> +     int sleep_msecs;
>       u64 rc;
>  
>       while (npages--) {
> -             rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
> +             do {
> +                     rc = plpar_tce_put((u64)tbl->it_index,
> +                                        (u64)tcenum << 12, 0);
> +                     if (unlikely(H_IS_LONG_BUSY(rc))) {
> +                             sleep_msecs = plpar_get_longbusy_msecs(rc);
> +                             mdelay(sleep_msecs);
> +                     }
> +             } while (unlikely(H_IS_LONG_BUSY(rc)));

Can this ever happen? I would hope that any entry that's got an active
mapping is actually pinned in memory, what other than paging in from
disk can result in long busy?

> @@ -210,9 +254,17 @@ static void tce_free_pSeriesLP(struct io
>  
>  static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 
> long npages)
>  {
> +     int sleep_msecs;
>       u64 rc;
>  
> -     rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
> +     do {
> +             rc = plpar_tce_stuff((u64)tbl->it_index,
> +                                  (u64)tcenum << 12, 0, npages);
> +             if (unlikely(H_IS_LONG_BUSY(rc))) {
> +                     sleep_msecs = plpar_get_longbusy_msecs(rc);
> +                     mdelay(sleep_msecs);
> +             }
> +     } while (unlikely(H_IS_LONG_BUSY(rc)));
>  
>       if (rc && printk_ratelimit()) {
>               printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to