On 08/10/2018 09:02, Christoph Hellwig wrote:
Handle architectures that are not cache coherent directly in the main
swiotlb code by calling arch_sync_dma_for_{device,cpu} in all the right
places from the various dma_map/unmap/sync methods when the device is
non-coherent.

Because swiotlb now uses dma_direct_alloc for the coherent allocation,
that side is already taken care of by the dma-direct code calling into
arch_dma_{alloc,free} for devices that are non-coherent.
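
For reference, that works because dma_direct_alloc() dispatches roughly
like this for non-coherent devices (a simplified sketch of
kernel/dma/direct.c from memory, not the exact source):

        void *dma_direct_alloc(struct device *dev, size_t size,
                        dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
        {
                /* non-coherent devices take the arch-specific path */
                if (!dev_is_dma_coherent(dev))
                        return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
                return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
        }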

Signed-off-by: Christoph Hellwig <h...@lst.de>
---
  kernel/dma/swiotlb.c | 23 +++++++++++++++++------
  1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 475a41eff3dc..52885b274368 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,6 +21,7 @@
  #include <linux/cache.h>
  #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
  #include <linux/mm.h>
  #include <linux/export.h>
  #include <linux/spinlock.h>
@@ -677,6 +678,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                dma_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
        }
+       if (!dev_is_dma_coherent(dev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)

Nit: other parts of the file are already using the "!(...)" style rather than "(...) == 0".
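
i.e. the check could read:

        if (!dev_is_dma_coherent(dev) &&
            !(attrs & DMA_ATTR_SKIP_CPU_SYNC))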

+               arch_sync_dma_for_device(dev, phys, size, dir);
+
        return dma_addr;
  }
@@ -696,6 +701,10 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,

        BUG_ON(dir == DMA_NONE);

+       if (!dev_is_dma_coherent(hwdev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
        if (is_swiotlb_buffer(paddr)) {
                swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
                return;
@@ -732,15 +741,17 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,

        BUG_ON(dir == DMA_NONE);

-       if (is_swiotlb_buffer(paddr)) {
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
+       if (is_swiotlb_buffer(paddr))
                swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-               return;
-       }

-       if (dir != DMA_FROM_DEVICE)
-               return;
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
+               arch_sync_dma_for_device(hwdev, paddr, size, dir);

-       dma_mark_clean(phys_to_virt(paddr), size);
+       if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
+               dma_mark_clean(phys_to_virt(paddr), size);
  }

All these "if"s end up pretty hard to follow at first glance :(

I had a quick play at moving the cache maintenance here out into the callers, which arguably comes out looking a little cleaner (only +1 source line overall, and it actually reduces text size by 32 bytes for my build). Sadly, though, I can't see any way of doing the equivalent for map/unmap short of duplicating the whole 3-line arch_sync_*() block at every call site, which just trades one readability problem for another. Roughly, each caller would have to grow something like this (illustrative, not from the patch):
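
        if (!dev_is_dma_coherent(dev) &&
            !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                arch_sync_dma_for_cpu(dev, dma_to_phys(dev, dev_addr),
                                      size, dir);

As you mentioned on patch #7, I guess this really is one of those things with no nice solution, so cosmetics aside,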

Reviewed-by: Robin Murphy <robin.mur...@arm.com>

FWIW, below is my "cleanup" attempt (diff on top of the swiotlb-noncoherent.3 branch).

Robin.

----->8-----
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 52885b274368..43ee29969fdd 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -741,23 +741,24 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,

        BUG_ON(dir == DMA_NONE);

-       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
-               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
-
-       if (is_swiotlb_buffer(paddr))
+       if (is_swiotlb_buffer(paddr)) {
                swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
+               return;
+       }

-       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
-               arch_sync_dma_for_device(hwdev, paddr, size, dir);
+       if (dir != DMA_FROM_DEVICE)
+               return;

-       if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
-               dma_mark_clean(phys_to_virt(paddr), size);
+       dma_mark_clean(phys_to_virt(paddr), size);
 }

 void
 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, enum dma_data_direction dir)
 {
+       if (!dev_is_dma_coherent(hwdev))
+               arch_sync_dma_for_cpu(hwdev, dma_to_phys(hwdev, dev_addr),
+                                     size, dir);
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
 }

@@ -766,6 +767,9 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, enum dma_data_direction dir)
 {
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+       if (!dev_is_dma_coherent(hwdev))
+               arch_sync_dma_for_device(hwdev, dma_to_phys(hwdev, dev_addr),
+                                        size, dir);
 }

 /*
@@ -828,31 +832,28 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
  * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
  * and usage.
  */
-static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-               int nelems, enum dma_data_direction dir,
-               enum dma_sync_target target)
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl,
+                       int nelems, enum dma_data_direction dir)
 {
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i)
-               swiotlb_sync_single(hwdev, sg->dma_address,
-                                   sg_dma_len(sg), dir, target);
+               swiotlb_sync_single_for_cpu(hwdev, sg->dma_address,
+                                           sg_dma_len(sg), dir);
 }

 void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-                       int nelems, enum dma_data_direction dir)
-{
-       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl,
                           int nelems, enum dma_data_direction dir)
 {
-       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nelems, i)
+               swiotlb_sync_single_for_device(hwdev, sg->dma_address,
+                                              sg_dma_len(sg), dir);
 }

 /*