Re: use generic DMA mapping code in powerpc V4

2018-12-16 Thread Christoph Hellwig
FYI, given that we are one week before the expected 4.20 release
date and I haven't found the bug plaguing Christian's setups, I think
we need to defer most of this to the next merge window.

I'd still like to get a few bits in earlier, which I will send out
separately now.


[PATCH] powerpc: use mm zones more sensibly

2018-12-16 Thread Christoph Hellwig
Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
common 64-bit configurations, and ZONE_DMA32 is used for 31-bit schemes.

Move to a scheme closer to what other architectures use (and I dare to
say the intent of the system):

 - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
 - ZONE_NORMAL: everything addressable by the kernel
 - ZONE_HIGHMEM: memory beyond the kernel's direct mapping (32-bit kernels only)

Also provide information on how ZONE_DMA is used by defining
ARCH_ZONE_DMA_BITS.

Contains various fixes from Benjamin Herrenschmidt.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig  |  8 +---
 arch/powerpc/include/asm/page.h   |  2 +
 arch/powerpc/include/asm/pgtable.h|  1 -
 arch/powerpc/kernel/dma-swiotlb.c |  6 +--
 arch/powerpc/kernel/dma.c |  7 +--
 arch/powerpc/mm/mem.c | 47 +++
 arch/powerpc/platforms/85xx/corenet_generic.c | 10 
 arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
 include/linux/mmzone.h|  2 +-
 9 files changed, 25 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bc8edd83cee..964e22e3b8d7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -373,9 +373,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
 
-config ZONE_DMA32
+config ZONE_DMA
bool
-   default y if PPC64
+   default y if PPC_BOOK3E_64
 
 config PGTABLE_LEVELS
int
@@ -868,10 +868,6 @@ config ISA
  have an IBM RS/6000 or pSeries machine, say Y.  If you have an
  embedded board, consult your board documentation.
 
-config ZONE_DMA
-   bool
-   default y
-
 config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f6a1265face2..fc8c9ac0c6be 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
 #endif /* __ASSEMBLY__ */
 #include 
 
+#define ARCH_ZONE_DMA_BITS 31
+
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 9679b7519a35..8af32ce93c7f 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
 
 extern pgd_t swapper_pg_dir[];
 
-void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
 int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
b/arch/powerpc/kernel/dma-swiotlb.c
index 430a7d0aa2cb..7d5fc9751622 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -107,12 +107,8 @@ int __init swiotlb_setup_bus_notifier(void)
 
 void __init swiotlb_detect_4g(void)
 {
-   if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
+   if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
-#ifdef CONFIG_ZONE_DMA32
-   limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
-#endif
-   }
 }
 
 static int __init check_swiotlb_enabled(void)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index d442d23e182b..f55bc60274b9 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
return 1;
 
 #ifdef CONFIG_FSL_SOC
-   /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
+   /* Freescale gets another chance via ZONE_DMA, however
 * that will have to be refined if/when they support iommus
 */
return 1;
@@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t 
size,
}
 
switch (zone) {
+#ifdef CONFIG_ZONE_DMA
case ZONE_DMA:
flag |= GFP_DMA;
break;
-#ifdef CONFIG_ZONE_DMA32
-   case ZONE_DMA32:
-   flag |= GFP_DMA32;
-   break;
 #endif
};
 #endif /* CONFIG_FSL_SOC */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0a64fffabee1..c0b676c3a5ba 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
 }
 #endif
 
-static bool zone_limits_final;
-
 /*
- * The memory zones past TOP_ZONE are managed by generic mm code.
- * These should be set to zero since that's what every other
- * architecture does.
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
+ * everything else. GFP_DMA32 page allocations automatically fall back to
+ * ZONE_DMA.
+ *
+ * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
+ * inform the generic DMA mapping code.  32-bit only devices (if not handled
+ * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
+ * otherwise served by ZONE_DMA.
+ */
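
To make the zone scheme concrete, here is a minimal sketch of how a
generic allocator can turn a device's DMA mask into a zone modifier
using ARCH_ZONE_DMA_BITS.  The helper name dma_zone_gfp() is
hypothetical; only the constants and zone flags come from the patch:

	#include <linux/gfp.h>
	#include <linux/dma-mapping.h>

	static gfp_t dma_zone_gfp(struct device *dev, gfp_t gfp)
	{
		u64 mask = dev->coherent_dma_mask;

		/* at most 31 bits: only the (optional) ZONE_DMA is safe */
		if (mask <= DMA_BIT_MASK(31))
			return gfp | GFP_DMA;
		/* at most 32 bits: ZONE_DMA32 where it exists */
		if (IS_ENABLED(CONFIG_ZONE_DMA32) && mask <= DMA_BIT_MASK(32))
			return gfp | GFP_DMA32;
		return gfp;		/* ZONE_NORMAL is fine */
	}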

[PATCH 1/8] powerpc: allow NOT_COHERENT_CACHE for amigaone

2018-12-16 Thread Christoph Hellwig
AMIGAONE selects NOT_COHERENT_CACHE, so we better allow it.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/platforms/Kconfig.cputype | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index 7e130035bbba..ab176fd3dfb5 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -400,7 +400,8 @@ config NR_CPUS
 
 config NOT_COHERENT_CACHE
bool
-   depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
+   depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
+   GAMECUBE_COMMON || AMIGAONE
default n if PPC_47x
default y
 
-- 
2.19.2



small powerpc DMA fixes and cleanups

2018-12-16 Thread Christoph Hellwig
Hi all,

this series has a few smaller fixes and cleanups for the powerpc
DMA code.

The biggest changes are related to the non-cache-coherent DMA
support, which as far as I can tell was not consistent in its
cache maintenance operations.  The AMCC ppc44x crypto driver also
looks like it was affected by that (or is otherwise just odd) and
used a helper internal to the powerpc DMA code to work around that
fact.  I've added Christian Lamparter, who has been trying to beat
that driver into shape over the last few years, for that reason.


[PATCH 2/8] powerpc/dma: properly wire up the unmap_page and unmap_sg methods

2018-12-16 Thread Christoph Hellwig
The unmap methods need to transfer memory ownership back from the device
to the cpu by identical means as dma_sync_*_to_cpu.  I'm not sure powerpc
needs to do any work in this transfer direction, but given that it does
invalidate the caches in dma_sync_*_to_cpu already we should make sure
we also do so on unmapping.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/kernel/dma.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index dbfc7056d7df..d442d23e182b 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -210,10 +210,15 @@ static int dma_nommu_map_sg(struct device *dev, struct 
scatterlist *sgl,
return nents;
 }
 
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction,
unsigned long attrs)
 {
+   struct scatterlist *sg;
+   int i;
+
+   for_each_sg(sgl, sg, nents, i)
+   __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
 }
 
 static u64 dma_nommu_get_required_mask(struct device *dev)
@@ -247,6 +252,8 @@ static inline void dma_nommu_unmap_page(struct device *dev,
 enum dma_data_direction direction,
 unsigned long attrs)
 {
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   __dma_sync(bus_to_virt(dma_address), size, dir);
 }
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
-- 
2.19.2
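
To illustrate the ownership rule these unmap hooks implement: the CPU
owns a streaming buffer again only after dma_unmap_*() (or a
dma_sync_*_for_cpu() call), so unmap is the last point where stale
cache lines can be dropped on a non-coherent CPU.  A hedged sketch of
an RX-style sequence; hw_start_rx(), wait_for_rx_done() and priv are
hypothetical driver pieces, while the DMA API calls are real:

	dma_addr_t dma = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;
	hw_start_rx(priv, dma, len);	/* device writes into the buffer */
	wait_for_rx_done(priv);
	dma_unmap_page(dev, dma, len, DMA_FROM_DEVICE);
	/* only now may the CPU safely read what the device wrote */
	process_data(page_address(page), len);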



[PATCH 3/8] crypto4xx_core: don't abuse __dma_sync_page

2018-12-16 Thread Christoph Hellwig
This function is internal to the DMA API implementation.  Instead use the
DMA API to properly unmap.  Note that the DMA API usage in this driver
is a disaster and urgently needs some work - it is missing all the unmaps,
seems to do a secondary map where it looks like it should do an unmap
in one place to work around cache coherency, and the directions passed in
seem to be partially wrong.

Signed-off-by: Christoph Hellwig 
---
 drivers/crypto/amcc/crypto4xx_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/amcc/crypto4xx_core.c 
b/drivers/crypto/amcc/crypto4xx_core.c
index 6eaec9ba0f68..63cb6956c948 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -596,7 +596,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device 
*dev,
  pd->pd_ctl_len.bf.pkt_len,
  dst);
} else {
-   __dma_sync_page(sg_page(dst), dst->offset, dst->length,
+   dma_unmap_page(dev->core_dev->device, pd->dest, dst->length,
DMA_FROM_DEVICE);
}
 
-- 
2.19.2
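
The general fix pattern for a driver like this is to record the
dma_addr_t at map time and unmap with the same size and direction in
the completion path.  A sketch under that assumption (pd->dest and dst
come from the patch context above; the submission side shown here is
hypothetical):

	/* submission path: map the destination scatterlist entry */
	pd->dest = dma_map_page(dev->core_dev->device, sg_page(dst),
				dst->offset, dst->length, DMA_FROM_DEVICE);

	/* completion handler: unmap with matching size and direction */
	dma_unmap_page(dev->core_dev->device, pd->dest, dst->length,
		       DMA_FROM_DEVICE);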



[PATCH 4/8] powerpc/dma: remove the unused ARCH_HAS_DMA_MMAP_COHERENT define

2018-12-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
Acked-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index 8fa394520af6..f2a4a7142b1e 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -112,7 +112,5 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask);
 
 extern u64 __dma_get_required_mask(struct device *dev);
 
-#define ARCH_HAS_DMA_MMAP_COHERENT
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_DMA_MAPPING_H */
-- 
2.19.2



[PATCH 6/8] powerpc/dma: remove the unused dma_iommu_ops export

2018-12-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/kernel/dma-iommu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index f9fe2080ceb9..2ca6cfaebf65 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -6,7 +6,6 @@
  * busses using the iommu infrastructure
  */
 
-#include 
 #include 
 
 /*
@@ -123,4 +122,3 @@ struct dma_map_ops dma_iommu_ops = {
.get_required_mask  = dma_iommu_get_required_mask,
.mapping_error  = dma_iommu_mapping_error,
 };
-EXPORT_SYMBOL(dma_iommu_ops);
-- 
2.19.2



[PATCH 5/8] powerpc/dma: remove the unused ISA_DMA_THRESHOLD export

2018-12-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
Acked-by: Benjamin Herrenschmidt 
---
 arch/powerpc/kernel/setup_32.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index be6ee8dec98d..947f904688b0 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -59,7 +59,6 @@ unsigned long ISA_DMA_THRESHOLD;
 unsigned int DMA_MODE_READ;
 unsigned int DMA_MODE_WRITE;
 
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
 
-- 
2.19.2



[PATCH 7/8] powerpc/dma: split the two __dma_alloc_coherent implementations

2018-12-16 Thread Christoph Hellwig
The implementation for the CONFIG_NOT_COHERENT_CACHE case doesn't share
any code with the one for systems with coherent caches.  Split it off
and merge it with the helpers in dma-noncoherent.c that have no other
callers.

Signed-off-by: Christoph Hellwig 
Acked-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/dma-mapping.h |  5 -
 arch/powerpc/kernel/dma.c  | 14 ++
 arch/powerpc/mm/dma-noncoherent.c  | 15 +++
 arch/powerpc/platforms/44x/warp.c  |  2 +-
 4 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index f2a4a7142b1e..dacd0f93f2b2 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -39,9 +39,6 @@ extern int dma_nommu_mmap_coherent(struct device *dev,
  * to ensure it is consistent.
  */
 struct device;
-extern void *__dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t gfp);
-extern void __dma_free_coherent(size_t size, void *vaddr);
 extern void __dma_sync(void *vaddr, size_t size, int direction);
 extern void __dma_sync_page(struct page *page, unsigned long offset,
 size_t size, int direction);
@@ -52,8 +49,6 @@ extern unsigned long __dma_get_coherent_pfn(unsigned long 
cpu_addr);
  * Cache coherent cores.
  */
 
-#define __dma_alloc_coherent(dev, gfp, size, handle)   NULL
-#define __dma_free_coherent(size, addr)((void)0)
 #define __dma_sync(addr, size, rw) ((void)0)
 #define __dma_sync_page(pg, off, sz, rw)   ((void)0)
 
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index d442d23e182b..270b2911c437 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -62,18 +62,12 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
 #endif
 }
 
+#ifndef CONFIG_NOT_COHERENT_CACHE
 void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
  dma_addr_t *dma_handle, gfp_t flag,
  unsigned long attrs)
 {
void *ret;
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
-   if (ret == NULL)
-   return NULL;
-   *dma_handle += get_dma_offset(dev);
-   return ret;
-#else
struct page *page;
int node = dev_to_node(dev);
 #ifdef CONFIG_FSL_SOC
@@ -113,19 +107,15 @@ void *__dma_nommu_alloc_coherent(struct device *dev, 
size_t size,
*dma_handle = __pa(ret) + get_dma_offset(dev);
 
return ret;
-#endif
 }
 
 void __dma_nommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
 {
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   __dma_free_coherent(size, vaddr);
-#else
free_pages((unsigned long)vaddr, get_order(size));
-#endif
 }
+#endif /* !CONFIG_NOT_COHERENT_CACHE */
 
 static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
   dma_addr_t *dma_handle, gfp_t flag,
diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index b6e7b5952ab5..e955539686a4 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -29,7 +29,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 
 #include 
@@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct 
ppc_vm_region *head, unsi
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *
-__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, 
gfp_t gfp)
+void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
struct page *page;
struct ppc_vm_region *c;
@@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *handle, gfp_t
/*
 * Set the "dma handle"
 */
-   *handle = page_to_phys(page);
+   *dma_handle = phys_to_dma(dev, page_to_phys(page));
 
do {
SetPageReserved(page);
@@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *handle, gfp_t
  no_page:
return NULL;
 }
-EXPORT_SYMBOL(__dma_alloc_coherent);
 
 /*
  * free a page as defined by the above mapping.
  */
-void __dma_free_coherent(size_t size, void *vaddr)
+void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+   dma_addr_t dma_handle, unsigned long attrs)
 {
struct ppc_vm_region *c;
unsigned long flags, addr;
@@ -309,7 +309,6 @@ void __dma_free_coherent(size_t size, void *vaddr)
   __func__, 

[PATCH 8/8] cxl: drop the dma_set_mask callback from vphb

2018-12-16 Thread Christoph Hellwig
The CXL code never even looks at the dma mask, so there is no good
reason for this sanity check.  Remove it because it gets in the way
of the dma ops refactoring.

Signed-off-by: Christoph Hellwig 
---
 drivers/misc/cxl/vphb.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 7908633d9204..49da2f744bbf 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -11,17 +11,6 @@
 #include 
 #include "cxl.h"
 
-static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
-{
-   if (dma_mask < DMA_BIT_MASK(64)) {
-   pr_info("%s only 64bit DMA supported on CXL", __func__);
-   return -EIO;
-   }
-
-   *(pdev->dev.dma_mask) = dma_mask;
-   return 0;
-}
-
 static int cxl_pci_probe_mode(struct pci_bus *bus)
 {
return PCI_PROBE_NORMAL;
@@ -220,7 +209,6 @@ static struct pci_controller_ops cxl_pci_controller_ops =
.reset_secondary_bus = cxl_pci_reset_secondary_bus,
.setup_msi_irqs = cxl_setup_msi_irqs,
.teardown_msi_irqs = cxl_teardown_msi_irqs,
-   .dma_set_mask = cxl_dma_set_mask,
 };
 
 int cxl_pci_vphb_add(struct cxl_afu *afu)
-- 
2.19.2



[PATCH] powerpc/ptrace: cleanup do_syscall_trace_enter

2018-12-16 Thread Dmitry V. Levin
Invoke tracehook_report_syscall_entry once.

Signed-off-by: Dmitry V. Levin 
---
 arch/powerpc/kernel/ptrace.c | 54 +---
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 714c3480c52d..8794d32c2d9e 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3263,32 +3263,40 @@ static inline int do_seccomp(struct pt_regs *regs) { 
return 0; }
  */
 long do_syscall_trace_enter(struct pt_regs *regs)
 {
+   u32 cached_flags;
+
user_exit();
 
-   if (test_thread_flag(TIF_SYSCALL_EMU)) {
-   /*
-* A nonzero return code from tracehook_report_syscall_entry()
-* tells us to prevent the syscall execution, but we are not
-* going to execute it anyway.
-*
-* Returning -1 will skip the syscall execution. We want to
-* avoid clobbering any register also, thus, not 'gotoing'
-* skip label.
-*/
-   if (tracehook_report_syscall_entry(regs))
-   ;
-   return -1;
-   }
+   cached_flags = READ_ONCE(current_thread_info()->flags) &
+  (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
 
-   /*
-* The tracer may decide to abort the syscall, if so tracehook
-* will return !0. Note that the tracer may also just change
-* regs->gpr[0] to an invalid syscall number, that is handled
-* below on the exit path.
-*/
-   if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-   tracehook_report_syscall_entry(regs))
-   goto skip;
+   if (cached_flags) {
+   int rc = tracehook_report_syscall_entry(regs);
+
+   if (unlikely(cached_flags & _TIF_SYSCALL_EMU)) {
+   /*
+* A nonzero return code from
+* tracehook_report_syscall_entry() tells us
+* to prevent the syscall execution, but
+* we are not going to execute it anyway.
+*
+* Returning -1 will skip the syscall execution.
+* We want to avoid clobbering any register also,
+* thus, not 'gotoing' skip label.
+*/
+   return -1;
+   }
+
+   if (rc) {
+   /*
+* The tracer decided to abort the syscall.
+* Note that the tracer may also just change
+* regs->gpr[0] to an invalid syscall number,
+* that is handled below on the exit path.
+*/
+   goto skip;
+   }
+   }
 
/* Run seccomp after ptrace; allow it to set gpr[3]. */
if (do_seccomp(regs))
-- 
ldv
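
For context, TIF_SYSCALL_EMU is what PTRACE_SYSEMU sets in the tracee.
A minimal user-space sketch of the facility this code path serves
(error handling elided; treat it as illustrative, not a test case):

	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <signal.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {				/* tracee */
			ptrace(PTRACE_TRACEME, 0, 0, 0);
			raise(SIGSTOP);
			getpid();	/* syscall the tracer will suppress */
			_exit(0);
		}
		waitpid(pid, 0, 0);			/* SIGSTOP */
		ptrace(PTRACE_SYSEMU, pid, 0, 0);	/* run to syscall entry */
		waitpid(pid, 0, 0);
		/* the syscall is NOT executed; a tracer would emulate it here */
		ptrace(PTRACE_CONT, pid, 0, 0);
		waitpid(pid, 0, 0);
		return 0;
	}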


Re: [PATCH 3/8] crypto4xx_core: don't abuse __dma_sync_page

2018-12-16 Thread Christian Lamparter
On Sunday, December 16, 2018 6:19:46 PM CET Christoph Hellwig wrote:
> This function is internal to the DMA API implementation.  Instead use the
> DMA API to properly unmap.  Note that the DMA API usage in this driver
> is a disaster and urgently needs some work - it is missing all the unmaps,
> seems to do a secondary map where it looks like it should do an unmap
> in one place to work around cache coherency, and the directions passed in
> seem to be partially wrong.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/crypto/amcc/crypto4xx_core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/crypto/amcc/crypto4xx_core.c 
> b/drivers/crypto/amcc/crypto4xx_core.c
> index 6eaec9ba0f68..63cb6956c948 100644
> --- a/drivers/crypto/amcc/crypto4xx_core.c
> +++ b/drivers/crypto/amcc/crypto4xx_core.c
> @@ -596,7 +596,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device 
> *dev,
> pd->pd_ctl_len.bf.pkt_len,
> dst);
>   } else {
> - __dma_sync_page(sg_page(dst), dst->offset, dst->length,
> + dma_unmap_page(dev->core_dev->device, pd->dest, dst->length,
>   DMA_FROM_DEVICE);
>   }
>  
> 
Yeap, the crypto4xx is a real piece of work. However, the ibm emac network driver
is even worse:
|/*
| * Lack of dma_unmap_ calls is intentional.
| *
| * API-correct usage requires additional support state information to be
| * maintained for every RX and TX buffer descriptor (BD). Unfortunately, due to
| * EMAC design (e.g. TX buffer passed from network stack can be split into
| * several BDs, dma_map_single/dma_map_page can be used to map particular BD),
| * maintaining such information will add additional overhead.
| * Current DMA API implementation for 4xx processors only ensures cache 
coherency
| * and dma_unmap_ routines are empty and are likely to stay this way.
| * I decided to omit dma_unmap_??? calls because I don't want to add additional
| * complexity just for the sake of following some abstract API, when it doesn't
| * add any real benefit to the driver. I understand that this decision maybe
| * controversial, but I really tried to make code API-correct and efficient
| * at the same time and didn't come up with code I liked :(.
--ebs
| */


The problem is, I can't really enable DMA_DEBUG because every PPC4XX/APM82181
device I have is some sort of network appliance. So a proper test would require
fixing the emac driver first, since DMA_DEBUG will disable itself.

Regards,
Christian

PS: Since it looks like you are located in Germany as well:
If you are interested (PM me), I could just mail you a "MyBook Live" 
NAS kit (PCB, shell, screws). All you would need is a SATA 3.5" HDD and
a standard 12V PSU with a 5.5mm plug. I maintain an active OpenWrt port
for the device: 
http://downloads.openwrt.org/snapshots/targets/apm821xx/sata/
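
For reference, the facility referred to above is the DMA API debugging
code.  A minimal way to enable it, per Documentation/DMA-API.txt (the
boot parameters are optional):

	CONFIG_DMA_API_DEBUG=y
	# optional kernel command line:
	#   dma_debug=off           disable the checks at boot
	#   dma_debug_entries=<n>   preallocate more tracking entries

It turns itself off after too many errors are found, which is why the
emac driver's intentionally missing unmaps get in the way of testing.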




Re: [PATCH v1 6/9] arm64: kexec: no need to ClearPageReserved()

2018-12-16 Thread Matthias Brugger



On 14/12/2018 12:10, David Hildenbrand wrote:
> This will be done by free_reserved_page().
> 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Bhupesh Sharma 
> Cc: James Morse 
> Cc: Marc Zyngier 
> Cc: Dave Kleikamp 
> Cc: Mark Rutland 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Matthew Wilcox 
> Acked-by: James Morse 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Matthias Brugger 

> ---
>  arch/arm64/kernel/machine_kexec.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/arm64/kernel/machine_kexec.c 
> b/arch/arm64/kernel/machine_kexec.c
> index aa9c94113700..6f0587b5e941 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -361,7 +361,6 @@ void crash_free_reserved_phys_range(unsigned long begin, 
> unsigned long end)
>  
>   for (addr = begin; addr < end; addr += PAGE_SIZE) {
>   page = phys_to_page(addr);
> - ClearPageReserved(page);
>   free_reserved_page(page);
>   }
>  }
> 


[PATCH] ocxl: Fix endianness bug in ocxl_link_update_pe()

2018-12-16 Thread Greg Kurz
All fields in the PE are big-endian. Use cpu_to_be32() like everywhere
else something is written to the PE. Otherwise a wrong TID will be used
by the NPU. If this TID happens to point to an existing thread sharing
the same mm, it could be woken up in error. This is highly improbable
though. The likely outcome of this is the NPU not finding the target
thread and forcing the AFU into sending an interrupt, which userspace
is supposed to handle anyway.

Fixes: e948e06fc63a ("ocxl: Expose the thread_id needed for wait on POWER9")
Cc: sta...@vger.kernel.org  # v4.18
Signed-off-by: Greg Kurz 
---

This bug remained unnoticed so far because the current OCXL test suite
happens to call OCXL_IOCTL_ENABLE_P9_WAIT before attaching a context.
This causes ocxl_link_update_pe() to be called before ocxl_link_add_pe()
which re-writes the TID in the PE with the appropriate endianness.

I have some patches that change the behavior of the OCXL test suite so that
it can catch the issue:

https://github.com/gkurz/libocxl/commits/wake-host-thread-rework
---
 drivers/misc/ocxl/link.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 31695a078485..646d16450066 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -566,7 +566,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 
tid)
 
mutex_lock(&spa->spa_lock);
 
-   pe->tid = tid;
+   pe->tid = cpu_to_be32(tid);
 
/*
 * The barrier makes sure the PE is updated

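The bug class is worth a generic illustration: fields in a
hardware-visible structure have a fixed endianness, so every CPU store
must convert.  A sketch with a hypothetical struct (not the real OCXL
process element layout); the __be32 annotation is also what lets sparse
flag a missing conversion:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	struct hw_pe {
		__be32 tid;		/* big-endian in memory, always */
	};

	static void hw_pe_set_tid(struct hw_pe *pe, u32 tid)
	{
		pe->tid = cpu_to_be32(tid);	/* correct on LE and BE hosts */
	}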


Re: [PATCH 8/8] cxl: drop the dma_set_mask callback from vphb

2018-12-16 Thread Andrew Donnellan

On 17/12/18 4:19 am, Christoph Hellwig wrote:

The CXL code never even looks at the dma mask, so there is no good
reason for this sanity check.  Remove it because it gets in the way
of the dma ops refactoring.

Signed-off-by: Christoph Hellwig 


Acked-by: Andrew Donnellan 


---
  drivers/misc/cxl/vphb.c | 12 
  1 file changed, 12 deletions(-)

diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 7908633d9204..49da2f744bbf 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -11,17 +11,6 @@
  #include 
  #include "cxl.h"
  
-static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)

-{
-   if (dma_mask < DMA_BIT_MASK(64)) {
-   pr_info("%s only 64bit DMA supported on CXL", __func__);
-   return -EIO;
-   }
-
-   *(pdev->dev.dma_mask) = dma_mask;
-   return 0;
-}
-
  static int cxl_pci_probe_mode(struct pci_bus *bus)
  {
return PCI_PROBE_NORMAL;
@@ -220,7 +209,6 @@ static struct pci_controller_ops cxl_pci_controller_ops =
.reset_secondary_bus = cxl_pci_reset_secondary_bus,
.setup_msi_irqs = cxl_setup_msi_irqs,
.teardown_msi_irqs = cxl_teardown_msi_irqs,
-   .dma_set_mask = cxl_dma_set_mask,
  };
  
  int cxl_pci_vphb_add(struct cxl_afu *afu)




--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH] kernel/dma/direct: Do not include SME mask in the DMA supported check

2018-12-16 Thread Lendacky, Thomas
On 12/15/2018 04:55 AM, Christoph Hellwig wrote:
> The mail seems to be oddly encoded, so that git-am fails on it.  Can
> you resend as plain text?

Hmmm... not sure what happened with that, but yeah, looking at the message
source shows something strange went on. Let me take a look and I'll try to
get a good version to you tomorrow (Monday).

Thanks,
Tom

> 


Re: [PATCH] ocxl: Fix endianness bug in ocxl_link_update_pe()

2018-12-16 Thread Andrew Donnellan

On 17/12/18 8:28 am, Greg Kurz wrote:

All fields in the PE are big-endian. Use cpu_to_be32() like everywhere
else something is written to the PE. Otherwise a wrong TID will be used
by the NPU. If this TID happens to point to an existing thread sharing
the same mm, it could be woken up in error. This is highly improbable
though. The likely outcome of this is the NPU not finding the target
thread and forcing the AFU into sending an interrupt, which userspace
is supposed to handle anyway.

Fixes: e948e06fc63a ("ocxl: Expose the thread_id needed for wait on POWER9")
Cc: sta...@vger.kernel.org  # v4.18
Signed-off-by: Greg Kurz 


If only we read our sparse warnings, which would have told us this. One 
warning down :)


Thanks for finding this!

Acked-by: Andrew Donnellan 


---

This bug remained unnoticed so far because the current OCXL test suite
happens to call OCXL_IOCTL_ENABLE_P9_WAIT before attaching a context.
This causes ocxl_link_update_pe() to be called before ocxl_link_add_pe()
which re-writes the TID in the PE with the appropriate endianness.

I have some patches that change the behavior of the OCXL test suite so that
it can catch the issue:

https://github.com/gkurz/libocxl/commits/wake-host-thread-rework
---
  drivers/misc/ocxl/link.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 31695a078485..646d16450066 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -566,7 +566,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 
tid)
  
  	mutex_lock(&spa->spa_lock);
  
-	pe->tid = tid;

+   pe->tid = cpu_to_be32(tid);
  
  	/*

 * The barrier makes sure the PE is updated




--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH] ocxl: Fix endianness bug in ocxl_link_update_pe()

2018-12-16 Thread Alastair D'Silva
On Sun, 2018-12-16 at 22:28 +0100, Greg Kurz wrote:
> All fields in the PE are big-endian. Use cpu_to_be32() like
> everywhere
> else something is written to the PE. Otherwise a wrong TID will be
> used
> by the NPU. If this TID happens to point to an existing thread
> sharing
> the same mm, it could be woken up by error. This is highly improbable
> though. The likely outcome of this is the NPU not finding the target
> thread and forcing the AFU into sending an interrupt, which userspace
> is supposed to handle anyway.
> 
> Fixes: e948e06fc63a ("ocxl: Expose the thread_id needed for wait on
> POWER9")
> Cc: sta...@vger.kernel.org  # v4.18
> Signed-off-by: Greg Kurz 
> ---
> 
> This bug remained unnoticed so far because the current OCXL test
> suite
> happens to call OCXL_IOCTL_ENABLE_P9_WAIT before attaching a context.
> This causes ocxl_link_update_pe() to be called before
> ocxl_link_add_pe()
> which re-writes the TID in the PE with the appropriate endianness.
> 
> I have some patches that change the behavior of the OCXL test suite
> so that
> it can catch the issue:
> 
> https://github.com/gkurz/libocxl/commits/wake-host-thread-rework
> ---
>  drivers/misc/ocxl/link.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> index 31695a078485..646d16450066 100644
> --- a/drivers/misc/ocxl/link.c
> +++ b/drivers/misc/ocxl/link.c
> @@ -566,7 +566,7 @@ int ocxl_link_update_pe(void *link_handle, int
> pasid, __u16 tid)
>  
>   mutex_lock(&spa->spa_lock);
>  
> - pe->tid = tid;
> + pe->tid = cpu_to_be32(tid);
>  
>   /*
>* The barrier makes sure the PE is updated
> 

Good catch, thanks.

Reviewed-by: Alastair D'Silva 

-- 
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819



Re: use generic DMA mapping code in powerpc V4

2018-12-16 Thread Michael Ellerman
Christoph Hellwig  writes:

> FYI, given that we are one week before the expected 4.20 release
> date and I haven't found the bug plaging Christians setups I think
> we need to defer most of this to the next merge window.

OK, sorry I couldn't help. I tried powering up my pasemi board last week
but it just gives me a couple of status LEDs and nothing else; the fan
never spins up.

> I'd still like to get a few bits in earlier, which I will send out
> separately now.

OK.

cheers


Re: [PATCH v1 03/13] powerpc/mm/32s: rework mmu_mapin_ram()

2018-12-16 Thread Jonathan Neuschäfer
Hi, thanks for your reply.

On Thu, Dec 13, 2018 at 03:51:32PM +0100, Christophe Leroy wrote:
> Hi Again,
> 
> Le 13/12/2018 à 13:16, Christophe Leroy a écrit :
[...]
> > Can you tell/provide the .config and dts used ?

I'm using wii.dts and almost the wii_defconfig from my tree (the
savedefconfig result is attached), which is 4.20-rc5 plus a few patches:

  https://github.com/neuschaefer/linux wii-4.20-rc5 (w/o your patches)
  https://github.com/neuschaefer/linux wii-4.20-rc5-ppcbat (w/ your patches 1-3)

> > You seem to have 319MB RAM wherease arch/powerpc/boot/dts/wii.dts only
> > has 88MB Memory:
> > 
> >  memory {
> >      device_type = "memory";
> >      reg = <0x00000000 0x01800000    /* MEM1 24MB 1T-SRAM */
> >             0x10000000 0x04000000>;   /* MEM2 64MB GDDR3 */
> >  };

This is, I think, because something marks all the address space from 0
to the end of MEM2 as RAM, and then cuts out a hole in the middle. I'm
not sure about the exact mechanism.

Unfortunately this hole has to be treated carefully because it contains
MMIO devices.
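
(For the record, the usual sketch of that approach: declare the whole
range as RAM, then reserve the hole so nothing allocates from it.  The
constants below are from wii.dts; whether the platform fixup does
exactly this is the open question above.)

	/* RAM is declared contiguous from 0 to the end of MEM2 ... */
	memblock_add(0x00000000, 0x14000000);
	/* ... and the MEM1..MEM2 hole (MMIO lives there) is reserved */
	memblock_reserve(0x01800000, 0x10000000 - 0x01800000);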

> Putting the same description in my mpc832x board DTS and doing a few hacks
> to get the WII functions called, I get the following:
> 
> [0.00] Top of RAM: 0x14000000, Total RAM: 0x5800000
> [0.00] Memory hole size: 232MB
> [0.00] Zone ranges:
> [0.00]   DMA  [mem 0x0000000000000000-0x0000000013ffffff]
> [0.00]   Normal   empty
> [0.00] Movable zone start for each node
> [0.00] Early memory node ranges
> [0.00]   node   0: [mem 0x0000000000000000-0x00000000017fffff]
> [0.00]   node   0: [mem 0x0000000010000000-0x0000000013ffffff]
> [0.00] Initmem setup node 0 [mem
> 0x0000000000000000-0x0000000013ffffff]
> [0.00] On node 0 totalpages: 22528
> [0.00]   DMA zone: 640 pages used for memmap
> [0.00]   DMA zone: 0 pages reserved
> [0.00]   DMA zone: 22528 pages, LIFO batch:3
> [0.00] pcpu-alloc: s0 r0 d32768 u32768 alloc=1*32768
> [0.00] pcpu-alloc: [0] 0
> [0.00] Built 1 zonelists, mobility grouping on.  Total pages: 21888
> [0.00] Kernel command line: loglevel=7
> ip=192.168.2.5:192.168.2.2::255.0
> [0.00] Dentry cache hash table entries: 16384 (order: 4, 65536
> bytes)
> [0.00] Inode-cache hash table entries: 8192 (order: 3, 32768 bytes)
> [0.00] Memory: 77060K/90112K available (6548K kernel code, 1156K
> rwdata,
> [0.00] Kernel virtual memory layout:
> [0.00]   * 0xfffdf000..0xfffff000  : fixmap
> [0.00]   * 0xfdffd000..0xfe000000  : early ioremap
> [0.00]   * 0xd5000000..0xfdffd000  : vmalloc & ioremap
> 
> 
> 
> 
> root@vgoippro:~# cat /sys/kernel/debug/powerpc/block_address_translation
> ---[ Instruction Block Address Translation ]---
> 0: 0xc0000000-0xc0ffffff 0x00000000 Kernel EXEC coherent
> 1: -
> 2: 0xc1000000-0xc17fffff 0x01000000 Kernel EXEC coherent
> 3: -
> 4: 0xd0000000-0xd3ffffff 0x10000000 Kernel EXEC coherent
> 5: -
> 6: -
> 7: -
> 
> ---[ Data Block Address Translation ]---
> 0: 0xc0000000-0xc0ffffff 0x00000000 Kernel RW coherent
> 1: 0xfffe0000-0xffffffff 0x0d000000 Kernel RW no cache guarded
> 2: 0xc1000000-0xc17fffff 0x01000000 Kernel RW coherent
> 3: -
> 4: 0xd0000000-0xd3ffffff 0x10000000 Kernel RW coherent
> 5: -
> 6: -
> 7: -
> 
> 
> Could you please provide the dmesg and
> /sys/kernel/debug/powerpc/block_address_translation from before this patch,
> so that we can compare and identify the differences, if any?

After applying the patch that adds this debugfs file and enabling
CONFIG_PPC_PTDUMP, I get this:

# cat /sys/kernel/debug/powerpc/block_address_translation
---[ Instruction Block Address Translation ]---
0: -
1: -
2: 0xc0000000-0xc0ffffff 0x00000000 Kernel EXEC
3: 0xc1000000-0xc17fffff 0x01000000 Kernel EXEC
4: 0xd0000000-0xd1ffffff 0x10000000 Kernel EXEC
5: -
6: -
7: -

---[ Data Block Address Translation ]---
0: -
1: 0xfffe0000-0xffffffff 0x0d000000 Kernel RW no cache guarded
2: 0xc0000000-0xc0ffffff 0x00000000 Kernel RW
3: 0xc1000000-0xc17fffff 0x01000000 Kernel RW
4: 0xd0000000-0xd1ffffff 0x10000000 Kernel RW
5: -
6: -
7: -

dmesg is attached.


I added some tracing to the setbat function:

diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f6f575bae3bc..4da3dc54fe46 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -120,6 +120,9 @@ void __init setbat(int index, unsigned long virt, 
phys_addr_t phys,
struct ppc_bat *bat = BATS[index];
unsigned long flags = pgprot_val(prot);
 
+   pr_info("setbat(%u, %px, %px, %px, %lx)\n",
+

Re: [PATCH 2/8] powerpc/dma: properly wire up the unmap_page and unmap_sg methods

2018-12-16 Thread Christophe Leroy




Le 16/12/2018 à 18:19, Christoph Hellwig a écrit :

The unmap methods need to transfer memory ownership back from the device
to the cpu by identical means as dma_sync_*_to_cpu.  I'm not sure powerpc
needs to do any work in this transfer direction, but given that it does
invalidate the caches in dma_sync_*_to_cpu already we should make sure
we also do so on unmapping.


Why do we have to do that on unmapping? If we are unmapping, it means we 
are retiring the area, so why would we need to use the CPU for syncing an 
area that won't be used anymore?


Christophe




Signed-off-by: Christoph Hellwig 
---
  arch/powerpc/kernel/dma.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index dbfc7056d7df..d442d23e182b 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -210,10 +210,15 @@ static int dma_nommu_map_sg(struct device *dev, struct 
scatterlist *sgl,
return nents;
  }
  
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sg,

+static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction,
unsigned long attrs)
  {
+   struct scatterlist *sg;
+   int i;
+
+   for_each_sg(sgl, sg, nents, i)
+   __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
  }
  
  static u64 dma_nommu_get_required_mask(struct device *dev)

@@ -247,6 +252,8 @@ static inline void dma_nommu_unmap_page(struct device *dev,
 enum dma_data_direction direction,
 unsigned long attrs)
  {
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   __dma_sync(bus_to_virt(dma_address), size, dir);
  }
  
  #ifdef CONFIG_NOT_COHERENT_CACHE




Re: [PATCH 7/8] powerpc/dma: split the two __dma_alloc_coherent implementations

2018-12-16 Thread Christophe Leroy




Le 16/12/2018 à 18:19, Christoph Hellwig a écrit :

The implementation for the CONFIG_NOT_COHERENT_CACHE case doesn't share
any code with the one for systems with coherent caches.  Split it off
and merge it with the helpers in dma-noncoherent.c that have no other
callers.

Signed-off-by: Christoph Hellwig 
Acked-by: Benjamin Herrenschmidt 
---
  arch/powerpc/include/asm/dma-mapping.h |  5 -
  arch/powerpc/kernel/dma.c  | 14 ++


Instead of all the ifdefs in dma.c, couldn't we split it
in two files, i.e. dma.c for common parts and dma-coherence.c for specific 
stuff?


Christophe


  arch/powerpc/mm/dma-noncoherent.c  | 15 +++
  arch/powerpc/platforms/44x/warp.c  |  2 +-
  4 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index f2a4a7142b1e..dacd0f93f2b2 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -39,9 +39,6 @@ extern int dma_nommu_mmap_coherent(struct device *dev,
   * to ensure it is consistent.
   */
  struct device;
-extern void *__dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t gfp);
-extern void __dma_free_coherent(size_t size, void *vaddr);
  extern void __dma_sync(void *vaddr, size_t size, int direction);
  extern void __dma_sync_page(struct page *page, unsigned long offset,
 size_t size, int direction);
@@ -52,8 +49,6 @@ extern unsigned long __dma_get_coherent_pfn(unsigned long 
cpu_addr);
   * Cache coherent cores.
   */
  
-#define __dma_alloc_coherent(dev, gfp, size, handle)	NULL

-#define __dma_free_coherent(size, addr)((void)0)
  #define __dma_sync(addr, size, rw)((void)0)
  #define __dma_sync_page(pg, off, sz, rw)  ((void)0)
  
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c

index d442d23e182b..270b2911c437 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -62,18 +62,12 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
  #endif
  }
  
+#ifndef CONFIG_NOT_COHERENT_CACHE

  void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
  dma_addr_t *dma_handle, gfp_t flag,
  unsigned long attrs)
  {
void *ret;
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
-   if (ret == NULL)
-   return NULL;
-   *dma_handle += get_dma_offset(dev);
-   return ret;
-#else
struct page *page;
int node = dev_to_node(dev);
  #ifdef CONFIG_FSL_SOC
@@ -113,19 +107,15 @@ void *__dma_nommu_alloc_coherent(struct device *dev, 
size_t size,
*dma_handle = __pa(ret) + get_dma_offset(dev);
  
  	return ret;

-#endif
  }
  
  void __dma_nommu_free_coherent(struct device *dev, size_t size,

void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
  {
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   __dma_free_coherent(size, vaddr);
-#else
free_pages((unsigned long)vaddr, get_order(size));
-#endif
  }
+#endif /* !CONFIG_NOT_COHERENT_CACHE */
  
  static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,

   dma_addr_t *dma_handle, gfp_t flag,
diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index b6e7b5952ab5..e955539686a4 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -29,7 +29,7 @@
  #include 
  #include 
  #include 
-#include 
+#include 
  #include 
  
  #include 

@@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct 
ppc_vm_region *head, unsi
   * Allocate DMA-coherent memory space and return both the kernel remapped
   * virtual and bus address for that space.
   */
-void *
-__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, 
gfp_t gfp)
+void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
  {
struct page *page;
struct ppc_vm_region *c;
@@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *handle, gfp_t
/*
 * Set the "dma handle"
 */
-   *handle = page_to_phys(page);
+   *dma_handle = phys_to_dma(dev, page_to_phys(page));
  
  		do {

SetPageReserved(page);
@@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *handle, gfp_t
   no_page:
return NULL;
  }
-EXPORT_SYMBOL(__dma_alloc_coherent);
  
  /*

   * free a page as defined by the above mapping.
   */
-void __dma_free_coherent(size_t size, void *vaddr)
+void __dma_nommu_free_coherent(struct device *dev, size_t

Re: [PATCH v2 2/3] powerpc/lib: Refactor __patch_instruction() to use __put_user_asm()

2018-12-16 Thread Christophe Leroy

Hi Russell,

Le 10/12/2018 à 08:00, Russell Currey a écrit :

__patch_instruction() is called in early boot, and uses
__put_user_size(), which includes the locks and unlocks for KUAP,
which could either be called too early, or in the Radix case, forced to
use "early_" versions of functions just to safely handle this one case.


Looking at x86, I see that __put_user_size() doesn't include the locks. 
The lock/unlock is done by the callers. I'll do the same.





__put_user_asm() does not do this, and thus is safe to use both in early
boot, and later on since in this case it should only ever be touching
kernel memory.

__patch_instruction() was previously refactored to use __put_user_size()
in order to be able to return -EFAULT, which would allow the kernel to
patch instructions in userspace, which should never happen.  This has
the functional change of causing faults on userspace addresses if KUAP
is turned on, which should never happen in practice.

A future enhancement could be to double check the patch address is
definitely allowed to be tampered with by the kernel.


This makes me realise that we are calling lock_user_access() with kernel 
addresses. That most likely breaks protection on kernel addresses for 
book3s/32. I'll have to work around it.


Another thing I realised is that get_user(), at least, is called in 
some exception/trap handlers, which means it can be called nested within 
an ongoing user access. That means get_paca()->user_access_allowed 
might be modified during those exceptions/traps.


Christophe



Signed-off-by: Russell Currey 
---
  arch/powerpc/lib/code-patching.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 89502cbccb1b..15e8c6339960 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -26,9 +26,9 @@
  static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
   unsigned int *patch_addr)
  {
-   int err;
+   int err = 0;
  
-	__put_user_size(instr, patch_addr, 4, err);

+   __put_user_asm(instr, patch_addr, err, "stw");
if (err)
return err;
  


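One common way to make such state nesting-safe is to save and restore
it around the exception, rather than unconditionally re-locking on
exit.  A sketch building on the names used in this thread
(get_paca()->user_access_allowed, lock_user_access()); the wrappers
themselves are hypothetical, not the final powerpc KUAP API:

	static inline bool save_user_access(void)
	{
		return get_paca()->user_access_allowed;
	}

	static inline void restore_user_access(bool allowed)
	{
		if (allowed)
			unlock_user_access();	/* outer access continues */
		else
			lock_user_access();	/* stay locked as before */
	}

	/* exception entry:  bool saved = save_user_access();
	 * exception exit:   restore_user_access(saved);      */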

Re: [PATCH 3/8] crypto4xx_core: don't abuse __dma_sync_page

2018-12-16 Thread Christoph Hellwig
On Sun, Dec 16, 2018 at 07:28:32PM +0100, Christian Lamparter wrote:
> Yeap, the crypto4xx is a real piece of work. However, the ibm emac network driver
> is even worse:

Oh, fun.


Re: [PATCH 2/8] powerpc/dma: properly wire up the unmap_page and unmap_sg methods

2018-12-16 Thread Christoph Hellwig
On Mon, Dec 17, 2018 at 07:41:54AM +0100, Christophe Leroy wrote:
>
>
> Le 16/12/2018 à 18:19, Christoph Hellwig a écrit :
>> The unmap methods need to transfer memory ownership back from the device
>> to the cpu by identical means as dma_sync_*_to_cpu.  I'm not sure powerpc
>> needs to do any work in this transfer direction, but given that it does
>> invalidate the caches in dma_sync_*_to_cpu already we should make sure
>> we also do so on unmapping.
>
> Why do we have to do that on unmapping? If we are unmapping, it means we 
> are retiring the area, so why would we need to use the CPU for syncing an area 
> that won't be used anymore?

So in general we need the cache maintenance only at map time if the CPUs
guarantee to never speculate into memory that might be under DMA, which
for modern CPUs is increasingly rare.  If the CPUs could speculate into
the area that was DMA mapped, we need to do another round of cache
maintenance on unmap to make sure we really do not have any data
in the cache.  powerpc currently does that for dma_sync_*_to_cpu, but
not for the unmap case, which not only looks odd but also seems to be
worked around in drivers (see the ppc44x crypto patch).

Note that there are some ways to optimize the amount of cache flushing
done even when the CPU speculates, see:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arc/mm/dma.c#n93

But for that I need help from people who actually understand the
non-coherent powerpc SoCs and who can test it.  For now this patch
is conservative.
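
To spell out the conservative scheme this patch follows, here is a
sketch of direction-dependent maintenance for a non-coherent CPU that
may speculate.  The hook and cache-op names are placeholders, not the
powerpc functions:

	#include <linux/dma-direction.h>

	static void sync_for_device(void *vaddr, size_t size,
				    enum dma_data_direction dir)
	{
		if (dir == DMA_TO_DEVICE)
			cache_clean(vaddr, size);	/* write back dirty lines */
		else
			cache_wb_inv(vaddr, size);	/* write back + invalidate */
	}

	static void sync_for_cpu(void *vaddr, size_t size,
				 enum dma_data_direction dir)
	{
		/* drop lines the CPU may have speculatively refetched */
		if (dir != DMA_TO_DEVICE)
			cache_inv(vaddr, size);
	}

Unmap then calls sync_for_cpu() with the original direction, exactly
like dma_sync_*_to_cpu.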


Re: [PATCH 7/8] powerpc/dma: split the two __dma_alloc_coherent implementations

2018-12-16 Thread Christoph Hellwig
On Mon, Dec 17, 2018 at 07:51:05AM +0100, Christophe Leroy wrote:
>
>
> Le 16/12/2018 à 18:19, Christoph Hellwig a écrit :
>> The implementation for the CONFIG_NOT_COHERENT_CACHE case doesn't share
>> any code with the one for systems with coherent caches.  Split it off
>> and merge it with the helpers in dma-noncoherent.c that have no other
>> callers.
>>
>> Signed-off-by: Christoph Hellwig 
>> Acked-by: Benjamin Herrenschmidt 
>> ---
>>   arch/powerpc/include/asm/dma-mapping.h |  5 -
>>   arch/powerpc/kernel/dma.c  | 14 ++
>
> Instead of all the ifdefs in dma.c, couldn't we split it
> in two files, i.e. dma.c for common parts and dma-coherence.c for specific
> stuff?

The end goal is to kill dma.c and keep only dma-noncoherent.c, with most
of the code moving to common code.  Here is the current state of that:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/powerpc-dma.5

But it still has issues on two tested platforms and isn't ready yet.


Re: [PATCH 2/8] powerpc/dma: properly wire up the unmap_page and unmap_sg methods

2018-12-16 Thread Christophe Leroy




Le 17/12/2018 à 08:34, Christoph Hellwig a écrit :

On Mon, Dec 17, 2018 at 07:41:54AM +0100, Christophe Leroy wrote:



Le 16/12/2018 à 18:19, Christoph Hellwig a écrit :

The unmap methods need to transfer memory ownership back from the device
to the cpu by identical means as dma_sync_*_to_cpu.  I'm not sure powerpc
needs to do any work in this transfer direction, but given that it does
invalidate the caches in dma_sync_*_to_cpu already we should make sure
we also do so on unmapping.


Why do we have to do that on unmapping? If we are unmapping, it means we
are retiring the area, so why would we need to use the CPU for syncing an area
that won't be used anymore?


So in general we need the cache maintenance only at map time if the CPUs
guarantee to never speculate into memory that might be under DMA, which
for modern CPUs is increasingly rare.  If the CPUs could speculate into
the area that was DMA mapped, we need to do another round of cache
maintenance on unmap to make sure we really do not have any data
in the cache.  powerpc currently does that for dma_sync_*_to_cpu, but
not for the unmap case, which not only looks odd but also seems to be
worked around in drivers (see the ppc44x crypto patch).

Note that there are some ways to optimize the amount of cache flushing
done even when the CPU speculates, see:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arc/mm/dma.c#n93

But for that I need help from people who actually understand the
non-coherent powerpc SoCs and who can test it.  For now this patch
is conservative.



I can help you with powerpc 8xx actually.

Christophe


[PATCH v2 0/2] of: phandle_cache, fix refcounts, remove stale entry

2018-12-16 Thread frowand . list
From: Frank Rowand 

Non-overlay dynamic devicetree node removal may leave the node in
the phandle cache.  Subsequent calls to of_find_node_by_phandle()
will incorrectly find the stale entry.  This bug exposed the following
phandle cache refcount bug.

The refcount of phandle_cache entries is not incremented while in
the cache, allowing use after free error after kfree() of the
cached entry.

Changes since v1:
  - make __of_free_phandle_cache() static
  - add WARN_ON(1) for unexpected condition in of_find_node_by_phandle()
  
Frank Rowand (2):
  of: of_node_get()/of_node_put() nodes held in phandle cache
  of: __of_detach_node() - remove node from phandle cache

 drivers/of/base.c   | 100 
 drivers/of/dynamic.c|   3 ++
 drivers/of/of_private.h |   4 ++
 3 files changed, 82 insertions(+), 25 deletions(-)

-- 
Frank Rowand 

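The invariant the series restores, in miniature: a cache owns a
reference for exactly as long as it holds the pointer.  A sketch with a
hypothetical slot-store helper (of_node_put(NULL) is a no-op, so the
empty-slot case needs no special handling):

	static void phandle_cache_store(struct device_node **slot,
					struct device_node *np)
	{
		of_node_get(np);	/* reference now held by the cache */
		of_node_put(*slot);	/* release the displaced entry, if any */
		*slot = np;
	}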


[PATCH v2 1/2] of: of_node_get()/of_node_put() nodes held in phandle cache

2018-12-16 Thread frowand . list
From: Frank Rowand 

The phandle cache contains struct device_node pointers.  The refcount
of the pointers was not incremented while in the cache, allowing use
after free error after kfree() of the node.  Add the proper increment
and decrement of the use count.

Fixes: 0b3ce78e90fc ("of: cache phandle nodes to reduce cost of 
of_find_node_by_phandle()")

Signed-off-by: Frank Rowand 
---

changes since v1
  - make __of_free_phandle_cache() static

 drivers/of/base.c | 70 ---
 1 file changed, 46 insertions(+), 24 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 09692c9b32a7..6c33d63361b8 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -116,9 +116,6 @@ int __weak of_node_to_nid(struct device_node *np)
 }
 #endif
 
-static struct device_node **phandle_cache;
-static u32 phandle_cache_mask;
-
 /*
  * Assumptions behind phandle_cache implementation:
  *   - phandle property values are in a contiguous range of 1..n
@@ -127,6 +124,44 @@ int __weak of_node_to_nid(struct device_node *np)
  *   - the phandle lookup overhead reduction provided by the cache
  * will likely be less
  */
+
+static struct device_node **phandle_cache;
+static u32 phandle_cache_mask;
+
+/*
+ * Caller must hold devtree_lock.
+ */
+static void __of_free_phandle_cache(void)
+{
+   u32 cache_entries = phandle_cache_mask + 1;
+   u32 k;
+
+   if (!phandle_cache)
+   return;
+
+   for (k = 0; k < cache_entries; k++)
+   of_node_put(phandle_cache[k]);
+
+   kfree(phandle_cache);
+   phandle_cache = NULL;
+}
+
+int of_free_phandle_cache(void)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&devtree_lock, flags);
+
+   __of_free_phandle_cache();
+
+   raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+   return 0;
+}
+#if !defined(CONFIG_MODULES)
+late_initcall_sync(of_free_phandle_cache);
+#endif
+
 void of_populate_phandle_cache(void)
 {
unsigned long flags;
@@ -136,8 +171,7 @@ void of_populate_phandle_cache(void)
 
raw_spin_lock_irqsave(&devtree_lock, flags);
 
-   kfree(phandle_cache);
-   phandle_cache = NULL;
+   __of_free_phandle_cache();
 
for_each_of_allnodes(np)
if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
@@ -155,30 +189,15 @@ void of_populate_phandle_cache(void)
goto out;
 
for_each_of_allnodes(np)
-   if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
+   if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL) {
+   of_node_get(np);
phandle_cache[np->phandle & phandle_cache_mask] = np;
+   }
 
 out:
raw_spin_unlock_irqrestore(&devtree_lock, flags);
 }
 
-int of_free_phandle_cache(void)
-{
-   unsigned long flags;
-
-   raw_spin_lock_irqsave(&devtree_lock, flags);
-
-   kfree(phandle_cache);
-   phandle_cache = NULL;
-
-   raw_spin_unlock_irqrestore(&devtree_lock, flags);
-
-   return 0;
-}
-#if !defined(CONFIG_MODULES)
-late_initcall_sync(of_free_phandle_cache);
-#endif
-
 void __init of_core_init(void)
 {
struct device_node *np;
@@ -1195,8 +1214,11 @@ struct device_node *of_find_node_by_phandle(phandle 
handle)
if (!np) {
for_each_of_allnodes(np)
if (np->phandle == handle) {
-   if (phandle_cache)
+   if (phandle_cache) {
+   /* will put when removed from cache */
+   of_node_get(np);
phandle_cache[masked_handle] = np;
+   }
break;
}
}
-- 
Frank Rowand