Before we rework the "pmem api" to stop abusing __copy_user_nocache()
for memcpy_to_pmem() we need to fix cases where we may strand dirty data
in the cpu cache. The problem occurs when copy_from_iter_pmem() is used
for arbitrary data transfers from userspace. There is no guarantee that
these transfers, performed by dax_iomap_actor(), will have aligned
destinations or aligned transfer lengths. Backstop the usage of
__copy_user_nocache() with explicit cache management in these unaligned
cases.

Yes, copy_from_iter_pmem() is now too big for an inline, but addressing
that is saved for a later patch that moves the entirety of the "pmem
api" into the pmem driver directly.

Fixes: 5de490daec8b ("pmem: add copy_from_iter_pmem() and clear_pmem()")
Cc: <sta...@vger.kernel.org>
Cc: <x...@kernel.org>
Cc: Jan Kara <j...@suse.cz>
Cc: Jeff Moyer <jmo...@redhat.com>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Toshi Kani <toshi.k...@hpe.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Al Viro <v...@zeniv.linux.org.uk>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Matthew Wilcox <mawil...@microsoft.com>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
[toshi: trailing bytes flush only needed in the 4B misalign case]
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
v2: Change the condition for flushing the last cacheline of the
    destination from 8-byte to 4-byte misalignment (Toshi)

 arch/x86/include/asm/pmem.h |   41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 2c1ebeb4d737..cf4e68faedc4 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
                clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-       return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr:      PMEM destination address
@@ -94,7 +86,34 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
        /* TODO: skip the write-back by always using non-temporal stores */
        len = copy_from_iter_nocache(addr, bytes, i);
 
-       if (__iter_needs_pmem_wb(i))
+       /*
+        * In the iovec case on x86_64 copy_from_iter_nocache() uses
+        * non-temporal stores for the bulk of the transfer, but we need
+        * to manually flush if the transfer is unaligned. In the
+        * non-iovec case the entire destination needs to be flushed.
+        */
+       if (iter_is_iovec(i)) {
+               unsigned long dest = (unsigned long) addr;
+
+               /*
+                * If the destination is not 8-byte aligned then
+                * __copy_user_nocache (on x86_64) uses cached copies
+                */
+               if (dest & 8) {
+                       arch_wb_cache_pmem(addr, 1);
+                       dest = ALIGN(dest, 8);
+               }
+
+               /*
+                * If the remaining transfer length, after accounting
+                * for destination alignment, is not 4-byte aligned
+                * then __copy_user_nocache() falls back to cached
+                * copies for the trailing bytes in the final cacheline
+                * of the transfer.
+                */
+               if ((bytes - (dest - (unsigned long) addr)) & 4)
+                       arch_wb_cache_pmem(addr + bytes - 1, 1);
+       } else
                arch_wb_cache_pmem(addr, bytes);
 
        return len;

Reply via email to