On March 6, 2017 5:33:28 AM PST, Borislav Petkov <b...@suse.de> wrote: >On Mon, Mar 06, 2017 at 12:01:10AM -0700, Logan Gunthorpe wrote: >> Well honestly my issue was solved by fixing my kernel config. I have >no >> idea why I had optimize for size in there in the first place. > >I still think that we should address the iomem memcpy Linus mentioned. >So how about this partial revert. I've made 32-bit use the same special >__memcpy() version. > >Hmmm? > >--- >diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h >index 7afb0e2f07f4..9e378a10796d 100644 >--- a/arch/x86/include/asm/io.h >+++ b/arch/x86/include/asm/io.h >@@ -201,6 +201,7 @@ extern void set_iounmap_nonlazy(void); > #ifdef __KERNEL__ > > #include <asm-generic/iomap.h> >+#include <asm/string.h> > > /* > * Convert a virtual cached pointer to an uncached pointer >@@ -227,12 +228,13 @@ memset_io(volatile void __iomem *addr, unsigned >char val, size_t count) > * @src: The (I/O memory) source for the data > * @count: The number of bytes to copy > * >- * Copy a block of data from I/O memory. >+ * Copy a block of data from I/O memory. IO memory is different from >+ * cached memory so we use special memcpy version. > */ > static inline void >memcpy_fromio(void *dst, const volatile void __iomem *src, size_t >count) > { >- memcpy(dst, (const void __force *)src, count); >+ __inline_memcpy(dst, (const void __force *)src, count); > } > > /** >@@ -241,12 +243,13 @@ memcpy_fromio(void *dst, const volatile void >__iomem *src, size_t count) > * @src: The (RAM) source for the data > * @count: The number of bytes to copy > * >- * Copy a block of data to I/O memory. >+ * Copy a block of data to I/O memory. IO memory is different from >+ * cached memory so we use special memcpy version. 
> */ > static inline void > memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) > { >- memcpy((void __force *)dst, src, count); >+ __inline_memcpy((void __force *)dst, src, count); > } > > /* >diff --git a/arch/x86/include/asm/string_32.h >b/arch/x86/include/asm/string_32.h >index 3d3e8353ee5c..556fa4a975ff 100644 >--- a/arch/x86/include/asm/string_32.h >+++ b/arch/x86/include/asm/string_32.h >@@ -29,6 +29,7 @@ extern char *strchr(const char *s, int c); > #define __HAVE_ARCH_STRLEN > extern size_t strlen(const char *s); > >+#define __inline_memcpy __memcpy >static __always_inline void *__memcpy(void *to, const void *from, >size_t n) > { > int d0, d1, d2;
It isn't really that straightforward IMO. For UC memory, the transaction size really needs to be specified explicitly at all times and should be part of the API, rather than implicit. For WC/WT/WB device memory, the ordinary memcpy is valid and preferred. -- Sent from my Android device with K-9 Mail. Please excuse my brevity.