Boaz Harrosh wrote on 2016-10-26: > On 10/26/2016 06:50 PM, Brian Boylston wrote: >> Introduce memcpy_nocache() as a memcpy() that avoids the processor cache >> if possible. Without arch-specific support, this defaults to just >> memcpy(). For now, include arch-specific support for x86. >> >> Cc: Ross Zwisler <ross.zwis...@linux.intel.com> >> Cc: Thomas Gleixner <t...@linutronix.de> >> Cc: Ingo Molnar <mi...@redhat.com> >> Cc: "H. Peter Anvin" <h...@zytor.com> >> Cc: <x...@kernel.org> >> Cc: Al Viro <v...@zeniv.linux.org.uk> >> Cc: Dan Williams <dan.j.willi...@intel.com> >> Signed-off-by: Brian Boylston <brian.boyls...@hpe.com> >> Reviewed-by: Toshi Kani <toshi.k...@hpe.com> >> Reported-by: Oliver Moreno <oliver.mor...@hpe.com> >> --- >> arch/x86/include/asm/string_32.h | 3 +++ >> arch/x86/include/asm/string_64.h | 3 +++ >> arch/x86/lib/misc.c | 12 ++++++++++++ >> include/linux/string.h | 15 +++++++++++++++ >> 4 files changed, 33 insertions(+) >> diff --git a/arch/x86/include/asm/string_32.h >> b/arch/x86/include/asm/string_32.h >> index 3d3e835..64f80c0 100644 >> --- a/arch/x86/include/asm/string_32.h >> +++ b/arch/x86/include/asm/string_32.h >> @@ -196,6 +196,9 @@ static inline void *__memcpy3d(void *to, const void >> *from, size_t len) >> >> #endif >> +#define __HAVE_ARCH_MEMCPY_NOCACHE >> +extern void *memcpy_nocache(void *dest, const void *src, size_t count); >> + >> #define __HAVE_ARCH_MEMMOVE >> void *memmove(void *dest, const void *src, size_t n); >> diff --git a/arch/x86/include/asm/string_64.h >> b/arch/x86/include/asm/string_64.h >> index 90dbbd9..a8fdd55 100644 >> --- a/arch/x86/include/asm/string_64.h >> +++ b/arch/x86/include/asm/string_64.h >> @@ -51,6 +51,9 @@ extern void *__memcpy(void *to, const void *from, size_t >> len); >> #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) >> #endif >> +#define __HAVE_ARCH_MEMCPY_NOCACHE >> +extern void *memcpy_nocache(void *dest, const void *src, size_t count); >> + >> #define 
__HAVE_ARCH_MEMSET >> void *memset(void *s, int c, size_t n); >> void *__memset(void *s, int c, size_t n); >> diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c >> index 76b373a..c993ab3 100644 >> --- a/arch/x86/lib/misc.c >> +++ b/arch/x86/lib/misc.c >> @@ -1,3 +1,6 @@ >> +#include <linux/export.h> >> +#include <linux/uaccess.h> >> + >> /* >> * Count the digits of @val including a possible sign. >> * >> @@ -19,3 +22,12 @@ int num_digits(int val) >> } >> return d; >> } >> + >> +#ifdef __HAVE_ARCH_MEMCPY_NOCACHE >> +void *memcpy_nocache(void *dest, const void *src, size_t count) >> +{ >> + __copy_from_user_inatomic_nocache(dest, src, count); >> + return dest; >> +} >> +EXPORT_SYMBOL(memcpy_nocache); >> +#endif >> diff --git a/include/linux/string.h b/include/linux/string.h >> index 26b6f6a..7f40c41 100644 >> --- a/include/linux/string.h >> +++ b/include/linux/string.h >> @@ -102,6 +102,21 @@ extern void * memset(void *,int,__kernel_size_t); >> #ifndef __HAVE_ARCH_MEMCPY >> extern void * memcpy(void *,const void *,__kernel_size_t); >> #endif >> + >> +#ifndef __HAVE_ARCH_MEMCPY_NOCACHE >> +/** >> + * memcpy_nocache - Copy one area of memory to another, avoiding the >> + * processor cache if possible >> + * @dest: Where to copy to >> + * @src: Where to copy from >> + * @count: The size of the area. >> + */ >> +static inline void *memcpy_nocache(void *dest, const void *src, size_t >> count) >> +{ >> + return memcpy(dest, src, count); >> +} > > What about memcpy_to_pmem() in linux/pmem.h it already has all the arch > switches. > > Feels bad to add yet just another arch switch over __copy_user_nocache > > Just feels like too many things that do the same thing. Sigh
I agree that this looks like a nicer path.

I had considered adjusting copy_from_iter_nocache() to use memcpy_to_pmem(), but lib/iov_iter.c doesn't currently #include linux/pmem.h. Would it be acceptable to add it? Also, I wasn't sure if memcpy_to_pmem() would always mean exactly "memcpy nocache".

I had also considered adjusting copy_from_iter_pmem() (also in linux/pmem.h) to just use memcpy_to_pmem() directly, but then it can't use the goodness that is the iterate_and_advance() macro in iov_iter.c.

So, I took a shot with a possibly ill-fated memcpy_nocache(). Thoughts on either of the above two? Are these even in line with what you were thinking?

Thanks!
Brian

>
> Boaz
>
>> +#endif
>> +
>> #ifndef __HAVE_ARCH_MEMMOVE
>> extern void * memmove(void *,const void *,__kernel_size_t);
>> #endif