On Thu, May 17, 2018 at 12:49 PM, Christophe Leroy <christophe.le...@c-s.fr> wrote:
> In my 8xx configuration, I get 208 calls to memcmp()
> Within those 208 calls, about half of them have constant sizes,
> 46 have a size of 8, 17 have a size of 16, only a few have a
> size over 16. Other fixed sizes are mostly 4, 6 and 10.
>
> This patch inlines calls to memcmp() when size
> is constant and lower than or equal to 16
>
> In my 8xx configuration, this reduces the number of calls
> to memcmp() from 208 to 123
>
> The following table shows the number of TB timeticks to perform
> a constant size memcmp() before and after the patch depending on
> the size
>
>       Before  After  Improvement
> 01:     7577   5682  25%
> 02:    41668   5682  86%
> 03:    51137  13258  74%
> 04:    45455   5682  87%
> 05:    58713  13258  77%
> 06:    58712  13258  77%
> 07:    68183  20834  70%
> 08:    56819  15153  73%
> 09:    70077  28411  60%
> 10:    70077  28411  60%
> 11:    79546  35986  55%
> 12:    68182  28411  58%
> 13:    81440  35986  55%
> 14:    81440  39774  51%
> 15:    94697  43562  54%
> 16:    79546  37881  52%
>
> Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr>
> ---
>  arch/powerpc/include/asm/string.h | 46 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
> index 35f1aaad9b50..80cf0f9605dd 100644
> --- a/arch/powerpc/include/asm/string.h
> +++ b/arch/powerpc/include/asm/string.h
> @@ -4,6 +4,8 @@
>
>  #ifdef __KERNEL__
>
> +#include <linux/kernel.h>
> +
>  #define __HAVE_ARCH_STRNCPY
>  #define __HAVE_ARCH_STRNCMP
>  #define __HAVE_ARCH_MEMSET
> @@ -51,10 +53,54 @@ static inline int strncmp(const char *p, const char *q, __kernel_size_t size)
>  	return __strncmp(p, q, size);
>  }
>
> +static inline int __memcmp1(const void *p, const void *q, int off)

Would it change anything if you used char * instead of void * pointers?
I find void * arithmetic hard to read. (There is an untested sketch of
what I mean at the end of this mail.)

> +{
> +	return *(u8*)(p + off) - *(u8*)(q + off);
> +}
> +
> +static inline int __memcmp2(const void *p, const void *q, int off)
> +{
> +	return be16_to_cpu(*(u16*)(p + off)) - be16_to_cpu(*(u16*)(q + off));
> +}
> +
> +static inline int __memcmp4(const void *p, const void *q, int off)
> +{
> +	return be32_to_cpu(*(u32*)(p + off)) - be32_to_cpu(*(u32*)(q + off));
> +}
> +
> +static inline int __memcmp8(const void *p, const void *q, int off)
> +{
> +	s64 tmp = be64_to_cpu(*(u64*)(p + off)) - be64_to_cpu(*(u64*)(q + off));

I always assumed unaligned 64-bit accesses would trigger an exception.
Is that correct? (See the second sketch at the end of this mail for a
way to sidestep the question with the unaligned accessors.)

> +	return tmp >> 32 ? : (int)tmp;
> +}
> +
> +static inline int __memcmp_cst(const void *p,const void *q,__kernel_size_t size)
> +{
> +	if (size == 1)
> +		return __memcmp1(p, q, 0);
> +	if (size == 2)
> +		return __memcmp2(p, q, 0);
> +	if (size == 3)
> +		return __memcmp2(p, q, 0) ? : __memcmp1(p, q, 2);
> +	if (size == 4)
> +		return __memcmp4(p, q, 0);
> +	if (size == 5)
> +		return __memcmp4(p, q, 0) ? : __memcmp1(p, q, 4);
> +	if (size == 6)
> +		return __memcmp4(p, q, 0) ? : __memcmp2(p, q, 4);
> +	if (size == 7)
> +		return __memcmp4(p, q, 0) ? : __memcmp2(p, q, 4) ? : __memcmp1(p, q, 6);
> +	return __memcmp8(p, q, 0);
> +}
> +
>  static inline int memcmp(const void *p,const void *q,__kernel_size_t size)
>  {
>  	if (unlikely(!size))
>  		return 0;
> +	if (__builtin_constant_p(size) && size <= 8)
> +		return __memcmp_cst(p, q, size);
> +	if (__builtin_constant_p(size) && size <= 16)
> +		return __memcmp8(p, q, 0) ? : __memcmp_cst(p + 8, q + 8, size - 8);
>  	return __memcmp(p, q, size);
>  }
>
> --
> 2.13.3
>
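
To make the char * comment concrete, here is an untested sketch of the
pointer style I had in mind for the 1-byte and 2-byte helpers (the
helper names and the be16_to_cpu() handling are just copied from your
patch):

static inline int __memcmp1(const void *p, const void *q, int off)
{
	/* index through u8 pointers rather than doing void * arithmetic */
	const u8 *p8 = p, *q8 = q;

	return p8[off] - q8[off];
}

static inline int __memcmp2(const void *p, const void *q, int off)
{
	/* same idea with char *, then cast for the 16-bit load */
	const char *pc = p, *qc = q;

	return be16_to_cpu(*(const u16 *)(pc + off)) -
	       be16_to_cpu(*(const u16 *)(qc + off));
}

The generated code should be the same either way; it is purely a
readability question.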
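
On the unaligned access question: if misaligned 64-bit loads do turn
out to fault on some platforms, something along these lines (again
untested, and assuming the generic get_unaligned_be64() helper from
asm/unaligned.h is acceptable here) would sidestep the issue while
keeping the same comparison logic as your __memcmp8():

#include <asm/unaligned.h>

static inline int __memcmp8(const void *p, const void *q, int off)
{
	/* let the unaligned accessors pick a safe load sequence instead of
	 * dereferencing a possibly misaligned u64 pointer directly */
	s64 tmp = get_unaligned_be64(p + off) - get_unaligned_be64(q + off);

	return tmp >> 32 ? : (int)tmp;
}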