On Thu, 2022-02-17 at 13:19 +0100, Christophe Leroy wrote:
> All functions defined as static inline in net/checksum.h are
> meant to be inlined for performance reasons.
>
> But since commit ac7c3e4ff401 ("compiler: enable
> CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to
> uninline functions when it wants.
>
> Fair enough in the general case, but for tiny performance critical
> checksum helpers that's counter-productive.
Thanks.  Trivial style notes:

> diff --git a/include/net/checksum.h b/include/net/checksum.h
[]
> @@ -22,7 +22,7 @@
>  #include <asm/checksum.h>
>
>  #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
> -static inline
> +static __always_inline
>  __wsum csum_and_copy_from_user (const void __user *src, void *dst,
>  				int len)
>  {

__wsum might be better placed on the previous line.

[]
> @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
>  #endif
>
>  #ifndef _HAVE_ARCH_CSUM_AND_COPY
> -static inline __wsum
> +static __always_inline __wsum
>  csum_partial_copy_nocheck(const void *src, void *dst, int len)

To be consistent with the location of the __wsum return value when
splitting the function definitions across multiple lines.
(like the below)

> @@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset)
>  	return sum;
>  }
>
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_add(__wsum csum, __wsum csum2, int offset)
>  {
>  	return csum_add(csum, csum_shift(csum2, offset));
>  }
>
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
>  {
>  	return csum_block_add(csum, csum2, offset);
>  }
>
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_sub(__wsum csum, __wsum csum2, int offset)
>  {
>  	return csum_block_add(csum, ~csum2, offset);
>  }
>
> -static inline __wsum csum_unfold(__sum16 n)
> +static __always_inline __wsum csum_unfold(__sum16 n)
>  {
>  	return (__force __wsum)n;
>  }
>
[]
> -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
> +static __always_inline
> +__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
>  {
>  	return csum_partial(buff, len, sum);
>  }

And this __wsum could be moved too.

> @@ -150,15 +151,15 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
[]
> -static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
> +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
>  				    int start, int offset)
>  {
>  	__sum16 *psum = (__sum16 *)(ptr + offset);

And this one could be split like the above:

static __always_inline __wsum
remcsum_adjust(void *ptr, __wsum csum, int start, int offset)
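
i.e. for the other hunks flagged above, something like this (untested,
just illustrating the suggested placement of the __wsum return type;
the function bodies stay as they are):

static __always_inline __wsum
csum_and_copy_from_user(const void __user *src, void *dst, int len)

static __always_inline __wsum
csum_partial_ext(const void *buff, int len, __wsum sum)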