Experimental results[1] has shown that resorting to several branches and a direct-call is faster than indirect call via retpoline, even when the number of added branches go up 5.
This change adds two additional helpers, to cope with indirect calls with up to 4 available direct call option. We will use them in the next patch. [1] https://linuxplumbersconf.org/event/2/contributions/99/attachments/98/117/lpc18_paper_af_xdp_perf-v2.pdf Signed-off-by: Paolo Abeni <pab...@redhat.com> --- include/linux/indirect_call_wrapper.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 00d7e8e919c6..7c4cac87eaf7 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -23,6 +23,16 @@ likely(f == f2) ? f2(__VA_ARGS__) : \ INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ }) +#define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ + ({ \ + likely(f == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + }) +#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ + ({ \ + likely(f == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + }) #define INDIRECT_CALLABLE_DECLARE(f) f #define INDIRECT_CALLABLE_SCOPE @@ -30,6 +40,8 @@ #else #define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALL_2(f, f2, f1, ...) f(__VA_ARGS__) +#define INDIRECT_CALL_3(f, f3, f2, f1, ...) f(__VA_ARGS__) +#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALLABLE_DECLARE(f) #define INDIRECT_CALLABLE_SCOPE static #endif -- 2.20.1