On 03/14/2018 02:07 PM, Jakub Jelinek wrote: > On Wed, Mar 14, 2018 at 01:54:39PM +0100, Martin Liška wrote: >> --- a/gcc/builtins.c >> +++ b/gcc/builtins.c >> @@ -3651,13 +3651,24 @@ expand_builtin_memory_copy_args (tree dest, tree >> src, tree len, >> src_mem = get_memory_rtx (src, len); >> set_mem_align (src_mem, src_align); >> >> + bool is_move_done; >> + >> /* Copy word part most expediently. */ > > This comment supposedly belongs right above the emit_block_move_hints call. > >> + bool bail_out_libcall = endp == 1 >> + && targetm.libc_func_speed ((int)BUILT_IN_MEMPCPY) == FAST_SPEED; > > Formatting. && belongs below endp. So either: > bool bail_out_libcall > = (endp == 1 > && ...); > or > bool bail_out_libcall = false; > if (endp == 1 > && ...) > bail_out_libcall = true; > ? > The variable is not named very well, shouldn't it be avoid_libcall or > something similar? Perhaps the variable should have a comment describing > what it is. Do you need separate argument for that bool and > is_move_done, rather than just the flag being that some pointer to bool is > non-NULL?
Can you please explain me how to replace the 2 new arguments? > >> dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, >> CALL_EXPR_TAILCALL (exp) >> && (endp == 0 || target == const0_rtx) >> ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, >> expected_align, expected_size, >> - min_size, max_size, probable_max_size); >> + min_size, max_size, probable_max_size, >> + bail_out_libcall, &is_move_done); >> + >> + /* Bail out when a mempcpy call would be expanded as libcall and when >> + we have a target that provides a fast implementation >> + of mempcpy routine. */ >> + if (!is_move_done) >> + return NULL_RTX; >> >> if (dest_addr == 0) >> { >> --- a/gcc/config/i386/i386.c >> +++ b/gcc/config/i386/i386.c >> @@ -2733,6 +2733,23 @@ ix86_using_red_zone (void) >> && (!cfun->machine->has_local_indirect_jump >> || cfun->machine->indirect_branch_type == indirect_branch_keep)); >> } >> + > > Missing function comment here. For target hooks, usually there is a copy of > the target.def comment, perhaps with further details on why it is overridden > and what it does. >> --- a/gcc/coretypes.h >> +++ b/gcc/coretypes.h >> @@ -384,6 +384,13 @@ enum excess_precision_type >> EXCESS_PRECISION_TYPE_FAST >> }; >> > > Missing comment describing what it is, plus it the enumerators are too > generic, if it is libc_speed enum, perhaps LIBC_FAST_SPEED etc.? >> --- a/gcc/targhooks.c >> +++ b/gcc/targhooks.c >> @@ -1642,6 +1642,12 @@ no_c99_libc_has_function (enum function_class >> fn_class ATTRIBUTE_UNUSED) >> return false; >> } >> > > Again, missing function comment. > >> +enum libc_speed >> +default_libc_func_speed (int) >> +{ >> + return UNKNOWN_SPEED; >> +} >> + > >> --- a/gcc/testsuite/gcc.dg/string-opt-1.c >> +++ b/gcc/testsuite/gcc.dg/string-opt-1.c >> @@ -48,5 +48,5 @@ main (void) >> return 0; >> } >> >> -/* { dg-final { scan-assembler-not "\<mempcpy\>" } } */ >> -/* { dg-final { scan-assembler "memcpy" } } */ >> +/* { dg-final { scan-assembler-not "\<mempcpy\>" { target { i?86-*-* >> x86_64-*-* } } } } */ >> +/* { dg-final { scan-assembler "memcpy" { target { ! { i?86-*-* x86_64-*-* >> } } } } } */ > > First of all, I don't really understand this, I'd expect both the > two old dg-final lines to be used as is for non-x86 targets and another two > for x86_64, and more importantly, the target hook is only for glibc, not for > musl/bionic etc., nor for non-linux, so you probably need some effective > target for it for whether it is glibc rather than musl/bionic, and use > ...-*-linux* and ...-*-gnu* etc. rather than ...-*-*. Or tweak the dg-fianl > patterns so that it succeeds on both variants of the target hook, but then > does the test test anything at all? I fixed that by preserving the old 2 old-finals and then I added a new for x86_64 target. Apart from the comment above I've fixed all nits and mempcpy is not used when LHS == NULL. Martin > > Jakub >
>From 26979038ce9500015f957afd896146022a38490b Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Wed, 14 Mar 2018 09:44:18 +0100 Subject: [PATCH] Introduce new libc_func_speed target hook (PR middle-end/81657). gcc/ChangeLog: 2018-03-14 Martin Liska <mli...@suse.cz> PR middle-end/81657 * builtins.c (expand_builtin_memory_copy_args): Handle situation when libc library provides a fast mempcpy implementation/ * config/i386/i386-protos.h (gnu_libc_func_speed): New. * config/i386/i386.c (enum libc_speed): Likewise. (ix86_libc_func_speed): Likewise. (TARGET_LIBC_FUNC_SPEED): Likewise. * coretypes.h (enum libc_speed): Likewise. * doc/tm.texi: Document new target hook. * doc/tm.texi.in: Likewise. * expr.c (emit_block_move_hints): Handle libc bail out argument. * expr.h (emit_block_move_hints): Add new parameters. * target.def: Add new hook. * targhooks.c (enum libc_speed): New enum. (default_libc_func_speed): Provide a default hook implementation. * targhooks.h (default_libc_func_speed): Likewise. gcc/testsuite/ChangeLog: 2018-03-14 Martin Liska <mli...@suse.cz> * gcc.dg/string-opt-1.c: Adjust scans for i386 and glibc target and others. --- gcc/builtins.c | 16 +++++++++++++++- gcc/config/i386/i386-protos.h | 2 ++ gcc/config/i386/i386.c | 24 ++++++++++++++++++++++++ gcc/coretypes.h | 7 +++++++ gcc/doc/tm.texi | 4 ++++ gcc/doc/tm.texi.in | 1 + gcc/expr.c | 16 +++++++++++++++- gcc/expr.h | 4 +++- gcc/target.def | 7 +++++++ gcc/targhooks.c | 9 +++++++++ gcc/targhooks.h | 1 + gcc/testsuite/gcc.dg/string-opt-1.c | 5 +++-- 12 files changed, 91 insertions(+), 5 deletions(-) diff --git a/gcc/builtins.c b/gcc/builtins.c index 85affa74510..b668045374c 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3651,13 +3651,27 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len, src_mem = get_memory_rtx (src, len); set_mem_align (src_mem, src_align); + bool is_move_done; + + /* emit_block_move_hints can generate a library call to memcpy function. + In situations when a libc library provides fast implementation + of mempcpy, then it's better to call mempcpy directly. */ + bool avoid_libcall + = (endp == 1 + && targetm.libc_func_speed ((int)BUILT_IN_MEMPCPY) == LIBC_FAST_SPEED + && target != const0_rtx); + /* Copy word part most expediently. */ dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, CALL_EXPR_TAILCALL (exp) && (endp == 0 || target == const0_rtx) ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, expected_align, expected_size, - min_size, max_size, probable_max_size); + min_size, max_size, probable_max_size, + avoid_libcall, &is_move_done); + + if (!is_move_done) + return NULL_RTX; if (dest_addr == 0) { diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ef7c818986f..d3fc515845b 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -47,6 +47,8 @@ extern void ix86_reset_previous_fndecl (void); extern bool ix86_using_red_zone (void); +extern enum libc_speed gnu_libc_func_speed (int fn); + #ifdef RTX_CODE extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index af24c6ec5ba..f4cbecdf099 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2733,6 +2733,27 @@ ix86_using_red_zone (void) && (!cfun->machine->has_local_indirect_jump || cfun->machine->indirect_branch_type == indirect_branch_keep)); } + +/* This hook determines whether a function from libc has a fast implementation + FN is present at the runtime. We override it for i386 and glibc C library + as this combination provides fast implementation of mempcpy function. */ + +enum libc_speed +ix86_libc_func_speed (int fn) +{ + enum built_in_function f = (built_in_function)fn; + + if (!OPTION_GLIBC) + return LIBC_UNKNOWN_SPEED; + + switch (f) + { + case BUILT_IN_MEMPCPY: + return LIBC_FAST_SPEED; + default: + return LIBC_UNKNOWN_SPEED; + } +} /* Return a string that documents the current -m options. The caller is responsible for freeing the string. */ @@ -52061,6 +52082,9 @@ ix86_run_selftests (void) #undef TARGET_WARN_PARAMETER_PASSING_ABI #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi +#undef TARGET_LIBC_FUNC_SPEED +#define TARGET_LIBC_FUNC_SPEED ix86_libc_func_speed + #if CHECKING_P #undef TARGET_RUN_TARGET_SELFTESTS #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests diff --git a/gcc/coretypes.h b/gcc/coretypes.h index 283b4eb33fe..fe618f708f4 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -384,6 +384,13 @@ enum excess_precision_type EXCESS_PRECISION_TYPE_FAST }; +enum libc_speed +{ + LIBC_FAST_SPEED, + LIBC_SLOW_SPEED, + LIBC_UNKNOWN_SPEED +}; + /* Support for user-provided GGC and PCH markers. The first parameter is a pointer to a pointer, the second a cookie. */ typedef void (*gt_pointer_operator) (void *, void *); diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index bd8b917ba82..0f7c91a22c4 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5501,6 +5501,10 @@ macro, a reasonable default is used. This hook determines whether a function from a class of functions @var{fn_class} is present at the runtime. @end deftypefn +@deftypefn {Target Hook} libc_speed TARGET_LIBC_FUNC_SPEED (int @var{fn}) +This hook determines whether a function from libc has a fast implementation +@var{fn} is present at the runtime. +@end deftypefn @defmac NEXT_OBJC_RUNTIME Set this macro to 1 to use the "NeXT" Objective-C message sending conventions diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b0207146e8c..4bb2998a8a1 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -3933,6 +3933,7 @@ macro, a reasonable default is used. @end defmac @hook TARGET_LIBC_HAS_FUNCTION +@hook TARGET_LIBC_FUNC_SPEED @defmac NEXT_OBJC_RUNTIME Set this macro to 1 to use the "NeXT" Objective-C message sending conventions diff --git a/gcc/expr.c b/gcc/expr.c index 00660293f72..b6c13652d79 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -1554,6 +1554,8 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len, MIN_SIZE is the minimal size of block to move MAX_SIZE is the maximal size of block to move, if it can not be represented in unsigned HOST_WIDE_INT, than it is mask of all ones. + If BAIL_OUT_LIBCALL is set true, do not emit library call and set + *IS_MOVE_DONE to false. Return the address of the new block, if memcpy is called and returns it, 0 otherwise. */ @@ -1563,12 +1565,17 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, unsigned int expected_align, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - unsigned HOST_WIDE_INT probable_max_size) + unsigned HOST_WIDE_INT probable_max_size, + bool bail_out_libcall, bool *is_move_done) { bool may_use_call; rtx retval = 0; unsigned int align; + /* When not doing a bail out, we always emit a memory move. */ + if (is_move_done) + *is_move_done = true; + gcc_assert (size); if (CONST_INT_P (size) && INTVAL (size) == 0) return 0; @@ -1625,6 +1632,13 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y))) { + if (bail_out_libcall) + { + if (is_move_done) + *is_move_done = false; + return retval; + } + /* Since x and y are passed to a libcall, mark the corresponding tree EXPR as addressable. */ tree y_expr = MEM_EXPR (y); diff --git a/gcc/expr.h b/gcc/expr.h index b3d523bcb24..023bc5aec47 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -110,7 +110,9 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, unsigned int, HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); + unsigned HOST_WIDE_INT, + bool bail_out_libcall = false, + bool *is_move_done = NULL); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, by_pieces_constfn, void *); extern bool emit_storent_insn (rtx to, rtx from); diff --git a/gcc/target.def b/gcc/target.def index c5b2a1e7e71..3bbddc82776 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2639,6 +2639,13 @@ DEFHOOK bool, (enum function_class fn_class), default_libc_has_function) +DEFHOOK +(libc_func_speed, + "This hook determines whether a function from libc has a fast implementation\n\ +@var{fn} is present at the runtime.", + libc_speed, (int fn), + default_libc_func_speed) + /* True if new jumps cannot be created, to replace existing ones or not, at the current point in the compilation. */ DEFHOOK diff --git a/gcc/targhooks.c b/gcc/targhooks.c index fafcc6c5196..6e44f6f79cf 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1642,6 +1642,15 @@ no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED) return false; } +/* This hook determines whether a function from libc has a fast implementation + FN is present at the runtime. */ + +enum libc_speed +default_libc_func_speed (int) +{ + return LIBC_UNKNOWN_SPEED; +} + tree default_builtin_tm_load_store (tree ARG_UNUSED (type)) { diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 8a4393f2ba4..7508673ad0a 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -205,6 +205,7 @@ extern bool default_have_conditional_execution (void); extern bool default_libc_has_function (enum function_class); extern bool no_c99_libc_has_function (enum function_class); extern bool gnu_libc_has_function (enum function_class); +extern enum libc_speed default_libc_func_speed (int); extern tree default_builtin_tm_load_store (tree); diff --git a/gcc/testsuite/gcc.dg/string-opt-1.c b/gcc/testsuite/gcc.dg/string-opt-1.c index 2f060732bf0..7faaadcbb1f 100644 --- a/gcc/testsuite/gcc.dg/string-opt-1.c +++ b/gcc/testsuite/gcc.dg/string-opt-1.c @@ -48,5 +48,6 @@ main (void) return 0; } -/* { dg-final { scan-assembler-not "\<mempcpy\>" } } */ -/* { dg-final { scan-assembler "memcpy" } } */ +/* { dg-final { scan-assembler-not "\<mempcpy\>" { target { ! { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } } */ +/* { dg-final { scan-assembler "memcpy" { target { ! { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } } */ +/* { dg-final { scan-assembler "mempcpy" { target { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } */ -- 2.16.2