Hi. This is a fix for the PR; it introduces a new target macro. Using the macro, a target can declare that it has a fast mempcpy, in which case mempcpy is preferred whenever possible.
The patch bootstraps on ppc64le-redhat-linux and survives regression tests. I also tested it on x86_64-linux-gnu. Ready to be installed? Martin gcc/ChangeLog: 2018-03-08 Martin Liska <mli...@suse.cz> PR middle-end/81657 * builtins.c (expand_builtin_memory_copy_args): Add new arguments. * config/i386/i386.h (TARGET_HAS_FAST_MEMPCPY_ROUTINE): New macro. * defaults.h (TARGET_HAS_FAST_MEMPCPY_ROUTINE): Likewise. * doc/tm.texi: Likewise. * doc/tm.texi.in: Likewise. * expr.c (compare_by_pieces): Add support for bail out. (emit_block_move_hints): Likewise. * expr.h (emit_block_move_hints): Add new arguments. gcc/testsuite/ChangeLog: 2018-03-09 Martin Liska <mli...@suse.cz> PR middle-end/81657 * gcc.dg/string-opt-1.c: Adjust test to run only on non-x86 target. --- gcc/builtins.c | 13 ++++++++++++- gcc/config/i386/i386.h | 3 +++ gcc/defaults.h | 7 +++++++ gcc/doc/tm.texi | 5 +++++ gcc/doc/tm.texi.in | 5 +++++ gcc/expr.c | 16 +++++++++++++++- gcc/expr.h | 4 +++- gcc/testsuite/gcc.dg/string-opt-1.c | 4 ++-- 8 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/gcc/builtins.c b/gcc/builtins.c index 85affa74510..c2ca36934f7 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3651,13 +3651,24 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len, src_mem = get_memory_rtx (src, len); set_mem_align (src_mem, src_align); + bool is_move_done; + /* Copy word part most expediently. */ dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, CALL_EXPR_TAILCALL (exp) && (endp == 0 || target == const0_rtx) ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, expected_align, expected_size, - min_size, max_size, probable_max_size); + min_size, max_size, probable_max_size, + TARGET_HAS_FAST_MEMPCPY_ROUTINE + && endp == 1, + &is_move_done); + + /* Bail out when a mempcpy call would be expanded as libcall and when + we have a target that provides a fast implementation + of mempcpy routine. */ + if (!is_move_done) + return NULL_RTX; if (dest_addr == 0) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e43edd77b56..8744d706fd7 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1914,6 +1914,9 @@ typedef struct ix86_args { #define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2) +/* C library provides fast implementation of mempcpy function. */ +#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 1 + /* Define if shifts truncate the shift count which implies one can omit a sign-extension or zero-extension of a shift count. diff --git a/gcc/defaults.h b/gcc/defaults.h index 78a08a33f12..2e5caac8dcd 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -1340,6 +1340,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define SET_RATIO(speed) MOVE_RATIO (speed) #endif +/* By default do not generate libcall to mempcpy and rather use + libcall to memcpy and adjustment of return value. */ + +#ifndef TARGET_HAS_FAST_MEMPCPY_ROUTINE +#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 0 +#endif + /* Supply a default definition of STACK_SAVEAREA_MODE for emit_stack_save. 
Normally move_insn, so Pmode stack pointer. */ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index bd8b917ba82..0c8a3f3298c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6627,6 +6627,11 @@ optimized for speed rather than size. If you don't define this, it defaults to the value of @code{MOVE_RATIO}. @end defmac +@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE +By default do not generate libcall to mempcpy and rather use +libcall to memcpy and adjustment of return value. +@end defmac + @defmac USE_LOAD_POST_INCREMENT (@var{mode}) A C expression used to determine whether a load postincrement is a good thing to use for a given mode. Defaults to the value of diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b0207146e8c..e7ef85ab78e 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4560,6 +4560,11 @@ optimized for speed rather than size. If you don't define this, it defaults to the value of @code{MOVE_RATIO}. @end defmac +@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE +By default do not generate libcall to mempcpy and rather use +libcall to memcpy and adjustment of return value. +@end defmac + @defmac USE_LOAD_POST_INCREMENT (@var{mode}) A C expression used to determine whether a load postincrement is a good thing to use for a given mode. Defaults to the value of diff --git a/gcc/expr.c b/gcc/expr.c index 00660293f72..b6c13652d79 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -1554,6 +1554,8 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len, MIN_SIZE is the minimal size of block to move MAX_SIZE is the maximal size of block to move, if it can not be represented in unsigned HOST_WIDE_INT, than it is mask of all ones. + If BAIL_OUT_LIBCALL is set true, do not emit library call and set + *IS_MOVE_DONE to false. Return the address of the new block, if memcpy is called and returns it, 0 otherwise. 
*/ @@ -1563,12 +1565,17 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, unsigned int expected_align, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - unsigned HOST_WIDE_INT probable_max_size) + unsigned HOST_WIDE_INT probable_max_size, + bool bail_out_libcall, bool *is_move_done) { bool may_use_call; rtx retval = 0; unsigned int align; + /* When not doing a bail out, we always emit a memory move. */ + if (is_move_done) + *is_move_done = true; + gcc_assert (size); if (CONST_INT_P (size) && INTVAL (size) == 0) return 0; @@ -1625,6 +1632,13 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y))) { + if (bail_out_libcall) + { + if (is_move_done) + *is_move_done = false; + return retval; + } + /* Since x and y are passed to a libcall, mark the corresponding tree EXPR as addressable. */ tree y_expr = MEM_EXPR (y); diff --git a/gcc/expr.h b/gcc/expr.h index b3d523bcb24..023bc5aec47 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -110,7 +110,9 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, unsigned int, HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); + unsigned HOST_WIDE_INT, + bool bail_out_libcall = false, + bool *is_move_done = NULL); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, by_pieces_constfn, void *); extern bool emit_storent_insn (rtx to, rtx from); diff --git a/gcc/testsuite/gcc.dg/string-opt-1.c b/gcc/testsuite/gcc.dg/string-opt-1.c index 2f060732bf0..851c8b04a33 100644 --- a/gcc/testsuite/gcc.dg/string-opt-1.c +++ b/gcc/testsuite/gcc.dg/string-opt-1.c @@ -48,5 +48,5 @@ main (void) return 0; } -/* { dg-final { scan-assembler-not "\<mempcpy\>" } } */ -/* { dg-final { scan-assembler "memcpy" } } */ +/* { dg-final { scan-assembler-not "\<mempcpy\>" { target { i?86-*-* x86_64-*-* } } } } */ 
+/* { dg-final { scan-assembler "memcpy" { target { ! { i?86-*-* x86_64-*-* } } } } } */