Add TARGET_FOLD_MEMCPY_MAX for the maximum number of bytes to fold memcpy. The default is
MOVE_MAX * MOVE_RATIO (optimize_function_for_size_p (cfun)) For x86, it is MOVE_MAX to restore the old behavior before commit 5f6a6c91d7c592cb49f7c519f289777eac09bb74 Author: Richard Earnshaw <rearn...@arm.com> Date: Fri Sep 3 17:06:15 2021 +0100 gimple: allow more folding of memcpy [PR102125] gcc/ PR target/103393 * gimple-fold.cc (gimple_fold_builtin_memory_op): Use targetm.fold_memcpy_max instead of MOVE_MAX * MOVE_RATIO. * target.def: Add fold_memcpy_max. * varasm.cc (default_fold_memcpy_max): New. * varasm.h (default_fold_memcpy_max): Likewise. * config/i386/i386.cc (ix86_fold_memcpy_max): New. (TARGET_FOLD_MEMCPY_MAX): Likewise. * doc/tm.texi.in: Add TARGET_FOLD_MEMCPY_MAX. * doc/tm.texi: Regenerate. gcc/testsuite/ PR target/103393 * gcc.target/i386/pr103393.c: New test. --- gcc/config/i386/i386.cc | 12 ++++++++++++ gcc/doc/tm.texi | 4 ++++ gcc/doc/tm.texi.in | 2 ++ gcc/gimple-fold.cc | 7 ++----- gcc/target.def | 6 ++++++ gcc/testsuite/gcc.target/i386/pr103393.c | 16 ++++++++++++++++ gcc/varasm.cc | 9 +++++++++ gcc/varasm.h | 2 ++ 8 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr103393.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index b2bf90576d5..2198db14dc6 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -23918,6 +23918,15 @@ ix86_push_rounding (poly_int64 bytes) return ROUND_UP (bytes, UNITS_PER_WORD); } +/* Implement the TARGET_FOLD_MEMCPY_MAX hook. Return the maximum number + of bytes to fold memcpy. */ + +static unsigned int +ix86_fold_memcpy_max (void) +{ + return MOVE_MAX; +} + /* Target-specific selftests. */ #if CHECKING_P @@ -24735,6 +24744,9 @@ static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests #endif /* #if CHECKING_P */ +#undef TARGET_FOLD_MEMCPY_MAX +#define TARGET_FOLD_MEMCPY_MAX ix86_fold_memcpy_max + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 49864dd79f8..5c3b65b176e 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11924,6 +11924,10 @@ statement holding the function call. Returns true if any change was made to the GIMPLE stream. @end deftypefn +@deftypefn {Target Hook} {unsigned int} TARGET_FOLD_MEMCPY_MAX (void) +This target hook returns the maximum number of bytes to fold memcpy. +@end deftypefn + @deftypefn {Target Hook} int TARGET_COMPARE_VERSION_PRIORITY (tree @var{decl1}, tree @var{decl2}) This hook is used to compare the target attributes in two functions to determine which function's features get higher priority. This is used diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 95e5e341f07..169145d0d45 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7924,6 +7924,8 @@ to by @var{ce_info}. @hook TARGET_GIMPLE_FOLD_BUILTIN +@hook TARGET_FOLD_MEMCPY_MAX + @hook TARGET_COMPARE_VERSION_PRIORITY @hook TARGET_GET_FUNCTION_VERSIONS_DISPATCHER diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index c9179abb27e..31a9684fea4 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -965,14 +965,11 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, /* If we can perform the copy efficiently with first doing all loads and then all stores inline it that way. Currently efficiently means that we can load all the memory with a single set operation and that the - total size is less than MOVE_MAX * MOVE_RATIO. */ + total size is less than TARGET_FOLD_MEMCPY_MAX. */ src_align = get_pointer_alignment (src); dest_align = get_pointer_alignment (dest); if (tree_fits_uhwi_p (len) - && (compare_tree_int - (len, (MOVE_MAX - * MOVE_RATIO (optimize_function_for_size_p (cfun)))) - <= 0) + && compare_tree_int (len, targetm.fold_memcpy_max ()) <= 0 /* FIXME: Don't transform copies from strings with known length. Until GCC 9 this prevented a case in gcc.dg/strlenopt-8.c from being handled, and the case was XFAILed for that reason. diff --git a/gcc/target.def b/gcc/target.def index 72c2e1ef756..b88338f5003 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2489,6 +2489,12 @@ was made to the GIMPLE stream.", bool, (gimple_stmt_iterator *gsi), hook_bool_gsiptr_false) +DEFHOOK +(fold_memcpy_max, + "This target hook returns the maximum number of bytes to fold memcpy.", + unsigned int, (void), + default_fold_memcpy_max) + /* Target hook is used to compare the target attributes in two functions to determine which function's features get higher priority. This is used during function multi-versioning to figure out the order in which two diff --git a/gcc/testsuite/gcc.target/i386/pr103393.c b/gcc/testsuite/gcc.target/i386/pr103393.c new file mode 100644 index 00000000000..7d54ae76561 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103393.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +struct TestData { + float arr[8]; +}; + +void +cpy (struct TestData *s1, struct TestData *s2 ) +{ + for(int i=0; i<16; ++i) + s1->arr[i] = s2->arr[i]; +} + +/* { dg-final { scan-assembler "jmp\[\\t \]*_?memmove" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "call\[\\t \]*_?memmove" { target ia32 } } } */ diff --git a/gcc/varasm.cc b/gcc/varasm.cc index d3d9daffb5d..49cd05146c0 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -8509,4 +8509,13 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) #endif } + +/* The default implementation of TARGET_FOLD_MEMCPY_MAX. */ + +unsigned int +default_fold_memcpy_max (void) +{ + return MOVE_MAX * MOVE_RATIO (optimize_function_for_size_p (cfun)); +} + #include "gt-varasm.h" diff --git a/gcc/varasm.h b/gcc/varasm.h index d5d8c4e5578..1fcb37e1f66 100644 --- a/gcc/varasm.h +++ b/gcc/varasm.h @@ -79,4 +79,6 @@ extern rtx assemble_static_space (unsigned HOST_WIDE_INT); extern rtx assemble_trampoline_template (void); +extern unsigned int default_fold_memcpy_max (void); + #endif // GCC_VARASM_H -- 2.35.1