Add the TARGET_FOLD_MEMCPY_MAX target hook, which returns the maximum
number of bytes for which a memcpy call is folded into inline loads and stores.
The default is

MOVE_MAX * MOVE_RATIO (optimize_function_for_size_p (cfun))

For x86, it is MOVE_MAX to restore the old behavior before

commit 5f6a6c91d7c592cb49f7c519f289777eac09bb74
Author: Richard Earnshaw <rearn...@arm.com>
Date:   Fri Sep 3 17:06:15 2021 +0100

    gimple: allow more folding of memcpy [PR102125]

gcc/

        PR target/103393
        * gimple-fold.cc (gimple_fold_builtin_memory_op): Use
        targetm.fold_memcpy_max instead of MOVE_MAX * MOVE_RATIO.
        * target.def: Add fold_memcpy_max.
        * varasm.cc (default_fold_memcpy_max): New.
        * varasm.h (default_fold_memcpy_max): Likewise.
        * config/i386/i386.cc (ix86_fold_memcpy_max): New.
        (TARGET_FOLD_MEMCPY_MAX): Likewise.
        * doc/tm.texi.in: Add TARGET_FOLD_MEMCPY_MAX.
        * doc/tm.texi: Regenerate.

gcc/testsuite/

        PR target/103393
        * gcc.target/i386/pr103393.c: New test.
---
 gcc/config/i386/i386.cc                  | 12 ++++++++++++
 gcc/doc/tm.texi                          |  4 ++++
 gcc/doc/tm.texi.in                       |  2 ++
 gcc/gimple-fold.cc                       |  7 ++-----
 gcc/target.def                           |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr103393.c | 16 ++++++++++++++++
 gcc/varasm.cc                            |  9 +++++++++
 gcc/varasm.h                             |  2 ++
 8 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103393.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b2bf90576d5..2198db14dc6 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23918,6 +23918,15 @@ ix86_push_rounding (poly_int64 bytes)
   return ROUND_UP (bytes, UNITS_PER_WORD);
 }
 
+/* Implement the TARGET_FOLD_MEMCPY_MAX hook.  Return the maximum number
+   of bytes to fold memcpy.  */
+
+static unsigned int
+ix86_fold_memcpy_max (void)
+{
+  return MOVE_MAX;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -24735,6 +24744,9 @@ static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
 #endif /* #if CHECKING_P */
 
+#undef TARGET_FOLD_MEMCPY_MAX
+#define TARGET_FOLD_MEMCPY_MAX ix86_fold_memcpy_max
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 49864dd79f8..5c3b65b176e 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11924,6 +11924,10 @@ statement holding the function call.  Returns true if any change
 was made to the GIMPLE stream.
 @end deftypefn
 
+@deftypefn {Target Hook} {unsigned int} TARGET_FOLD_MEMCPY_MAX (void)
+This target hook returns the maximum number of bytes to fold memcpy.
+@end deftypefn
+
 @deftypefn {Target Hook} int TARGET_COMPARE_VERSION_PRIORITY (tree @var{decl1}, tree @var{decl2})
 This hook is used to compare the target attributes in two functions to
 determine which function's features get higher priority.  This is used
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 95e5e341f07..169145d0d45 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7924,6 +7924,8 @@ to by @var{ce_info}.
 
 @hook TARGET_GIMPLE_FOLD_BUILTIN
 
+@hook TARGET_FOLD_MEMCPY_MAX
+
 @hook TARGET_COMPARE_VERSION_PRIORITY
 
 @hook TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index c9179abb27e..31a9684fea4 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -965,14 +965,11 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
       /* If we can perform the copy efficiently with first doing all loads and
         then all stores inline it that way.  Currently efficiently means that
         we can load all the memory with a single set operation and that the
-        total size is less than MOVE_MAX * MOVE_RATIO.  */
+        total size does not exceed TARGET_FOLD_MEMCPY_MAX.  */
       src_align = get_pointer_alignment (src);
       dest_align = get_pointer_alignment (dest);
       if (tree_fits_uhwi_p (len)
-         && (compare_tree_int
-             (len, (MOVE_MAX
-                    * MOVE_RATIO (optimize_function_for_size_p (cfun))))
-             <= 0)
+         && compare_tree_int (len, targetm.fold_memcpy_max ()) <= 0
          /* FIXME: Don't transform copies from strings with known length.
             Until GCC 9 this prevented a case in gcc.dg/strlenopt-8.c
             from being handled, and the case was XFAILed for that reason.
diff --git a/gcc/target.def b/gcc/target.def
index 72c2e1ef756..b88338f5003 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2489,6 +2489,12 @@ was made to the GIMPLE stream.",
  bool, (gimple_stmt_iterator *gsi),
  hook_bool_gsiptr_false)
 
+DEFHOOK
+(fold_memcpy_max,
+ "This target hook returns the maximum number of bytes to fold memcpy.",
+ unsigned int, (void),
+ default_fold_memcpy_max)
+
 /* Target hook is used to compare the target attributes in two functions to
    determine which function's features get higher priority.  This is used
    during function multi-versioning to figure out the order in which two
diff --git a/gcc/testsuite/gcc.target/i386/pr103393.c b/gcc/testsuite/gcc.target/i386/pr103393.c
new file mode 100644
index 00000000000..7d54ae76561
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103393.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+struct TestData {
+  float arr[16];  /* cpy below copies elements 0..15.  */
+};
+
+void
+cpy (struct TestData *s1, struct TestData *s2 )
+{
+  for(int i=0; i<16; ++i)
+    s1->arr[i] = s2->arr[i];
+}
+
+/* { dg-final { scan-assembler "jmp\[\\t \]*_?memmove" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[\\t \]*_?memmove" { target ia32 } } } */
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index d3d9daffb5d..49cd05146c0 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -8509,4 +8509,13 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED)
 #endif
 }
 
+
+/* The default implementation of TARGET_FOLD_MEMCPY_MAX.  */
+
+unsigned int
+default_fold_memcpy_max (void)
+{
+  return MOVE_MAX * MOVE_RATIO (optimize_function_for_size_p (cfun));
+}
+
 #include "gt-varasm.h"
diff --git a/gcc/varasm.h b/gcc/varasm.h
index d5d8c4e5578..1fcb37e1f66 100644
--- a/gcc/varasm.h
+++ b/gcc/varasm.h
@@ -79,4 +79,6 @@ extern rtx assemble_static_space (unsigned HOST_WIDE_INT);
 
 extern rtx assemble_trampoline_template (void);
 
+extern unsigned int default_fold_memcpy_max (void);
+
 #endif  // GCC_VARASM_H
-- 
2.35.1

Reply via email to