https://gcc.gnu.org/g:881df7a0b1e8e8c1454309fe23c0edd026296b8b

commit r16-3460-g881df7a0b1e8e8c1454309fe23c0edd026296b8b
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Aug 28 17:55:46 2025 -0700

    x86: Allow by_pieces op when expanding memcpy/memset epilogue
    
    Since
    
    commit 401199377c50045ede560daf3f6e8b51749c2a87
    Author: H.J. Lu <hjl.to...@gmail.com>
    Date:   Tue Jun 17 10:17:17 2025 +0800
    
        x86: Improve vector_loop/unrolled_loop for memset/memcpy
    
    uses move_by_pieces and store_by_pieces to expand memcpy/memset epilogue
    with vector_loop even when targetm.use_by_pieces_infrastructure_p returns
    false, which triggers
    
      gcc_assert (targetm.use_by_pieces_infrastructure_p
                    (len, align,
                     memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
                     optimize_insn_for_speed_p ()));
    
    in store_by_pieces.  Fix it by:
    
    1. Add by_pieces_in_use to machine_function to indicate that by_pieces op
    is currently in use.
    2. Set and clear by_pieces_in_use when expanding memcpy/memset epilogue
    with move_by_pieces and store_by_pieces.
    3. Define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P to return true if
    by_pieces_in_use is true.
    
    gcc/
    
            PR target/121096
            * config/i386/i386-expand.cc (expand_cpymem_epilogue): Set and
            clear by_pieces_in_use when using by_pieces op.
            (expand_setmem_epilogue): Likewise.
            * config/i386/i386.cc (ix86_use_by_pieces_infrastructure_p): New.
            (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise.
            * config/i386/i386.h (machine_function): Add by_pieces_in_use.
    
    gcc/testsuite/
    
            PR target/121096
            * gcc.target/i386/memcpy-strategy-14.c: New test.
            * gcc.target/i386/memcpy-strategy-15.c: Likewise.
            * gcc.target/i386/memset-strategy-10.c: Likewise.
            * gcc.target/i386/memset-strategy-11.c: Likewise.
            * gcc.target/i386/memset-strategy-12.c: Likewise.
            * gcc.target/i386/memset-strategy-13.c: Likewise.
            * gcc.target/i386/memset-strategy-14.c: Likewise.
            * gcc.target/i386/memset-strategy-15.c: Likewise.
    
    Signed-off-by: H.J. Lu <hjl.to...@gmail.com>

Diff:
---
 gcc/config/i386/i386-expand.cc                     |  4 ++++
 gcc/config/i386/i386.cc                            | 21 +++++++++++++++++
 gcc/config/i386/i386.h                             |  3 +++
 gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c | 10 +++++++++
 gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c | 10 +++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-10.c | 24 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-11.c |  9 ++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-12.c |  8 +++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-13.c | 26 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-14.c |  8 +++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-15.c |  9 ++++++++
 11 files changed, 132 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6734d9f1464e..1c788ae098ad 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -8241,8 +8241,10 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem,
       unsigned HOST_WIDE_INT countval = UINTVAL (count);
       unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
       unsigned int destalign = MEM_ALIGN (destmem);
+      cfun->machine->by_pieces_in_use = true;
       move_by_pieces (destmem, srcmem, epilogue_size, destalign,
                      RETURN_BEGIN);
+      cfun->machine->by_pieces_in_use = false;
       return;
     }
   if (max_size > 8)
@@ -8487,9 +8489,11 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
       unsigned HOST_WIDE_INT countval = UINTVAL (count);
       unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
       unsigned int destalign = MEM_ALIGN (destmem);
+      cfun->machine->by_pieces_in_use = true;
       store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val,
                       vec_value ? vec_value : value, destalign, true,
                       RETURN_BEGIN);
+      cfun->machine->by_pieces_in_use = false;
       return;
     }
   if (max_size > 32)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 1ca6c6121371..471be3e86158 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -11382,6 +11382,23 @@ ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
 
   return cost;
 }
+
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+bool
+ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+                                    unsigned int align,
+                                    enum by_pieces_operation op,
+                                    bool speed_p)
+{
+  /* Return true when we are currently expanding memcpy/memset epilogue
+     with move_by_pieces or store_by_pieces.  */
+  if (cfun->machine->by_pieces_in_use)
+    return true;
+
+  return default_use_by_pieces_infrastructure_p (size, align, op,
+                                                speed_p);
+}
 
 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
    this is used for to form addresses to local data when -fPIC is in
@@ -27934,6 +27951,10 @@ static const scoped_attribute_specs *const ix86_attribute_table[] =
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST ix86_address_cost
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  ix86_use_by_pieces_infrastructure_p
+
 #undef TARGET_OVERLAP_OP_BY_PIECES_P
 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2eb141bab1ad..ac0ce687f36e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2954,6 +2954,9 @@ struct GTY(()) machine_function {
   /* True if this is a recursive function.  */
   BOOL_BITFIELD recursive_function : 1;
 
+  /* True if by_pieces op is currently in use.  */
+  BOOL_BITFIELD by_pieces_in_use : 1;
+
   /* The largest alignment, in bytes, of stack slot actually used.  */
   unsigned int max_used_stack_alignment;
 
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c
new file mode 100644
index 000000000000..44cd65230292
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movaps" 8 } } */
+
+char a[2048];
+char b[2048];
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c
new file mode 100644
index 000000000000..ea8e4be4ac4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movups" 8 } } */
+
+char *a;
+char *b;
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-10.c b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c
new file mode 100644
index 000000000000..d6f2f4ed7ff2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -march=x86-64 -mstringop-strategy=vector_loop" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     xorps   %xmm0, %xmm0
+**     xorl    %eax, %eax
+**     movq    %rax, 48\(%(e|r)di\)
+**     movups  %xmm0, \(%(e|r)di\)
+**     movups  %xmm0, 16\(%(e|r)di\)
+**     movups  %xmm0, 32\(%(e|r)di\)
+**     ret
+**...
+*/
+
+void
+foo (char *a)
+{
+  __builtin_memset (a, 0, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-11.c b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c
new file mode 100644
index 000000000000..851c6faaa09d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops 
-mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movaps" 4 } } */
+
+char a[2048];
+void t (void)
+{
+  __builtin_memset (a, 0, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-12.c b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c
new file mode 100644
index 000000000000..06cac03426a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-sse -mstringop-strategy=vector_loop" } */
+
+void
+foo (char *a)
+{
+  __builtin_memset (a, 0, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-13.c b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c
new file mode 100644
index 000000000000..cc2129f60eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-sse -mstringop-strategy=unrolled_loop" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     xorl    %eax, %eax
+**     movq    %rax, \(%(e|r)di\)
+**     movq    %rax, 8\(%(e|r)di\)
+**     movq    %rax, 16\(%(e|r)di\)
+**     movq    %rax, 24\(%(e|r)di\)
+**     movq    %rax, 32\(%(e|r)di\)
+**     movq    %rax, 40\(%(e|r)di\)
+**     movq    %rax, 48\(%(e|r)di\)
+**     ret
+**...
+*/
+
+void
+foo (char *a)
+{
+  __builtin_memset (a, 0, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-14.c b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c
new file mode 100644
index 000000000000..144235ee0820
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -march=x86-64 -mstringop-strategy=vector_loop" } */
+
+void
+foo (char *a, int c)
+{
+  __builtin_memset (a, c, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-15.c b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c
new file mode 100644
index 000000000000..66f9fa600499
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movups" 4} } */
+
+char *a;
+void t (void)
+{
+  __builtin_memset (a, 0, 2048);
+}

Reply via email to