https://gcc.gnu.org/g:28310b3ff84616cea242cb4cef8f16b4538dbd22

commit r16-2633-g28310b3ff84616cea242cb4cef8f16b4538dbd22
Author: Pengfei Li <pengfei....@arm.com>
Date:   Wed Jul 30 10:54:14 2025 +0100

    vect: Add missing skip-vector check for peeling with versioning [PR121020]
    
    This fixes a miscompilation issue introduced by the enablement of
    combined loop peeling and versioning. A test case that reproduces the
    issue is included in the patch.
    
    When performing loop peeling, GCC usually inserts a skip-vector check.
    This ensures that after peeling, there are enough remaining iterations
    to enter the main vectorized loop. Previously, the check was omitted if
    loop versioning for alignment was applied. It was safe before because
    versioning and peeling for alignment were mutually exclusive.
    
    However, with combined peeling and versioning enabled, this is not safe
    any more. A loop may be peeled and versioned at the same time. Without
    the skip-vector check, the main vectorized loop can be entered even if
    its iteration count is zero. This can cause the loop running many more
    iterations than needed, resulting in incorrect results.
    
    To fix this, the patch updates the condition of omitting the skip-vector
    check to when versioning is performed alone without peeling.
    
    gcc/ChangeLog:
    
            PR tree-optimization/121020
            * tree-vect-loop-manip.cc (vect_do_peeling): Update the
            condition of omitting the skip-vector check.
            * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING):
            Add a helper macro.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/121020
            * gcc.dg/vect/vect-early-break_138-pr121020.c: New test.

Diff:
---
 .../gcc.dg/vect/vect-early-break_138-pr121020.c    | 54 ++++++++++++++++++++++
 gcc/tree-vect-loop-manip.cc                        |  2 +-
 gcc/tree-vectorizer.h                              |  4 ++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
new file mode 100644
index 000000000000..8cb62bf5bc93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
@@ -0,0 +1,54 @@
+/* PR tree-optimization/121020 */
+/* { dg-options "-O3 --vect-cost-model=unlimited" } */
+/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "tree-vect.h"
+
+__attribute__((noipa))
+bool equal (uint64_t *restrict p, uint64_t *restrict q, int length)
+{
+  for (int i = 0; i < length; i++) {
+    if (*(p + i) != *(q + i))
+      return false;
+  }
+  return true;
+}
+
+int main ()
+{
+  check_vect ();
+
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed\n");
+    return 0;
+  }
+
+  /* Allocate a whole page of memory.  */
+  void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    fprintf (stderr, "mmap failed\n");
+    return 0;
+  }
+  uint64_t *p1 = (uint64_t *) mem;
+  uint64_t *p2 = (uint64_t *) mem + 32;
+
+  /* The first 16 elements pointed to by p1 and p2 are the same.  */
+  for (int i = 0; i < 32; i++) {
+    *(p1 + i) = 0;
+    *(p2 + i) = (i < 16 ? 0 : -1);
+  }
+
+  /* All calls to equal should return true.  */
+  for (int len = 0; len < 16; len++) {
+    if (!equal (p1 + 1, p2 + 1, len))
+      __builtin_abort();
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 2d01a4b0ed1c..7fcbc1ad2eb8 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
                      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
                                  bound_prolog + bound_epilog)
-                     : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+                     : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
                         || vect_epilogues));
 
   /* Epilog loop must be executed if the number of iterations for epilog
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 203e5ad964a6..e19002794324 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1197,6 +1197,10 @@ public:
    || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)          \
    || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
 
+#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L)   \
+  ((L)->may_misalign_stmts.length () > 0               \
+   && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
+
 #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)

Reply via email to