commit: 4462db3ded9dc8bf07f321ac7d538e311e0b79ac Author: Sam James <sam <AT> gentoo <DOT> org> AuthorDate: Mon Jul 21 14:01:49 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Mon Jul 21 14:01:49 2025 +0000 URL: https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=4462db3d
16.0.0: add vect fixes Bug: https://bugs.gentoo.org/959698 Bug: https://gcc.gnu.org/PR121190 Bug: https://gcc.gnu.org/PR121020 Signed-off-by: Sam James <sam <AT> gentoo.org> ...sufficient-alignment-requirement-for-spec.patch | 181 +++++++++++++++++++++ ...ssing-skip-vector-check-for-peeling-with-.patch | 142 ++++++++++++++++ 16.0.0/gentoo/README.history | 5 + 3 files changed, 328 insertions(+) diff --git a/16.0.0/gentoo/86_all_PR121190-vect-Fix-insufficient-alignment-requirement-for-spec.patch b/16.0.0/gentoo/86_all_PR121190-vect-Fix-insufficient-alignment-requirement-for-spec.patch new file mode 100644 index 0000000..9a03cdd --- /dev/null +++ b/16.0.0/gentoo/86_all_PR121190-vect-Fix-insufficient-alignment-requirement-for-spec.patch @@ -0,0 +1,181 @@ +https://bugs.gentoo.org/959698 +https://inbox.sourceware.org/gcc-patches/[email protected]/T/#u + +From e120f6cb794a4d104b37913c918aabe0ae6b2c64 Mon Sep 17 00:00:00 2001 +Message-ID: <e120f6cb794a4d104b37913c918aabe0ae6b2c64.1753106388.git....@gentoo.org> +From: Pengfei Li <[email protected]> +Date: Mon, 21 Jul 2025 11:02:32 +0000 +Subject: [PATCH 1/2] vect: Fix insufficient alignment requirement for + speculative loads [PR121190] + +This patch fixes a segmentation fault issue that can occur in vectorized +loops with an early break. When GCC vectorizes such loops, it may insert +a versioning check to ensure that data references (DRs) with speculative +loads are aligned. The check normally requires DRs to be aligned to the +vector mode size, which prevents generated vector load instructions from +crossing page boundaries. + +However, this is not sufficient when a single scalar load is vectorized +into multiple loads within the same iteration. In such cases, even if +none of the vector loads crosses page boundaries, subsequent loads after +the first one may still access memory beyond current valid page. 
+ +Consider the following loop as an example: + + while (i < MAX_COMPARE) { + if (*(p + i) != *(q + i)) + return i; + i++; + } + +When compiled with "-O3 -march=znver2" on x86, the vectorized loop may +include instructions like: + + vmovdqa (%rcx,%rax), %ymm0 + vmovdqa 32(%rcx,%rax), %ymm1 + vpcmpeqq (%rdx,%rax), %ymm0, %ymm0 + vpcmpeqq 32(%rdx,%rax), %ymm1, %ymm1 + +Note two speculative vector loads are generated for each DR (p and q). +The first vmovdqa and vpcmpeqq are safe due to the vector size (32-byte) +alignment, but the following ones (at offset 32) may not be safe because +they could read from the beginning of the next memory page, potentially +leading to segmentation faults. + +To avoid the issue, this patch increases the alignment requirement for +speculative loads to DR_TARGET_ALIGNMENT. It ensures all vector loads in +the same vector iteration access memory within the same page. + +This patch is bootstrapped and regression-tested on x86_64-linux-gnu, +arm-linux-gnueabihf and aarch64-linux-gnu. + + PR tree-optimization/121190 + +gcc/ChangeLog: + + * tree-vect-data-refs.cc (vect_enhance_data_refs_alignment): + Increase alignment requirement for speculative loads. + +gcc/testsuite/ChangeLog: + +* gcc.dg/vect/vect-early-break_52.c: Update an unsafe test. + * gcc.dg/vect/vect-early-break_137-pr121190.c: New test. 
+--- + .../vect/vect-early-break_137-pr121190.c | 60 +++++++++++++++++++ + .../gcc.dg/vect/vect-early-break_52.c | 2 +- + gcc/tree-vect-data-refs.cc | 15 ++++- + 3 files changed, 75 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c +new file mode 100644 +index 000000000000..da11146c578e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c +@@ -0,0 +1,60 @@ ++/* PR tree-optimization/121190 */ ++/* { dg-do run } */ ++/* { dg-options "-O3" } */ ++/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ ++/* { dg-require-effective-target mmap } */ ++/* { dg-require-effective-target vect_early_break } */ ++ ++#include <stdint.h> ++#include <string.h> ++#include <stdio.h> ++#include <sys/mman.h> ++#include <unistd.h> ++ ++#define MAX_COMPARE 5000 ++ ++__attribute__((noipa)) ++int diff (uint64_t *restrict p, uint64_t *restrict q) ++{ ++ int i = 0; ++ while (i < MAX_COMPARE) { ++ if (*(p + i) != *(q + i)) ++ return i; ++ i++; ++ } ++ return -1; ++} ++ ++int main () ++{ ++ long pgsz = sysconf (_SC_PAGESIZE); ++ if (pgsz == -1) { ++ fprintf (stderr, "sysconf failed\n"); ++ return 0; ++ } ++ ++ /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the ++ beginning of each. */ ++ void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (mem == MAP_FAILED) { ++ fprintf (stderr, "mmap failed\n"); ++ return 0; ++ } ++ uint64_t *p1 = (uint64_t *) mem; ++ uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t); ++ ++ /* Fill the first page with zeros, except for its last 64 bits. */ ++ memset (p1, 0, pgsz); ++ *(p2 - 1) = -1; ++ ++ /* Make the 2nd page not accessable. */ ++ mprotect (p2, pgsz, PROT_NONE); ++ ++ /* Calls to diff should not read the 2nd page. 
*/ ++ for (int i = 1; i <= 20; i++) { ++ if (diff (p2 - i, p1) != i - 1) ++ __builtin_abort (); ++ } ++} ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c +index 86a632f2a822..6abfcd6580e4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c +@@ -18,4 +18,4 @@ int main1 (short X) + } + } + +-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-*" } } } } */ ++/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */ +diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc +index a24ddfbc3841..24048086857f 100644 +--- a/gcc/tree-vect-data-refs.cc ++++ b/gcc/tree-vect-data-refs.cc +@@ -2964,12 +2964,25 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) + break; + } + ++ /* Normally, we require DRs to be aligned to the vector mode size. ++ However, this is not sufficient when a statement involves safe ++ speculative read. In such cases, a single scalar load can be ++ vectorized into multiple vector loads in one loop iteration. ++ Even if the first vector load is safe, subsequent loads might ++ still access an invalid memory page. We increase the alignment ++ requirement to prevent this. */ ++ poly_uint64 required_align_size; ++ if (dr_safe_speculative_read_required (stmt_info)) ++ required_align_size = DR_TARGET_ALIGNMENT (dr_info); ++ else ++ required_align_size = GET_MODE_SIZE (TYPE_MODE (vectype)); ++ + /* At present we don't support versioning for alignment + with variable VF, since there's no guarantee that the + VF is a power of two. We could relax this if we added + a way of enforcing a power-of-two size. 
*/ + unsigned HOST_WIDE_INT size; +- if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size)) ++ if (!required_align_size.is_constant (&size)) + { + do_versioning = false; + break; + +base-commit: b441d735c092f5d60c4a9c7167ed9153003d49fa +-- +2.50.1 + diff --git a/16.0.0/gentoo/87_all_PR121020-vect-Add-missing-skip-vector-check-for-peeling-with-.patch b/16.0.0/gentoo/87_all_PR121020-vect-Add-missing-skip-vector-check-for-peeling-with-.patch new file mode 100644 index 0000000..5aae3a5 --- /dev/null +++ b/16.0.0/gentoo/87_all_PR121020-vect-Add-missing-skip-vector-check-for-peeling-with-.patch @@ -0,0 +1,142 @@ +https://bugs.gentoo.org/959698 +https://inbox.sourceware.org/gcc-patches/[email protected]/T/#u + +From f66323025c47ba09cee296a8a638cfe63d1bdad3 Mon Sep 17 00:00:00 2001 +Message-ID: <f66323025c47ba09cee296a8a638cfe63d1bdad3.1753106388.git....@gentoo.org> +In-Reply-To: <e120f6cb794a4d104b37913c918aabe0ae6b2c64.1753106388.git....@gentoo.org> +References: <e120f6cb794a4d104b37913c918aabe0ae6b2c64.1753106388.git....@gentoo.org> +From: Pengfei Li <[email protected]> +Date: Mon, 21 Jul 2025 11:06:42 +0000 +Subject: [PATCH 2/2] vect: Add missing skip-vector check for peeling with + versioning [PR121020] + +This fixes a miscompilation issue introduced by the enablement of +combined loop peeling and versioning. A test case that reproduces the +issue is included in the patch. + +When performing loop peeling, GCC usually inserts a skip-vector check. +This ensures that after peeling, there are enough remaining iterations +to enter the main vectorized loop. Previously, the check was omitted if +loop versioning for alignment was applied. It was safe before because +versioning and peeling for alignment were mutually exclusive. + +However, with combined peeling and versioning enabled, this is not safe +any more. A loop may be peeled and versioned at the same time. 
Without +the skip-vector check, the main vectorized loop can be entered even if +its iteration count is zero. This can cause the loop to run many more +iterations than needed, resulting in incorrect results. + +To fix this, the patch updates the condition of omitting the skip-vector +check to when versioning is performed alone without peeling. + +This patch is bootstrapped and regression-tested on x86_64-linux-gnu, +arm-linux-gnueabihf and aarch64-linux-gnu. + + PR tree-optimization/121020 + +gcc/ChangeLog: + + * tree-vect-loop-manip.cc (vect_do_peeling): Update the + condition of omitting the skip-vector check. + * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING): + Add a helper macro. + +gcc/testsuite/ChangeLog: + +* gcc.dg/vect/vect-early-break_138-pr121020.c: New test. +--- + .../vect/vect-early-break_138-pr121020.c | 52 +++++++++++++++++++ + gcc/tree-vect-loop-manip.cc | 2 +- + gcc/tree-vectorizer.h | 4 ++ + 3 files changed, 57 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c +new file mode 100644 +index 000000000000..86661e445a83 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c +@@ -0,0 +1,52 @@ ++/* PR tree-optimization/121020 */ ++/* { dg-do run } */ ++/* { dg-options "-O3 --vect-cost-model=unlimited" } */ ++/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ ++/* { dg-require-effective-target mmap } */ ++/* { dg-require-effective-target vect_early_break } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++#include <sys/mman.h> ++#include <unistd.h> ++ ++__attribute__((noipa)) ++bool equal (uint64_t *restrict p, uint64_t *restrict q, int length) ++{ ++ for (int i = 0; i < length; i++) { ++ if (*(p + i) != *(q + i)) ++ return false; ++ } ++ return true; ++} ++ ++int main () ++{ ++ long pgsz = 
sysconf (_SC_PAGESIZE); ++ if (pgsz == -1) { ++ fprintf (stderr, "sysconf failed\n"); ++ return 0; ++ } ++ ++ /* Allocate a whole page of memory. */ ++ void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (mem == MAP_FAILED) { ++ fprintf (stderr, "mmap failed\n"); ++ return 0; ++ } ++ uint64_t *p1 = (uint64_t *) mem; ++ uint64_t *p2 = (uint64_t *) mem + 32; ++ ++ /* The first 16 elements pointed to by p1 and p2 are the same. */ ++ for (int i = 0; i < 32; i++) { ++ *(p1 + i) = 0; ++ *(p2 + i) = (i < 16 ? 0 : -1); ++ } ++ ++ /* All calls to equal should return true. */ ++ for (int len = 0; len < 16; len++) { ++ if (!equal (p1 + 1, p2 + 1, len)) ++ __builtin_abort(); ++ } ++} +diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc +index 2d01a4b0ed1c..7fcbc1ad2eb8 100644 +--- a/gcc/tree-vect-loop-manip.cc ++++ b/gcc/tree-vect-loop-manip.cc +@@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + ? 
maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), + bound_prolog + bound_epilog) +- : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) ++ : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo) + || vect_epilogues)); + + /* Epilog loop must be executed if the number of iterations for epilog +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 80f8853733de..69428f1747cb 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -1168,6 +1168,10 @@ public: + || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ + || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) + ++#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \ ++ ((L)->may_misalign_stmts.length () > 0 \ ++ && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L)) ++ + #define LOOP_VINFO_NITERS_KNOWN_P(L) \ + (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) + +-- +2.50.1 + diff --git a/16.0.0/gentoo/README.history b/16.0.0/gentoo/README.history index d0f985b..7020432 100644 --- a/16.0.0/gentoo/README.history +++ b/16.0.0/gentoo/README.history @@ -1,3 +1,8 @@ +8 ???? + + + 86_all_PR121190-vect-Fix-insufficient-alignment-requirement-for-spec.patch + + 87_all_PR121020-vect-Add-missing-skip-vector-check-for-peeling-with-.patch + 7 21 July 2025 - 86_all_PR120881-x86-64-Add-enable-x86-64-mfentry.patch
