New in this revision:
- Generalize alignment_support_scheme categorization logic from the
  previous patch to apply more generally to any memory access satisfying
  the following criteria:

1. Safe speculative reading is required and access bounds are unknown;
2. The memory access is not to an invariant address;
3. Alignment may potentially be achievable
    (i.e. non-`dr_unaligned_unsupported' accesses).

More details given in the patch description below.

Regression tested on aarch64, armhf and x86_64, no regressions.

------

For the vectorization of non-contiguous memory accesses such as the
vectorization of loads from a particular struct member, specifically
when vectorizing with unknown bounds (thus using a pointer and not an
array) it is observed that inadequate alignment checking allows for
the crossing of a page boundary within a single vectorized loop
iteration. This leads to potential segmentation faults in the
resulting binaries.

For example, for the given datatype:

    typedef struct {
      uint64_t a;
      uint64_t b;
      uint32_t flag;
      uint32_t pad;
    } Data;

and a loop such as:

int
foo (Data *ptr) {
  if (ptr == NULL)
    return -1;

  int cnt;
  for (cnt = 0; cnt < MAX; cnt++) {
    if (ptr->flag == 0)
      break;
    ptr++;
  }
  return cnt;
}

the vectorizer yields the following cfg on armhf:

<bb 1>:
_41 = ptr_4(D) + 16;
<bb 2>:
_44 = MEM[(unsigned int *)ivtmp_42];
ivtmp_45 = ivtmp_42 + 24;
_46 = MEM[(unsigned int *)ivtmp_45];
ivtmp_47 = ivtmp_45 + 24;
_48 = MEM[(unsigned int *)ivtmp_47];
ivtmp_49 = ivtmp_47 + 24;
_50 = MEM[(unsigned int *)ivtmp_49];
vect_cst__51 = {_44, _46, _48, _50};
mask_patt_6.17_52 = vect_cst__51 == { 0, 0, 0, 0 };
if (mask_patt_6.17_52 != { 0, 0, 0, 0 })
  goto <bb 4>;
else
  ivtmp_43 = ivtmp_42 + 96;
  goto <bb 2>;
<bb 4>:
...

without any proper address alignment checks on the starting address
or on whether alignment is preserved across iterations. We therefore
fix the handling of such cases.

To correct this, we modify the logic in `get_load_store_type',
particularly the logic that currently leads to all
VMAT_ELEMENTWISE-type accesses defaulting to `dr_unaligned_supported'.

In the logic responsible for correctly populating both `misalignment'
and `alignment_support_scheme' we introduce a dedicated branch for
handling cases where two conditions are met:

1. Safe speculative reading is required and access bounds are unknown;
2. The memory access is not to an invariant address.

for (1) if access bounds are known, we can avoid carrying out illegal
accesses by epilogue-peeling, as is the case for the
vect-early-break_20.c test, so alignment is less of an issue.  As
for (2), where `*memory_access_type == VMAT_INVARIANT' we needn't
concern ourselves with the possibility of illegal access being
introduced as a consequence of vectorization.

In this new branch, we first calculate the access misalignment and,
with this information, calculate the alignment support scheme.  If the
access is established to be of type `dr_unaligned_unsupported', we leave
the support scheme unchanged as we know alignment is not possible.
In any other case, such as the case of `dr_unaligned_supported', where
ordinarily unaligned accesses would be okay, we apply more stringent
alignment requirements in the form of `dr_aligned'.

gcc/ChangeLog:

        PR tree-optimization/124037
        * tree-vect-stmts.cc (get_load_store_type): Fix
        alignment_support_scheme categorization for early
        break loops.

gcc/testsuite/ChangeLog:

        * g++.dg/vect/vect-pr124037.cc: New.
        * gcc.dg/vect/vect-pr124037.c: New.
---
 gcc/testsuite/g++.dg/vect/vect-pr124037.cc | 38 ++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-pr124037.c  | 58 ++++++++++++++++++++++
 gcc/tree-vect-stmts.cc                     | 31 ++++++++++--
 3 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/vect-pr124037.cc
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr124037.c

diff --git a/gcc/testsuite/g++.dg/vect/vect-pr124037.cc 
b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
new file mode 100644
index 00000000000..e25ba02ec9b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
@@ -0,0 +1,38 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-additional-options "-std=c++11" } */
+
+struct Token
+{
+  unsigned t, t1, t2;
+  void *a, *b;
+  unsigned short Kind;
+  unsigned short flags;
+  constexpr Token() : Token(0){}
+  constexpr Token(int a): t(0), t1(0), t2 (0), a(nullptr),b (nullptr),
+                         Kind(a), flags(0) {}
+  bool isNot(int K) const { return Kind != K; }
+};
+
+[[gnu::noipa]]
+unsigned getArgLength(const Token *ArgPtr) {
+  unsigned NumArgTokens = 0;
+  for (; ArgPtr->isNot(0); ++ArgPtr)
+    ++NumArgTokens;
+  return NumArgTokens;
+}
+
+Token tokens[] = {{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+                 {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+                 {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+                 {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+                 {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+                 {1},{1},{1},{1},{1},{1},{}};
+
+int main()
+{
+  __builtin_printf("%d\n", getArgLength(tokens));
+}
+/* { dg-final { scan-tree-dump "missed:   not vectorized: relevant stmt not 
supported: _\[0-9\]+ = ArgPtr_\[0-9\]+->Kind;" "vect" } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr124037.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
new file mode 100644
index 00000000000..3954e23f456
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
@@ -0,0 +1,58 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#define MAX 65536
+
+typedef struct {
+  uint64_t a;
+  uint64_t b;
+  uint32_t flag;
+  uint32_t pad;
+} Data;
+
+int __attribute__ ((noinline))
+foo (Data *ptr) {
+  if (ptr == NULL)
+    return -1;
+
+  int cnt;
+  for (cnt = 0; cnt < MAX; cnt++) {
+    if (ptr->flag == 0)
+      break;
+    ptr++;
+  }
+  return cnt;
+}
+
+int main() {
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed\n");
+    return 0;
+  }
+
+  /* Allocate 2 consecutive pages.  */
+  void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    fprintf (stderr, "mmap failed\n");
+    return 0;
+  }
+
+  memset (mem, 1, pgsz);
+  uint8_t *ptr = (uint8_t*) mem + pgsz;
+  memset (ptr - 16, 0, 16);
+
+  mprotect (ptr, pgsz, PROT_NONE);
+  Data *data = (Data *) (ptr - 80);
+  __builtin_printf("%d\n", foo(data));
+}
+
+/* { dg-final { scan-tree-dump "missed:   not vectorized: relevant stmt not 
supported: _\[0-9\]+ = ptr_\[0-9\]+->flag;" "vect" } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 22285250aa8..98077149136 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2499,10 +2499,33 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
stmt_info,
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     *poffset = neg_ldst_offset;
 
-  if (*memory_access_type == VMAT_ELEMENTWISE
-      || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
-      || *memory_access_type == VMAT_STRIDED_SLP
-      || *memory_access_type == VMAT_INVARIANT)
+  /* Even though individual VMAT_ELEMENTWISE accesses do not cause alignment
+     problems, loading the whole vector's worth of values in a speculative
+     early-break context might cross a page boundary.  Set the alignment scheme
+     to `dr_aligned' here in order to force checking of whether such accesses
+     meet alignment criteria.  */
+  if (dr_safe_speculative_read_required (stmt_info)
+     && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info))
+      && *memory_access_type != VMAT_INVARIANT)
+    {
+      if (mat_gather_scatter_p (*memory_access_type)
+         && !first_dr_info)
+       *misalignment = DR_MISALIGNMENT_UNKNOWN;
+      else
+       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
+
+      *alignment_support_scheme
+       = vect_supportable_dr_alignment
+          (vinfo, first_dr_info, vectype, *misalignment,
+           mat_gather_scatter_p (*memory_access_type));
+
+      if (*alignment_support_scheme != dr_unaligned_unsupported)
+         *alignment_support_scheme = dr_aligned;
+    }
+  else if (*memory_access_type == VMAT_ELEMENTWISE
+          ||*memory_access_type == VMAT_GATHER_SCATTER_LEGACY
+          || *memory_access_type == VMAT_STRIDED_SLP
+          || *memory_access_type == VMAT_INVARIANT)
     {
       *alignment_support_scheme = dr_unaligned_supported;
       *misalignment = DR_MISALIGNMENT_UNKNOWN;
-- 
2.43.0

Reply via email to