Hi!

The following testcase, which I wrote in order to test ncopies > 1
handling of the inclusive scan vectorization, reveals that we don't
vectorize it, because we required a MULT_EXPR on the DR_OFFSET, but
obviously for 1 byte elements there is none (the step is simply 1).

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

2019-06-21  Jakub Jelinek  <ja...@redhat.com>

        * tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
        "omp simd array" arrays with one byte elements.

        * gcc.dg/vect/vect-simd-11.c: New test.
        * gcc.target/i386/sse2-vect-simd-11.c: New test.
        * gcc.target/i386/avx2-vect-simd-11.c: New test.
        * gcc.target/i386/avx512bw-vect-simd-11.c: New test.

--- gcc/tree-vect-data-refs.c.jj        2019-06-20 13:26:29.071150988 +0200
+++ gcc/tree-vect-data-refs.c   2019-06-20 13:55:35.421150589 +0200
@@ -4075,14 +4075,17 @@ vect_find_stmt_data_reference (loop_p lo
          && integer_zerop (DR_STEP (newdr)))
        {
          tree off = DR_OFFSET (newdr);
+         tree step = ssize_int (1);
          STRIP_NOPS (off);
-         if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
-             && TREE_CODE (off) == MULT_EXPR
+         if (TREE_CODE (off) == MULT_EXPR
              && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
            {
-             tree step = TREE_OPERAND (off, 1);
+             step = TREE_OPERAND (off, 1);
              off = TREE_OPERAND (off, 0);
              STRIP_NOPS (off);
+           }
+         if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST)
+           {
              if (CONVERT_EXPR_P (off)
                  && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
                      < TYPE_PRECISION (TREE_TYPE (off))))
--- gcc/testsuite/gcc.dg/vect/vect-simd-11.c.jj 2019-06-20 13:49:16.322081280 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-11.c    2019-06-20 12:58:52.516069619 +0200
@@ -0,0 +1,186 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3)
+  for (int i = 0; i < 1024; i++)
+    {
+      { r += a[i]; r2 += a[i]; r3 += a[i]; }
+      #pragma omp scan inclusive(r, r2, r3)
+      {
+       b[i] = r;
+       b2[i] = r2;
+       b3[i] = r3;
+      }
+    }
+}
+
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3)
+  for (int i = 0; i < 1024; i++)
+    {
+      {
+       s += 2 * a[i];
+       s2 += 2 * a[i];
+       s3 += 2 * a[i];
+      }
+      #pragma omp scan inclusive(s, s2, s3)
+      { b[i] = s; b2[i] = s2; b3[i] = s3; }
+    }
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      {
+       r += a[i];
+       r2 += a[i];
+       r3 += a[i];
+      }
+      #pragma omp scan inclusive(r, r2, r3)
+      {
+       b[i] = r;
+       b2[i] = r2;
+       b3[i] = r3;
+      }
+    }
+}
+
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+      #pragma omp scan inclusive(s, s2, s3)
+      { b[i] = s; b2[i] = s2; b3[i] = s3; }
+    }
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  unsigned short s2;
+  unsigned char s3;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      b2[i] = -1;
+      b3[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, b2, b3);
+  if (r != 1024 * 1023 / 2
+      || r2 != (unsigned short) r
+      || r3 != (unsigned char) r)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s
+         || b2[i] != (unsigned short) s
+         || b3[i] != (unsigned char) s)
+       abort ();
+      else
+       {
+         b[i] = 25;
+         b2[i] = 24;
+         b3[i] = 26;
+       }
+    }
+  if (bar (&s2, &s3) != 1024 * 1023)
+    abort ();
+  if (s2 != (unsigned short) (1024 * 1023)
+      || s3 != (unsigned char) (1024 * 1023))
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s
+         || b2[i] != (unsigned short) s
+         || b3[i] != (unsigned char) s)
+       abort ();
+      else
+       {
+         b[i] = -1;
+         b2[i] = -1;
+         b3[i] = -1;
+       }
+    }
+  r = 0;
+  r2 = 0;
+  r3 = 0;
+  baz (a, b, b2, b3);
+  if (r != 1024 * 1023 / 2
+      || r2 != (unsigned short) r
+      || r3 != (unsigned char) r)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s
+         || b2[i] != (unsigned short) s
+         || b3[i] != (unsigned char) s)
+       abort ();
+      else
+       {
+         b[i] = 25;
+         b2[i] = 24;
+         b3[i] = 26;
+       }
+    }
+  s2 = 0;
+  s3 = 0;
+  if (qux (&s2, &s3) != 1024 * 1023)
+    abort ();
+  if (s2 != (unsigned short) (1024 * 1023)
+      || s3 != (unsigned char) (1024 * 1023))
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s
+         || b2[i] != (unsigned short) s
+         || b3[i] != (unsigned char) s)
+       abort ();
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/sse2-vect-simd-11.c.jj        2019-06-20 13:50:04.160330704 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-11.c   2019-06-20 13:50:12.607198170 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-11.c"
+
+static void
+sse2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-vect-simd-11.c.jj        2019-06-20 13:50:27.008972212 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-11.c   2019-06-20 13:50:37.264811298 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-11.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-11.c.jj    2019-06-20 13:52:33.859983753 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-11.c       2019-06-20 13:52:49.293742912 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512bw-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-11.c"
+
+static void
+avx512bw_test (void)
+{
+  do_main ();
+}

        Jakub

Reply via email to