GCC presently enables the loop vectorizer at lower optimization levels for OpenMP loops with the "simd" specifier than it does for loops without it. The "simd" specifier isn't defined to be purely an optimization hint to the compiler; it also has semantic effects like changing the privatization of the loop variable. It seems reasonable to decouple the additional vectorization from those semantic effects and apply it also to work-sharing loops without the "simd" specifier at the same optimization levels.

I've tested this patch on x86_64-linux-gnu-amdgcn, plain x86_64-linux-gnu, and aarch64-linux-gnu. OK for mainline?

-Sandra
From 15c6f6b6bc396f53474ea380f506a7f74d7a05af Mon Sep 17 00:00:00 2001
From: Sandra Loosemore <san...@codesourcery.com>
Date: Tue, 13 Sep 2022 23:50:27 +0000
Subject: [PATCH] OpenMP: Enable vectorization in all OpenMP loops

This patch marks all OpenMP worksharing loops (not just those with the
"simd" specifier) as candidates for vectorization when -ftree-loop-optimize
is active and loop vectorization is not explicitly disabled with
-fno-tree-loop-vectorize.

gcc/ChangeLog:

	* omp-expand.cc (maybe_auto_vectorize_loop): New.
	(expand_omp_for_generic): Call it.
	(expand_omp_for_static_nochunk): Likewise.
	(expand_omp_for_static_chunk): Likewise.
	(expand_omp_taskloop_for_inner): Likewise.
	(expand_oacc_for): Likewise.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/vectorize-1.c: New.
	* c-c++-common/gomp/vectorize-2.c: New.
	* c-c++-common/gomp/vectorize-3.c: New.
	* c-c++-common/gomp/vectorize-s.c: New.
	* gcc.dg/gomp/pr46032-2.c: Compile with -fno-tree-loop-vectorize.
	* gcc.dg/gomp/pr46032-3.c: Likewise.
---
 gcc/omp-expand.cc                             | 23 ++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-1.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-2.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-3.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-s.c | 31 +++++++++++++++++++
 gcc/testsuite/gcc.dg/gomp/pr46032-2.c         |  2 +-
 gcc/testsuite/gcc.dg/gomp/pr46032-3.c         |  2 +-
 7 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-3.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-s.c

diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index fcaf4f6d4e9..bc753814102 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -3711,6 +3711,22 @@ expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
   return cont_bb;
 }
 
+/* Helper function for various subroutines of expand_omp_for.
+   If -ftree-loop-optimize is enabled and -fno-tree-loop-vectorize was
+   not given explicitly, hint that we want to vectorize the loop LOOP.  */
+static void
+maybe_auto_vectorize_loop (class loop *loop)
+{
+  if ((flag_tree_loop_vectorize
+       || !OPTION_SET_P (flag_tree_loop_vectorize))
+      && flag_tree_loop_optimize)
+    {
+      loop->force_vectorize = true;
+      cfun->has_force_vectorize_loops = true;
+    }
+}
+
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with any schedule.  Given parameters:
 
@@ -4650,6 +4666,7 @@ expand_omp_for_generic (struct omp_region *region,
       new_loop->header = l0_bb;
       new_loop->latch = l2_bb;
       add_loop (new_loop, outer_loop);
+      maybe_auto_vectorize_loop (new_loop);
 
       /* Allocate a loop structure for the original loop unless we already
 	 had one.  */
@@ -4660,6 +4677,7 @@ expand_omp_for_generic (struct omp_region *region,
 	  orig_loop->header = l1_bb;
 	  /* The loop may have multiple latches.  */
 	  add_loop (orig_loop, new_loop);
+	  maybe_auto_vectorize_loop (orig_loop);
 	}
     }
 }
@@ -5551,6 +5569,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       if (collapse_bb == NULL)
 	loop->latch = cont_bb;
       add_loop (loop, body_bb->loop_father);
+      maybe_auto_vectorize_loop (loop);
     }
 }
 
@@ -6268,6 +6287,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
       trip_loop->header = iter_part_bb;
       trip_loop->latch = trip_update_bb;
       add_loop (trip_loop, iter_part_bb->loop_father);
+      maybe_auto_vectorize_loop (trip_loop);
 
       if (loop != entry_bb->loop_father)
 	{
@@ -6285,6 +6305,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
 	  if (collapse_bb == NULL)
 	    loop->latch = cont_bb;
 	  add_loop (loop, trip_loop);
+	  maybe_auto_vectorize_loop (loop);
 	}
     }
 }
@@ -7439,6 +7460,7 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
       if (collapse_bb == NULL)
 	loop->latch = cont_bb;
       add_loop (loop, body_bb->loop_father);
+      maybe_auto_vectorize_loop (loop);
     }
 }
 
@@ -8006,6 +8028,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 	      inner_loop->header = elem_body_bb;
 	      inner_loop->latch = elem_cont_bb;
 	      add_loop (inner_loop, body_loop);
+	      maybe_auto_vectorize_loop (inner_loop);
 	    }
 	}
     }
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-1.c b/gcc/testsuite/c-c++-common/gomp/vectorize-1.c
new file mode 100644
index 00000000000..c52b6fd3039
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O1 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-2.c b/gcc/testsuite/c-c++-common/gomp/vectorize-2.c
new file mode 100644
index 00000000000..78a10e4ff56
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-3.c b/gcc/testsuite/c-c++-common/gomp/vectorize-3.c
new file mode 100644
index 00000000000..987939fb2d9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-s.c b/gcc/testsuite/c-c++-common/gomp/vectorize-s.c
new file mode 100644
index 00000000000..f382c9aeba0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-s.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -Os -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/pr46032-2.c b/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
index 2e562618489..ce925d1bd89 100644
--- a/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
+++ b/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
 
 #define N 2
 
diff --git a/gcc/testsuite/gcc.dg/gomp/pr46032-3.c b/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
index da1ab487385..866b7c9ada5 100644
--- a/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
+++ b/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
 
 #define N 2
 
-- 
2.31.1

Reply via email to