huntergr updated this revision to Diff 93841.
huntergr added a reviewer: kkwli0.
huntergr added a comment.

Changed the approach to transform combined constructs to 'simd' in
ParseOpenMP.cpp instead of creating a new pragma handler. This also made it
easier to add support for 'declare simd': it only needed a check for the
option when code-generating functions, so I've added a RUN line to the
'declare simd' codegen tests to cover it.
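
For reference, a minimal sketch of the intended behaviour (the file name and
kernel below are illustrative only, not taken from the patch):

  // Compile with: clang -fopenmp-simd -c example.c
  // The combined construct is parsed as if it were '#pragma omp simd simdlen(4)';
  // the 'parallel for' part and its num_threads clause are dropped, and no
  // OpenMP runtime calls or runtime library dependency are generated.
  void saxpy(int n, float a, float *x, float *y) {
    #pragma omp parallel for simd num_threads(2) simdlen(4)
    for (int i = 0; i < n; ++i)
      y[i] = a * x[i] + y[i];
  }

  // 'declare simd' is still honoured, so functions marked with it get the same
  // IR annotations as with -fopenmp (covered by the new RUN line in
  // declare_simd_codegen.cpp).
  #pragma omp declare simd simdlen(4)
  float fma_scalar(float a, float b, float c) { return a * b + c; }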


https://reviews.llvm.org/D31417

Files:
  docs/ClangCommandLineReference.rst
  docs/UsersManual.rst
  include/clang/Basic/LangOptions.def
  include/clang/Driver/Options.td
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Driver/ToolChains/Clang.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Parse/ParseOpenMP.cpp
  lib/Parse/ParsePragma.cpp
  lib/Sema/SemaExpr.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/declare_simd_codegen.cpp
  test/OpenMP/linking.c
  test/OpenMP/simd_only.c

Index: test/OpenMP/simd_only.c
===================================================================
--- /dev/null
+++ test/OpenMP/simd_only.c
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-LABEL: @simd_plain
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: store float %{{.*}}, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_plain(float *a, float *b, float *c, int N) {
+  #pragma omp simd
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+// CHECK-LABEL: @simd_safelen_clause
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-LABEL: omp.inner.for.inc:
+// CHECK: br label %omp.inner.for.cond, !llvm.loop
+// CHECK: ret void
+void simd_safelen_clause(float *a, float *b, float *c, int N) {
+  #pragma omp simd safelen(4)
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+extern long long initial_val();
+
+// CHECK-LABEL: @simd_simdlen_and_linear_clause
+// CHECK: omp.inner.for.body:
+// CHECK: !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_simdlen_and_linear_clause(float *a, float *b, float *c, int N) {
+  long long lv = initial_val();
+  #pragma omp simd simdlen(2) linear(lv: 4)
+  for (int i = 0; i < N; ++i) {
+    a[lv] = b[lv] * c[lv];
+    lv += 4;
+  }
+}
+
+extern float gfloat;
+
+// CHECK-LABEL: @simd_aligned_and_private_clause
+// CHECK-LABEL: entry:
+// CHECK: %gfloat = alloca float, align 4
+// CHECK: store float 1.000000e+00, float* @gfloat, align 4
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK-NOT: @gfloat
+// CHECK: load{{.*}}!llvm.mem.parallel_loop_access
+// CHECK: store float {{.*}}, float* %gfloat, align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %{{[0-9]+}}, 2.000000e+00
+// CHECK: store float %[[FADD]], float* {{.*}}, align 4, !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_aligned_and_private_clause(float *a, float *b, float *c, int N) {
+  gfloat = 1.0f;
+  #pragma omp simd aligned(a:4) private(gfloat)
+  for (int i = 0; i < N; i += 2) {
+    gfloat = b[i] * c[i];
+    a[i] = gfloat + 2.0f;
+  }
+}
+
+// CHECK-LABEL: @simd_lastprivate_and_reduction_clause
+// CHECK-LABEL: entry:
+// CHECK: %[[SUMVAR:sum[0-9]+]] = alloca float, align 4
+// CHECK: store float 0.000000e+00, float* %[[SUMVAR]], align 4
+// CHECK-LABEL: omp.inner.for.body
+// CHECK: %[[LOAD:[0-9]+]] = load float, float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %[[LOAD]], %mul{{.*}}
+// CHECK: store float %[[FADD]], float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: store i32{{.*}}, i32* %[[IDXVAR:idx[0-9]+]]
+// CHECK-LABEL: omp.inner.for.end:
+// CHECK-DAG: %[[TMP1:[0-9]+]] = load i32, i32* %[[IDXVAR]], align 4
+// CHECK-DAG: store i32 %[[TMP1]], i32* %idx, align 4
+// CHECK-DAG: %[[INITVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK-DAG: %[[TMP2:[0-9]+]] = load float, float* %[[SUMVAR]], align 4
+// CHECK-DAG: %[[SUMMED:add[0-9]+]] = fadd float %[[INITVAL]], %[[TMP2]]
+// CHECK-DAG: store float %[[SUMMED]], float* %sum, align 4
+// CHECK-LABEL: simd.if.end:
+// CHECK: %[[OUTVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK: %[[OUTADDR:[0-9]+]] = load float*, float** %a.addr, align 8
+// CHECK: store float %[[OUTVAL]], float* %[[OUTADDR]], align 4
+// CHECK: %[[RETIDX:[0-9]+]] = load i32, i32* %idx, align 4
+// CHECK: ret i32 %[[RETIDX]]
+int simd_lastprivate_and_reduction_clause(float *a, float *b, float *c, int N) {
+  float sum = 0.0f;
+  int idx;
+  #pragma omp simd lastprivate(idx) reduction(+:sum)
+  for (int i = 0; i < N; ++i) {
+    sum += b[i] * c[i];
+    idx = i * 2;
+  }
+
+  *a = sum;
+  return idx;
+}
+
+// CHECK-LABEL: @simd_collapse_clause
+// CHECK: omp.inner.for.body:
+// CHECK-NOT: for.body:
+// CHECK: ret void
+void simd_collapse_clause(float **a, float **b, float **c, int N, int M) {
+  #pragma omp simd collapse(2)
+  for (int i = 0; i < N; ++i)
+    for (int j = 0; j < M; ++j)
+      a[i][j] = b[i][j] * c[i][j];
+}
+
+// Negative tests; there is no simd directive, so these should be emitted as normal code.
+
+// CHECK-LABEL: @parallel_for
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK-NOT: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for(float *a, float *b, float *c, int N) {
+  #pragma omp parallel for
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+extern void long_running_func(int);
+
+// CHECK-LABEL: @taskloop
+// CHECK-NOT: call i8* @__kmpc_omp_task_alloc
+// CHECK-NOT: call void @__kmpc_taskloop
+// CHECK: ret void
+void taskloop(int N) {
+  #pragma omp taskloop
+  for (int i = 0; i < N; ++i)
+    long_running_func(i);
+}
+
+// Combined constructs; the simd part should work and the rest should be ignored.
+
+// CHECK-LABEL: @parallel_for_simd
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for_simd(float *a, float *b, float *c, int N) {
+#pragma omp parallel for simd num_threads(2) simdlen(4)
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+// Make sure there are no declarations of libomp runtime functions
+// CHECK-NOT: declare void @__kmpc
+
+// CHECK-LABEL: !llvm.ident = !{!0}
+
+// simd_safelen_clause vectorize width metadata
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 4}
+// simd_simdlen_and_linear_clause vectorize width metadata
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 2}
Index: test/OpenMP/linking.c
===================================================================
--- test/OpenMP/linking.c
+++ test/OpenMP/linking.c
@@ -89,3 +89,14 @@
 // CHECK-MSVC-ILINK-64-SAME: -libpath:{{.+}}/../lib
 // CHECK-MSVC-ILINK-64-SAME: -defaultlib:libiomp5md.lib
 //
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target aarch64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-AA64 %s
+// CHECK-SIMD-ONLY-AA64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-AA64-NOT: "-lpthread"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target x86_64-unknown-linux \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-X64 %s
+// CHECK-SIMD-ONLY-X64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-X64-NOT: "-lpthread"
Index: test/OpenMP/declare_simd_codegen.cpp
===================================================================
--- test/OpenMP/declare_simd_codegen.cpp
+++ test/OpenMP/declare_simd_codegen.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
 // expected-no-diagnostics
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -983,7 +983,7 @@
 }
 
 VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
   // If we are attempting to capture a global variable in a directive with
@@ -1029,7 +1029,7 @@
 }
 
 bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   return DSAStack->hasExplicitDSA(
       D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
 }
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -13961,7 +13961,8 @@
   // Capture global variables if it is required to use private copy of this
   // variable.
   bool IsGlobal = !Var->hasLocalStorage();
-  if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var)))
+  if (IsGlobal && !((LangOpts.OpenMP || LangOpts.OpenMPSimd) &&
+                    IsOpenMPCapturedDecl(Var)))
     return true;
 
   // Walk up the stack to determine whether we can capture the variable,
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -213,7 +213,7 @@
 
     PP.AddPragmaHandler("OPENCL", FPContractHandler.get());
   }
-  if (getLangOpts().OpenMP)
+  if (getLangOpts().OpenMP || getLangOpts().OpenMPSimd)
     OpenMPHandler.reset(new PragmaOpenMPHandler());
   else
     OpenMPHandler.reset(new PragmaNoOpenMPHandler());
Index: lib/Parse/ParseOpenMP.cpp
===================================================================
--- lib/Parse/ParseOpenMP.cpp
+++ lib/Parse/ParseOpenMP.cpp
@@ -149,6 +149,33 @@
       DKind = F[i][2];
     }
   }
+
+  // If we're only interested in the simd pragmas, map any combined
+  // construct containing a simd directive to plain 'simd', keep
+  // 'declare simd' as-is, and map any other directive to 'unknown'.
+  if (P.getLangOpts().OpenMPSimd) {
+    switch (DKind) {
+      default:
+        DKind = OMPD_unknown;
+        break;
+      case OMPD_declare_simd:
+        break;
+      case OMPD_simd:
+      case OMPD_parallel_for_simd:
+      case OMPD_for_simd:
+      case OMPD_taskloop_simd:
+      case OMPD_distribute_parallel_for_simd:
+      case OMPD_distribute_simd:
+      case OMPD_target_simd:
+      case OMPD_teams_distribute_simd:
+      case OMPD_teams_distribute_parallel_for_simd:
+      case OMPD_target_teams_distribute_parallel_for_simd:
+      case OMPD_target_teams_distribute_simd:
+        DKind = OMPD_simd;
+        break;
+    }
+  }
+
   return DKind < OMPD_unknown ? static_cast<OpenMPDirectiveKind>(DKind)
                               : OMPD_unknown;
 }
@@ -1015,7 +1042,10 @@
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
   case OMPD_unknown:
-    Diag(Tok, diag::err_omp_unknown_directive);
+    // Don't report unknown directives if we're only looking at simd,
+    // as the filter function will have switched the kind.
+    if (!getLangOpts().OpenMPSimd)
+      Diag(Tok, diag::err_omp_unknown_directive);
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
   }
@@ -1105,6 +1135,18 @@
                                      OpenMPClauseKind CKind, bool FirstClause) {
   OMPClause *Clause = nullptr;
   bool ErrorFound = false;
+
+  // If we're only interpreting 'simd' directives, silently skip any clause
+  // that doesn't apply to them instead of emitting an error.
+  if (DKind == OMPD_simd && getLangOpts().OpenMPSimd &&
+      !isAllowedClauseForDirective(DKind, CKind)) {
+
+    if (PP.LookAhead(/*N=*/0).is(tok::l_paren))
+      SkipUntil(tok::r_paren);
+
+    return nullptr;
+  }
+
   // Check if clause is allowed for the given directive.
   if (CKind != OMPC_unknown && !isAllowedClauseForDirective(DKind, CKind)) {
     Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2199,6 +2199,7 @@
       Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
   Opts.OpenMPIsDevice =
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+  Opts.OpenMPSimd = !Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_simd);
 
   if (Opts.OpenMP) {
     int Version =
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -3203,7 +3203,9 @@
       // semantic analysis, etc.
       break;
     }
-  }
+  } else if (Args.hasFlag(options::OPT_fopenmp_simd,
+                          options::OPT_fno_openmp_simd, /*Default=*/false))
+    CmdArgs.push_back("-fopenmp-simd");
 
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
   Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -119,7 +119,7 @@
     createObjCRuntime();
   if (LangOpts.OpenCL)
     createOpenCLRuntime();
-  if (LangOpts.OpenMP)
+  if (LangOpts.OpenMP || LangOpts.OpenMPSimd)
     createOpenMPRuntime();
   if (LangOpts.CUDA)
     createCUDARuntime();
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -794,7 +794,8 @@
   }
 
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-    if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+    if ((CGM.getLangOpts().OpenMP || CGM.getLangOpts().OpenMPSimd)
+        && FD->hasAttr<OMPDeclareSimdDeclAttr>())
       CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
 
   // Add no-jump-tables value.
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1261,6 +1261,8 @@
 def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
 def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
 def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>, Flags<[NoArgumentUnused]>;
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -187,6 +187,7 @@
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPSimd        , 1, 0, "OpenMP support for simd and declare simd directives only")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1988,6 +1988,11 @@
 Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with
 `-fno-openmp`.
 
+Use `-fopenmp-simd` to enable OpenMP simd features only, without linking
+the OpenMP runtime library; for combined constructs
+(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses
+are ignored. This can be disabled with `-fno-openmp-simd`.
+
 Controlling implementation limits
 ---------------------------------
 
Index: docs/ClangCommandLineReference.rst
===================================================================
--- docs/ClangCommandLineReference.rst
+++ docs/ClangCommandLineReference.rst
@@ -1451,6 +1451,8 @@
 
 .. option:: -fopenmp, -fno-openmp
 
+.. option:: -fopenmp-simd, -fno-openmp-simd
+
 .. option:: -fopenmp-dump-offload-linker-script
 
 .. option:: -fopenmp-use-tls