tianshilei1992 updated this revision to Diff 374433.
tianshilei1992 added a comment.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

The patch can now emit the correct execution mode.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110286/new/

https://reviews.llvm.org/D110286

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2755,15 +2755,21 @@
 
 OpenMPIRBuilder::InsertPointTy
 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
-                                  bool RequiresFullRuntime) {
+                                  bool IsSIMD, bool RequiresFullRuntime) {
   if (!updateToLocation(Loc))
     return Loc.IP;
 
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
   Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
-      IntegerType::getInt8Ty(Int8->getContext()),
-      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+  ConstantInt *ModeCI =
+      ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), Mode);
   ConstantInt *UseGenericStateMachine =
       ConstantInt::getBool(Int32->getContext(), !IsSPMD);
   ConstantInt *RequiresFullRuntimeVal =
@@ -2773,7 +2779,7 @@
       omp::RuntimeFunction::OMPRTL___kmpc_target_init);
 
   CallInst *ThreadKind = Builder.CreateCall(
-      Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+      Fn, {Ident, ModeCI, UseGenericStateMachine, RequiresFullRuntimeVal});
 
   Value *ExecUserCode = Builder.CreateICmpEQ(
       ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@@ -2807,23 +2813,29 @@
 }
 
 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
-                                         bool IsSPMD,
+                                         bool IsSPMD, bool IsSIMD,
                                          bool RequiresFullRuntime) {
   if (!updateToLocation(Loc))
     return;
 
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
   Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
-      IntegerType::getInt8Ty(Int8->getContext()),
-      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+  ConstantInt *ModeCI =
+      ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), Mode);
   ConstantInt *RequiresFullRuntimeVal =
       ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
 
   Function *Fn = getOrCreateRuntimeFunctionPtr(
       omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
 
-  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
+  Builder.CreateCall(Fn, {Ident, ModeCI, RequiresFullRuntimeVal});
 }
 
 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1005,14 +1005,18 @@
   /// \param Loc The insert and source location description.
   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
+  /// \param IsSIMD Flag to indicate if the SIMD execution mode bit is set.
   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+                                 bool IsSIMD, bool RequiresFullRuntime);
 
   /// Create a runtime call for kmpc_target_deinit
   ///
   /// \param Loc The insert and source location description.
   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
+  /// \param IsSIMD Flag to indicate if the SIMD execution mode bit is set.
   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
+                          bool IsSIMD, bool RequiresFullRuntime);
 
   ///}
 
Index: llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -133,7 +133,8 @@
   OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
   OMP_TGT_EXEC_MODE_GENERIC_SPMD =
       OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD)
+  OMP_TGT_EXEC_MODE_SIMD = 1 << 2,
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_SIMD)
 };
 
 } // end namespace omp
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3863,6 +3863,10 @@
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder);
   bool IsTargetSpecified =
       Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
+  Opts.OpenMPTargetSimd =
+      IsTargetSpecified &&
+      Args.hasFlag(options::OPT_fopenmp_target_simd,
+                   options::OPT_fno_openmp_target_simd, /*Default=*/false);
   Opts.OpenMPTargetNewRuntime =
       Opts.OpenMPIsDevice &&
       Args.hasArg(options::OPT_fopenmp_target_new_runtime);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6694,6 +6694,10 @@
       CmdArgs.push_back("-fopenmp-host-ir-file-path");
       CmdArgs.push_back(Args.MakeArgString(OpenMPDeviceInput->getFilename()));
     }
+    if (Args.hasFlag(options::OPT_fopenmp_target_simd,
+                     options::OPT_fno_openmp_target_simd,
+                     /*Default=*/false))
+      CmdArgs.push_back("-fopenmp-target-simd");
   }
 
   if (Triple.isAMDGPU()) {
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -49,11 +49,11 @@
 
   /// Helper for target directive initialization.
   void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                      bool IsSPMD);
+                      bool IsSPMD, bool IsSIMD);
 
   /// Helper for target directive finalization.
   void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                        bool IsSPMD);
+                        bool IsSPMD, bool IsSIMD);
 
   /// Helper for generic variables globalization prolog.
   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1029,7 +1029,8 @@
     void Enter(CodeGenFunction &CGF) override {
       auto &RT =
           static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
+      RT.emitKernelInit(CGF, EST, /* IsSPMD */ false,
+                        CGF.CGM.getLangOpts().OpenMPTargetSimd);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
@@ -1037,7 +1038,8 @@
       auto &RT =
           static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
+      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false,
+                          CGF.CGM.getLangOpts().OpenMPTargetSimd);
     }
   } Action(EST);
   CodeGen.setAction(Action);
@@ -1048,22 +1050,24 @@
 }
 
 void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
-                                        EntryFunctionState &EST, bool IsSPMD) {
+                                        EntryFunctionState &EST, bool IsSPMD,
+                                        bool IsSIMD) {
   CGBuilderTy &Bld = CGF.Builder;
-  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
+  Bld.restoreIP(
+      OMPBuilder.createTargetInit(Bld, IsSPMD, IsSIMD, requiresFullRuntime()));
   IsInTargetMasterThreadRegion = IsSPMD;
   if (!IsSPMD)
     emitGenericVarsProlog(CGF, EST.Loc);
 }
 
 void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
-                                          EntryFunctionState &EST,
-                                          bool IsSPMD) {
+                                          EntryFunctionState &EST, bool IsSPMD,
+                                          bool IsSIMD) {
   if (!IsSPMD)
     emitGenericVarsEpilog(CGF);
 
   CGBuilderTy &Bld = CGF.Builder;
-  OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+  OMPBuilder.createTargetDeinit(Bld, IsSPMD, IsSIMD, requiresFullRuntime());
 }
 
 void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1088,13 +1092,15 @@
                          CGOpenMPRuntimeGPU::EntryFunctionState &EST)
         : RT(RT), EST(EST) {}
     void Enter(CodeGenFunction &CGF) override {
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
+      RT.emitKernelInit(CGF, EST, /* IsSPMD */ true,
+                        CGF.CGM.getLangOpts().OpenMPTargetSimd);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
     void Exit(CodeGenFunction &CGF) override {
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
+      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true,
+                          CGF.CGM.getLangOpts().OpenMPTargetSimd);
     }
   } Action(*this, EST);
   CodeGen.setAction(Action);
@@ -1111,13 +1117,19 @@
 // 'generic', the runtime reserves one warp for the master, otherwise, all
 // warps participate in parallel work.
 static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
-                                     bool Mode) {
+                                     bool IsSPMD, bool IsSIMD) {
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+
   auto *GVMode = new llvm::GlobalVariable(
       CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
       llvm::GlobalValue::WeakAnyLinkage,
-      llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
-                                              : OMP_TGT_EXEC_MODE_GENERIC),
-      Twine(Name, "_exec_mode"));
+      llvm::ConstantInt::get(CGM.Int8Ty, Mode), Twine(Name, "_exec_mode"));
   CGM.addCompilerUsedGlobal(GVMode);
 }
 
@@ -1152,15 +1164,16 @@
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
-  if (Mode)
+  bool IsSPMD = supportsSPMDExecutionMode(CGM.getContext(), D);
+  if (IsSPMD)
     emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
   else
     emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                       CodeGen);
 
-  setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
+  setPropertyExecutionMode(CGM, OutlinedFn->getName(), IsSPMD,
+                           CGM.getLangOpts().OpenMPTargetSimd);
 }
 
 namespace {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2406,9 +2406,12 @@
   Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
   HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
+def fopenmp_target_simd : Flag<["-"], "fopenmp-target-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
+  HelpText<"Emit OpenMP target offloading code that supports SIMD execution.">;
 def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
   HelpText<"Use the experimental OpenMP-IR-Builder codegen path.">;
 def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_target_simd : Flag<["-"], "fno-openmp-target-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>,
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -233,6 +233,7 @@
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
 LANGOPT(OpenMPExtensions  , 1, 1, "Enable all Clang extensions for OpenMP directives and clauses")
 LANGOPT(OpenMPSimd        , 1, 0, "Use SIMD only OpenMP support.")
+LANGOPT(OpenMPTargetSimd  , 1, 0, "Use OpenMP target offloading SIMD support.")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(OpenMPCUDAMode    , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
Index: clang/docs/ClangCommandLineReference.rst
===================================================================
--- clang/docs/ClangCommandLineReference.rst
+++ clang/docs/ClangCommandLineReference.rst
@@ -2037,6 +2037,10 @@
 
 Emit OpenMP code only for SIMD-based constructs.
 
+.. option:: -fopenmp-target-simd, -fno-openmp-target-simd
+
+Emit OpenMP target offloading code that supports SIMD execution.
+
 .. option:: -fopenmp-version=<arg>
 
 .. option:: -fopenmp-extensions, -fno-openmp-extensions
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to