hliao created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63143

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+
+#include "Inputs/cuda.h"
+
+// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+
+__device__ float d0(float x) {
+  return [](float x) { return x + 2.f; }(x);
+}
+
+__device__ float d1(float x) {
+  return [](float x) { return x * 2.f; }(x);
+}
+
+// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+template <typename F>
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+    *p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+    k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// HOST: @__hip_register_globals
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/Sema/SemaLambda.cpp
===================================================================
--- clang/lib/Sema/SemaLambda.cpp
+++ clang/lib/Sema/SemaLambda.cpp
@@ -272,9 +272,8 @@
   return false;
 }
 
-MangleNumberingContext *
-Sema::getCurrentMangleNumberContext(const DeclContext *DC,
-                                    Decl *&ManglingContextDecl) {
+MangleNumberingContext *Sema::getCurrentMangleNumberContext(
+    const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) {
   // Compute the context for allocating mangling numbers in the current
   // expression, if the ABI requires them.
   ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl;
@@ -322,7 +321,8 @@
   case Normal: {
     //  -- the bodies of non-exported nonspecialized template functions
     //  -- the bodies of inline functions
-    if ((IsInNonspecializedTemplate &&
+    if (SkpNoODRChk ||
+        (IsInNonspecializedTemplate &&
          !(ManglingContextDecl && isa<ParmVarDecl>(ManglingContextDecl))) ||
         isInInlineFunction(CurContext)) {
       ManglingContextDecl = nullptr;
@@ -337,7 +337,7 @@
 
   case StaticDataMember:
     //  -- the initializers of nonspecialized static members of template classes
-    if (!IsInNonspecializedTemplate) {
+    if (!SkpNoODRChk && !IsInNonspecializedTemplate) {
       ManglingContextDecl = nullptr;
       return nullptr;
     }
@@ -441,9 +441,9 @@
     Class->setLambdaMangling(Mangling->first, Mangling->second);
   } else {
     Decl *ManglingContextDecl;
-    if (MangleNumberingContext *MCtx =
-            getCurrentMangleNumberContext(Class->getDeclContext(),
-                                          ManglingContextDecl)) {
+    if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(
+            Class->getDeclContext(), ManglingContextDecl,
+            getLangOpts().CUDAForceLambdaODR)) {
       unsigned ManglingNumber = MCtx->getManglingNumber(Method);
       Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
     }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2431,6 +2431,9 @@
   if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
     Opts.CUDAHostDeviceConstexpr = 0;
 
+  if (Args.hasArg(OPT_fcuda_force_lambda_odr))
+    Opts.CUDAForceLambdaODR = 1;
+
   if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
     Opts.CUDADeviceApproxTranscendentals = 1;
 
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -1090,10 +1090,10 @@
   /// block literal.
   /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl
   /// associated with the context, if relevant.
-  MangleNumberingContext *getCurrentMangleNumberContext(
-    const DeclContext *DC,
-    Decl *&ManglingContextDecl);
-
+  MangleNumberingContext *
+  getCurrentMangleNumberContext(const DeclContext *DC,
+                                Decl *&ManglingContextDecl,
+                                bool SkpNoODRChk = false);
 
   /// SpecialMemberOverloadResult - The overloading result for a special member
   /// function.
Index: clang/include/clang/Driver/CC1Options.td
===================================================================
--- clang/include/clang/Driver/CC1Options.td
+++ clang/include/clang/Driver/CC1Options.td
@@ -863,6 +863,8 @@
   HelpText<"Allow variadic functions in CUDA device code.">;
 def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">,
   HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">;
+def fcuda_force_lambda_odr : Flag<["-"], "fcuda-force-lambda-odr">,
+  HelpText<"Force lambda naming following one definition rule (ODR).">;
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -219,6 +219,7 @@
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
 LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
 LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
+LANGOPT(CUDAForceLambdaODR, 1, 0, "force lambda naming following one definition rule (ODR)")
 
 LANGOPT(SYCLIsDevice      , 1, 0, "Generate code for SYCL device")
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D63143: [HIP] Enforce... Michael Liao via Phabricator via cfe-commits

Reply via email to