Author: Tom Honermann
Date: 2025-04-17T09:14:45-04:00
New Revision: 0348ff515854438cab8a48b79e8839cb99d48701

URL: 
https://github.com/llvm/llvm-project/commit/0348ff515854438cab8a48b79e8839cb99d48701
DIFF: 
https://github.com/llvm/llvm-project/commit/0348ff515854438cab8a48b79e8839cb99d48701.diff

LOG: [SYCL] Basic code generation for SYCL kernel caller offload entry point 
functions. (#133030)

A function declared with the `sycl_kernel_entry_point` attribute,
sometimes called a SYCL kernel entry point function, specifies a pattern
from which the parameters and body of an offload entry point function,
sometimes called a SYCL kernel caller function, are derived.

SYCL kernel caller functions are emitted during SYCL device compilation.
Their parameters and body are derived from the `SYCLKernelCallStmt`
statement and `OutlinedFunctionDecl` declaration associated with their
corresponding SYCL kernel entry point function. A distinct SYCL kernel
caller function is generated for each SYCL kernel entry point function
defined as a non-inline function or ODR-used in the translation unit.

The name of each SYCL kernel caller function is parameterized by the
SYCL kernel name type specified by the `sycl_kernel_entry_point`
attribute attached to the corresponding SYCL kernel entry point
function. For the moment, the Itanium ABI mangled name for typeinfo data
(`_ZTS<type>`) is used to name these functions; a future change will
switch to a more appropriate naming scheme.

The calling convention used for a SYCL kernel caller function is target
dependent. Support for AMDGCN, NVPTX, and SPIR targets is currently
provided. These functions are required to observe the language
restrictions for SYCL devices as specified by the SYCL 2020
specification; this includes a forward progress guarantee and prohibits
recursion.

Only SYCL kernel caller functions, functions declared as
`SYCL_EXTERNAL`, and functions directly or indirectly referenced from
those functions should be emitted during device compilation. Pruning of
other declarations has not yet been implemented.

---------

Co-authored-by: Elizabeth Andrews <elizabeth.andr...@intel.com>

Added: 
    clang/lib/CodeGen/CodeGenSYCL.cpp
    clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp

Modified: 
    clang/include/clang/AST/SYCLKernelInfo.h
    clang/lib/AST/ASTContext.cpp
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/CMakeLists.txt
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/CodeGen/CodeGenModule.h
    clang/lib/CodeGen/CodeGenTypes.h
    clang/lib/CodeGen/Targets/NVPTX.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/SYCLKernelInfo.h 
b/clang/include/clang/AST/SYCLKernelInfo.h
index 4a4827e601053..3825af86c14e3 100644
--- a/clang/include/clang/AST/SYCLKernelInfo.h
+++ b/clang/include/clang/AST/SYCLKernelInfo.h
@@ -22,9 +22,10 @@ namespace clang {
 class SYCLKernelInfo {
 public:
   SYCLKernelInfo(CanQualType KernelNameType,
-                 const FunctionDecl *KernelEntryPointDecl)
+                 const FunctionDecl *KernelEntryPointDecl,
+                 const std::string &KernelName)
       : KernelNameType(KernelNameType),
-        KernelEntryPointDecl(KernelEntryPointDecl) {}
+        KernelEntryPointDecl(KernelEntryPointDecl), KernelName(KernelName) {}
 
   CanQualType getKernelNameType() const { return KernelNameType; }
 
@@ -32,9 +33,12 @@ class SYCLKernelInfo {
     return KernelEntryPointDecl;
   }
 
+  const std::string &GetKernelName() const { return KernelName; }
+
 private:
   CanQualType KernelNameType;
   const FunctionDecl *KernelEntryPointDecl;
+  std::string KernelName;
 };
 
 } // namespace clang

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index bf24704e48eaa..860e6ec0fb47e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12825,6 +12825,15 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
     if (!FD->doesThisDeclarationHaveABody())
       return FD->doesDeclarationForceExternallyVisibleDefinition();
 
+    // Function definitions with the sycl_kernel_entry_point attribute are
+    // required during device compilation so that SYCL kernel caller offload
+    // entry points are emitted.
+    if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
+      return true;
+
+    // FIXME: Functions declared with SYCL_EXTERNAL are required during
+    // device compilation.
+
     // Constructors and destructors are required.
     if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
       return true;
@@ -14832,9 +14841,36 @@ void 
ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
   }
 }
 
-static SYCLKernelInfo BuildSYCLKernelInfo(CanQualType KernelNameType,
+static SYCLKernelInfo BuildSYCLKernelInfo(ASTContext &Context,
+                                          CanQualType KernelNameType,
                                           const FunctionDecl *FD) {
-  return {KernelNameType, FD};
+  // Host and device compilation may use different ABIs and different ABIs
+  // may allocate name mangling discriminators differently. A discriminator
+  // override is used to ensure consistent discriminator allocation across
+  // host and device compilation.
+  auto DeviceDiscriminatorOverrider =
+      [](ASTContext &Ctx, const NamedDecl *ND) -> UnsignedOrNone {
+    if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
+      if (RD->isLambda())
+        return RD->getDeviceLambdaManglingNumber();
+    return std::nullopt;
+  };
+  std::unique_ptr<MangleContext> MC{ItaniumMangleContext::create(
+      Context, Context.getDiagnostics(), DeviceDiscriminatorOverrider)};
+
+  // Construct a mangled name for the SYCL kernel caller offload entry point.
+  // FIXME: The Itanium typeinfo mangling (_ZTS<type>) is currently used to
+  // name the SYCL kernel caller offload entry point function. This mangling
+  // does not suffice to clearly identify symbols that correspond to SYCL
+  // kernel caller functions, nor is this mangling natural for targets that
+  // use a non-Itanium ABI.
+  std::string Buffer;
+  Buffer.reserve(128);
+  llvm::raw_string_ostream Out(Buffer);
+  MC->mangleCanonicalTypeName(KernelNameType, Out);
+  std::string KernelName = Out.str();
+
+  return {KernelNameType, FD, KernelName};
 }
 
 void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
@@ -14855,8 +14891,8 @@ void 
ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
           declaresSameEntity(FD, IT->second.getKernelEntryPointDecl())) &&
          "SYCL kernel name conflict");
   (void)IT;
-  SYCLKernels.insert(
-      std::make_pair(KernelNameType, BuildSYCLKernelInfo(KernelNameType, FD)));
+  SYCLKernels.insert(std::make_pair(
+      KernelNameType, BuildSYCLKernelInfo(*this, KernelNameType, FD)));
 }
 
 const SYCLKernelInfo &ASTContext::getSYCLKernelInfo(QualType T) const {

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index bc1035163a8eb..8cb27420dd911 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -739,6 +739,17 @@ 
CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
                                  RequiredArgs::All);
 }
 
+const CGFunctionInfo &
+CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
+                                                 const FunctionArgList &args) {
+  CanQualTypeList argTypes = getArgTypesForDeclaration(Context, args);
+
+  return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
+                                 argTypes,
+                                 FunctionType::ExtInfo(CC_OpenCLKernel),
+                                 /*paramInfos=*/{}, RequiredArgs::All);
+}
+
 /// Arrange a call to a C++ method, passing the given arguments.
 ///
 /// numPrefixArgs is the number of ABI-specific prefix arguments we have. It

diff  --git a/clang/lib/CodeGen/CMakeLists.txt 
b/clang/lib/CodeGen/CMakeLists.txt
index dc5b2a35583b4..c377ac0786747 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -102,6 +102,7 @@ add_clang_library(clangCodeGen
   CodeGenFunction.cpp
   CodeGenModule.cpp
   CodeGenPGO.cpp
+  CodeGenSYCL.cpp
   CodeGenTBAA.cpp
   CodeGenTypes.cpp
   ConstantInitBuilder.cpp

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 26e09fe239242..83d8d4f758195 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3309,6 +3309,27 @@ void CodeGenModule::EmitDeferred() {
   CurDeclsToEmit.swap(DeferredDeclsToEmit);
 
   for (GlobalDecl &D : CurDeclsToEmit) {
+    // Functions declared with the sycl_kernel_entry_point attribute are
+    // emitted normally during host compilation. During device compilation,
+    // a SYCL kernel caller offload entry point function is generated and
+    // emitted in place of each of these functions.
+    if (const auto *FD = D.getDecl()->getAsFunction()) {
+      if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>() &&
+          FD->isDefined()) {
+        // Functions with an invalid sycl_kernel_entry_point attribute are
+        // ignored during device compilation.
+        if (!FD->getAttr<SYCLKernelEntryPointAttr>()->isInvalidAttr()) {
+          // Generate and emit the SYCL kernel caller function.
+          EmitSYCLKernelCaller(FD, getContext());
+          // Recurse to emit any symbols directly or indirectly referenced
+          // by the SYCL kernel caller function.
+          EmitDeferred();
+        }
+        // Do not emit the sycl_kernel_entry_point attributed function.
+        continue;
+      }
+    }
+
     // We should call GetAddrOfGlobal with IsForDefinition set to true in order
     // to get GlobalValue with exactly the type we need, not something that
     // might had been created for another decl with the same mangled name but
@@ -3644,6 +3665,10 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl 
*Global) {
     // Defer until all versions have been semantically checked.
     if (FD->hasAttr<TargetVersionAttr>() && !FD->isMultiVersion())
       return false;
+    // Defer emission of SYCL kernel entry point functions during device
+    // compilation.
+    if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
+      return false;
   }
   if (const auto *VD = dyn_cast<VarDecl>(Global)) {
     if (Context.getInlineVariableDefinitionKind(VD) ==

diff  --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index 46de3d868f901..9a0bc675e0baa 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1972,6 +1972,11 @@ class CodeGenModule : public CodeGenTypeCache {
   /// .gcda files in a way that persists in .bc files.
   void EmitCoverageFile();
 
+  /// Given a sycl_kernel_entry_point attributed function, emit the
+  /// corresponding SYCL kernel caller offload entry point function.
+  void EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
+                            ASTContext &Ctx);
+
   /// Determine whether the definition must be emitted; if this returns \c
   /// false, the definition can be emitted lazily if it's used.
   bool MustBeEmitted(const ValueDecl *D);

diff  --git a/clang/lib/CodeGen/CodeGenSYCL.cpp 
b/clang/lib/CodeGen/CodeGenSYCL.cpp
new file mode 100644
index 0000000000000..b9a96fe8ab838
--- /dev/null
+++ b/clang/lib/CodeGen/CodeGenSYCL.cpp
@@ -0,0 +1,72 @@
+//===--------- CodeGenSYCL.cpp - Code for SYCL kernel generation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code required for generation of SYCL kernel caller offload
+// entry point functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+static void SetSYCLKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) {
+  // SYCL 2020 device language restrictions require forward progress and
+  // disallow recursion.
+  Fn->setDoesNotRecurse();
+  if (CGF.checkIfFunctionMustProgress())
+    Fn->addFnAttr(llvm::Attribute::MustProgress);
+}
+
+void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl 
*KernelEntryPointFn,
+                                         ASTContext &Ctx) {
+  assert(Ctx.getLangOpts().SYCLIsDevice &&
+         "SYCL kernel caller offload entry point functions can only be emitted"
+         " during device compilation");
+
+  const auto *KernelEntryPointAttr =
+      KernelEntryPointFn->getAttr<SYCLKernelEntryPointAttr>();
+  assert(KernelEntryPointAttr && "Missing sycl_kernel_entry_point attribute");
+  assert(!KernelEntryPointAttr->isInvalidAttr() &&
+         "sycl_kernel_entry_point attribute is invalid");
+
+  // Find the SYCLKernelCallStmt.
+  SYCLKernelCallStmt *KernelCallStmt =
+      cast<SYCLKernelCallStmt>(KernelEntryPointFn->getBody());
+
+  // Retrieve the SYCL kernel caller parameters from the OutlinedFunctionDecl.
+  FunctionArgList Args;
+  const OutlinedFunctionDecl *OutlinedFnDecl =
+      KernelCallStmt->getOutlinedFunctionDecl();
+  Args.append(OutlinedFnDecl->param_begin(), OutlinedFnDecl->param_end());
+
+  // Compute the function info and LLVM function type.
+  const CGFunctionInfo &FnInfo =
+      getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args);
+  llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);
+
+  // Retrieve the generated name for the SYCL kernel caller function.
+  CanQualType KernelNameType =
+      Ctx.getCanonicalType(KernelEntryPointAttr->getKernelName());
+  const SYCLKernelInfo &KernelInfo = Ctx.getSYCLKernelInfo(KernelNameType);
+  auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage,
+                                    KernelInfo.GetKernelName(), &getModule());
+
+  // Emit the SYCL kernel caller function.
+  CodeGenFunction CGF(*this);
+  SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, Fn, false);
+  SetSYCLKernelAttributes(Fn, CGF);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Args,
+                    SourceLocation(), SourceLocation());
+  CGF.EmitFunctionBody(OutlinedFnDecl->getBody());
+  setDSOLocal(Fn);
+  SetLLVMFunctionAttributesForDefinition(cast<Decl>(OutlinedFnDecl), Fn);
+  CGF.FinishFunction();
+}

diff  --git a/clang/lib/CodeGen/CodeGenTypes.h 
b/clang/lib/CodeGen/CodeGenTypes.h
index 307048bcc510d..29f6f1ec80bc3 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -229,6 +229,13 @@ class CodeGenTypes {
   const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType,
                                                    const CallArgList &args);
 
+  /// A SYCL kernel caller function is an offload device entry point function
+  /// with a target device dependent calling convention such as amdgpu_kernel,
+  /// ptx_kernel, or spir_kernel.
+  const CGFunctionInfo &
+  arrangeSYCLKernelCallerDeclaration(QualType resultType,
+                                     const FunctionArgList &args);
+
   /// Objective-C methods are C functions with some implicit parameters.
   const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
   const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl 
*MD,

diff  --git a/clang/lib/CodeGen/Targets/NVPTX.cpp 
b/clang/lib/CodeGen/Targets/NVPTX.cpp
index f617e645a9eaf..25ab28c54b659 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -77,6 +77,10 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
     return true;
   }
 
+  unsigned getOpenCLKernelCallingConv() const override {
+    return llvm::CallingConv::PTX_Kernel;
+  }
+
   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
   // resulting MDNode to the nvvm.annotations MDNode.
   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,

diff  --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp 
b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
new file mode 100644
index 0000000000000..195f1d9d26d7d
--- /dev/null
+++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-unknown-linux-gnu 
-std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-LINUX %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc 
-std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck 
--check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck 
--check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple 
x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | 
FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+
+// Test the generation of SYCL kernel caller functions. These functions are
+// generated from functions declared with the sycl_kernel_entry_point attribute
+// and emitted during device compilation. They are not emitted during host
+// compilation.
+
+struct single_purpose_kernel_name;
+struct single_purpose_kernel {
+  void operator()() const {}
+};
+
+[[clang::sycl_kernel_entry_point(single_purpose_kernel_name)]]
+void single_purpose_kernel_task(single_purpose_kernel kernelFunc) {
+  kernelFunc();
+}
+
+template <typename KernelName, typename KernelType>
+[[clang::sycl_kernel_entry_point(KernelName)]]
+void kernel_single_task(KernelType kernelFunc) {
+  kernelFunc(42);
+}
+
+int main() {
+  single_purpose_kernel obj;
+  single_purpose_kernel_task(obj);
+  int capture;
+  auto lambda = [=](auto) { (void) capture; };
+  kernel_single_task<decltype(lambda)>(lambda);
+}
+
+// Verify that SYCL kernel caller functions are not emitted during host
+// compilation.
+//
+// CHECK-HOST-NOT: _ZTS26single_purpose_kernel_name
+// CHECK-HOST-NOT: _ZTSZ4mainE18lambda_kernel_name
+
+// Verify that sycl_kernel_entry_point attributed functions are not emitted
+// during device compilation.
+//
+// CHECK-DEVICE-NOT: single_purpose_kernel_task
+// CHECK-DEVICE-NOT: kernel_single_task
+
+// Verify that no code is generated for the bodies of sycl_kernel_entry_point
+// attributed functions during host compilation. ODR-use of these functions may
+// require them to be emitted, but they have no effect if called.
+//
+// CHECK-HOST-LINUX:      define dso_local void 
@_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %kernelFunc = alloca 
%struct.single_purpose_kernel, align 1
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-LINUX:      define internal void 
@_Z18kernel_single_taskIZ4mainEUlT_E_S1_EvT0_(i32 %kernelFunc.coerce) 
#{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %kernelFunc = alloca %class.anon, align 4
+// CHECK-HOST-LINUX-NEXT:   %coerce.dive = getelementptr inbounds nuw 
%class.anon, ptr %kernelFunc, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT:   store i32 %kernelFunc.coerce, ptr %coerce.dive, 
align 4
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define dso_local void 
@"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 
%kernelFunc.coerce) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %kernelFunc = alloca 
%struct.single_purpose_kernel, align 1
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw 
%struct.single_purpose_kernel, ptr %kernelFunc, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   store i8 %kernelFunc.coerce, ptr %coerce.dive, 
align 1
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define internal void 
@"??$kernel_single_task@V<lambda_1>@?0??main@@9@V1?0??2@9@@@YAXV<lambda_1>@?0??main@@9@@Z"(i32
 %kernelFunc.coerce) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %kernelFunc = alloca %class.anon, align 4
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw 
%class.anon, ptr %kernelFunc, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   store i32 %kernelFunc.coerce, ptr %coerce.dive, 
align 4
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+
+// Verify that SYCL kernel caller functions are emitted for each device target.
+//
+// FIXME: The following set of matches are used to skip over the declaration of
+// main(). main() shouldn't be emitted in device code, but that pruning isn't
+// performed yet.
+// CHECK-DEVICE:      Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-DEVICE-NEXT: define {{[a-z_ ]*}}noundef i32 @main() #0
+
+// IR for the SYCL kernel caller function generated for
+// single_purpose_kernel_task with single_purpose_kernel_name as the SYCL kernel
+// name type.
+//
+// CHECK-AMDGCN:      Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void 
@_ZTS26single_purpose_kernel_name
+// CHECK-AMDGCN-SAME:   (ptr addrspace(4) noundef 
byref(%struct.single_purpose_kernel) align 1 %0) #[[AMDGCN_ATTR0:[0-9]+]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %coerce = alloca %struct.single_purpose_kernel, align 
1, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %kernelFunc = addrspacecast ptr addrspace(5) %coerce 
to ptr
+// CHECK-AMDGCN-NEXT:   call void @llvm.memcpy.p0.p4.i64(ptr align 1 
%kernelFunc, ptr addrspace(4) align 1 %0, i64 1, i1 false)
+// CHECK-AMDGCN-NEXT:   call void @_ZNK21single_purpose_kernelclEv
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 1 dereferenceable(1) 
%kernelFunc) #[[AMDGCN_ATTR1:[0-9]+]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+// CHECK-AMDGCN:      define linkonce_odr void @_ZNK21single_purpose_kernelclEv
+//
+// CHECK-NVPTX:       Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-NVPTX-NEXT:  define dso_local ptx_kernel void 
@_ZTS26single_purpose_kernel_name
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%struct.single_purpose_kernel) 
align 1 %kernelFunc) #[[NVPTX_ATTR0:[0-9]+]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZNK21single_purpose_kernelclEv
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 1 dereferenceable(1) 
%kernelFunc) #[[NVPTX_ATTR1:[0-9]+]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+// CHECK-NVPTX:       define linkonce_odr void @_ZNK21single_purpose_kernelclEv
+//
+// CHECK-SPIR:        Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-SPIR-NEXT:   define {{[a-z_ ]*}}spir_kernel void 
@_ZTS26single_purpose_kernel_name
+// CHECK-SPIR-SAME:     (ptr noundef byval(%struct.single_purpose_kernel) 
align 1 %kernelFunc) #[[SPIR_ATTR0:[0-9]+]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %kernelFunc.ascast = addrspacecast ptr %kernelFunc to 
ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZNK21single_purpose_kernelclEv
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 1 
dereferenceable_or_null(1) %kernelFunc.ascast) #[[SPIR_ATTR1:[0-9]+]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+// CHECK-SPIR:        define linkonce_odr spir_func void 
@_ZNK21single_purpose_kernelclEv
+
+// IR for the SYCL kernel caller function generated for kernel_single_task with
+// the closure type of the lambda defined in main() as the SYCL kernel name type.
+//
+// CHECK-AMDGCN:      Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @_ZTSZ4mainEUlT_E_
+// CHECK-AMDGCN-SAME:   (i32 %kernelFunc.coerce) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %kernelFunc = alloca %class.anon, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %kernelFunc1 = addrspacecast ptr addrspace(5) 
%kernelFunc to ptr
+// CHECK-AMDGCN-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, 
ptr %kernelFunc1, i32 0, i32 0
+// CHECK-AMDGCN-NEXT:   store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4
+// CHECK-AMDGCN-NEXT:   call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 4 dereferenceable(4) 
%kernelFunc1, i32 noundef 42) #[[AMDGCN_ATTR1]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+// CHECK-AMDGCN:      define internal void @_ZZ4mainENKUlT_E_clIiEEDaS_
+//
+// CHECK-NVPTX:       Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-NVPTX-NEXT:  define dso_local ptx_kernel void @_ZTSZ4mainEUlT_E_
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%class.anon) align 4 %kernelFunc) 
#[[NVPTX_ATTR0]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 4 dereferenceable(4) 
%kernelFunc, i32 noundef 42) #[[NVPTX_ATTR1]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+// CHECK-NVPTX:       define internal void @_ZZ4mainENKUlT_E_clIiEEDaS_
+//
+// CHECK-SPIR:        Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone
+// CHECK-SPIR-NEXT:   define {{[a-z_ ]*}}spir_kernel void @_ZTSZ4mainEUlT_E_
+// CHECK-SPIR-SAME:     (ptr noundef byval(%class.anon) align 4 %kernelFunc) 
#[[SPIR_ATTR0]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %kernelFunc.ascast = addrspacecast ptr %kernelFunc to 
ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 4 
dereferenceable_or_null(4) %kernelFunc.ascast, i32 noundef 42) #[[SPIR_ATTR1]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+// CHECK-SPIR:        define internal spir_func void 
@_ZZ4mainENKUlT_E_clIiEEDaS_
+
+// CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline 
norecurse nounwind optnone "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" }
+// CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind }
+//
+// CHECK-NVPTX: #[[NVPTX_ATTR0]] = { convergent mustprogress noinline 
norecurse nounwind optnone "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+ptx32" }
+// CHECK-NVPTX: #[[NVPTX_ATTR1]] = { convergent nounwind }
+//
+// CHECK-SPIR: #[[SPIR_ATTR0]] = { convergent mustprogress noinline norecurse 
nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK-SPIR: #[[SPIR_ATTR1]] = { convergent nounwind }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to