This revision was automatically updated to reflect the committed changes.
Closed by commit rL325264: [OpenCL] Fix __enqueue_block for block with captures 
(authored by yaxunl).
Herald added subscribers: llvm-commits, nhaehnle.

Changed prior to commit:
  https://reviews.llvm.org/D43240?vs=134041&id=134439#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D43240

Files:
  cfe/trunk/lib/CodeGen/CGBlocks.cpp
  cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
  cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
  cfe/trunk/lib/CodeGen/CodeGenFunction.h
  cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
  cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -29,6 +29,10 @@
 // COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
+void callee(int id, __global int *out) {
+  out[id] = id;
+}
+
 // COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
@@ -282,6 +286,21 @@
   // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
+  void (^block_C)(void) = ^{
+    callee(i, a);
+  };
+
+  // Emits block literal on stack and block kernel [[INVLK3]].
+  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+  enqueue_kernel(default_queue, flags, ndrange, block_C);
+
   // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
   // COMMON: call i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -333,6 +352,7 @@
 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -2,6 +2,10 @@
 
 typedef struct {int a;} ndrange_t;
 
+void callee(long id, global long *out) {
+  out[id] = id;
+}
+
 // CHECK-LABEL: define amdgpu_kernel void @test
 kernel void test(global char *a, char b, global long *c, long d) {
   queue_t default_queue;
@@ -24,6 +28,12 @@
                  c[0] = d;
                  ((local int*)lp)[0] = 1;
                  }, 100);
+
+  void (^block)(void) = ^{
+    callee(d, c);
+  };
+
+  enqueue_kernel(default_queue, flags, ndrange, block);
 }
 
 // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>)
@@ -42,4 +52,7 @@
 // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
 // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h
===================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h
@@ -1583,10 +1583,7 @@
   /// \return an LLVM value which is a pointer to a struct which contains
   /// information about the block, including the block invoke function, the
   /// captured variables, etc.
-  /// \param InvokeF will contain the block invoke function if it is not
-  /// nullptr.
-  llvm::Value *EmitBlockLiteral(const BlockExpr *,
-                                llvm::Function **InvokeF = nullptr);
+  llvm::Value *EmitBlockLiteral(const BlockExpr *);
   static void destroyBlockInfos(CGBlockInfo *info);
 
   llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -3010,11 +3007,8 @@
   LValue EmitOMPSharedLValue(const Expr *E);
 
 private:
-  /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
-  /// nullptr. It should be called without \p InvokeF if the caller does not
-  /// need invoke function to be returned.
-  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
-                                llvm::Function **InvokeF = nullptr);
+  /// Helpers for blocks.
+  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
 
   /// struct with the values to be passed to the OpenMP loop-related functions
   struct OMPLoopArguments {
Index: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -112,37 +112,51 @@
       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
 }
 
+/// Record emitted llvm invoke function and llvm block literal for the
+/// corresponding block expression.
+void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
+                                      llvm::Function *InvokeF,
+                                      llvm::Value *Block) {
+  assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() &&
+         "Block expression emitted twice");
+  assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
+  assert(Block->getType()->isPointerTy() && "Invalid block literal type");
+  EnqueuedBlockMap[E].InvokeFunc = InvokeF;
+  EnqueuedBlockMap[E].BlockArg = Block;
+  EnqueuedBlockMap[E].Kernel = nullptr;
+}
+
 CGOpenCLRuntime::EnqueuedBlockInfo
 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+  CGF.EmitScalarExpr(E);
+
   // The block literal may be assigned to a const variable. Chasing down
   // to get the block literal.
   if (auto DR = dyn_cast<DeclRefExpr>(E)) {
     E = cast<VarDecl>(DR->getDecl())->getInit();
   }
+  E = E->IgnoreImplicit();
   if (auto Cast = dyn_cast<CastExpr>(E)) {
     E = Cast->getSubExpr();
   }
   auto *Block = cast<BlockExpr>(E);
 
-  // The same block literal may be enqueued multiple times. Cache it if
-  // possible.
-  auto Loc = EnqueuedBlockMap.find(Block);
-  if (Loc != EnqueuedBlockMap.end()) {
-    return Loc->second;
+  assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
+         "Block expression not emitted");
+
+  // Do not emit the block wrapper again if it has been emitted.
+  if (EnqueuedBlockMap[Block].Kernel) {
+    return EnqueuedBlockMap[Block];
   }
 
-  // Emit block literal as a common block expression and get the block invoke
-  // function.
-  llvm::Function *Invoke;
-  auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
   auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
-      CGF, Invoke, V->stripPointerCasts());
+      CGF, EnqueuedBlockMap[Block].InvokeFunc,
+      EnqueuedBlockMap[Block].BlockArg->stripPointerCasts());
 
   // The common part of the post-processing of the kernel goes here.
   F->addFnAttr(llvm::Attribute::NoUnwind);
   F->setCallingConv(
       CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
-  EnqueuedBlockInfo Info{F, V};
-  EnqueuedBlockMap[Block] = Info;
-  return Info;
+  EnqueuedBlockMap[Block].Kernel = F;
+  return EnqueuedBlockMap[Block];
 }
Index: cfe/trunk/lib/CodeGen/CGBlocks.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp
@@ -740,40 +740,31 @@
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
     // The block literal is emitted as a global variable, and the block invoke
     // function has to be extracted from its initializer.
     if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
-      if (InvokeF) {
-        auto *GV = cast<llvm::GlobalVariable>(
-            cast<llvm::Constant>(Block)->stripPointerCasts());
-        auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
-        *InvokeF = cast<llvm::Function>(
-            BlockInit->getAggregateElement(2)->stripPointerCasts());
-      }
       return Block;
     }
     CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
     computeBlockInfo(CGM, this, blockInfo);
     blockInfo.BlockExpression = blockExpr;
-    return EmitBlockLiteral(blockInfo, InvokeF);
+    return EmitBlockLiteral(blockInfo);
   }
 
   // Find the block info for this block and take ownership of it.
   std::unique_ptr<CGBlockInfo> blockInfo;
   blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo,
                                          blockExpr->getBlockDecl()));
 
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo, InvokeF);
+  return EmitBlockLiteral(*blockInfo);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
   auto GenVoidPtrTy =
       IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
@@ -788,8 +779,6 @@
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
       CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  if (InvokeF)
-    *InvokeF = InvokeFn;
   auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
@@ -1024,6 +1013,11 @@
   llvm::Value *result = Builder.CreatePointerCast(
       blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
 
+  if (IsOpenCL) {
+    CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+                                           result);
+  }
+
   return result;
 }
 
@@ -1287,6 +1281,10 @@
   llvm::Constant *Result =
       llvm::ConstantExpr::getPointerCast(literal, RequiredType);
   CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+  if (CGM.getContext().getLangOpts().OpenCL)
+    CGM.getOpenCLRuntime().recordBlockInfo(
+        blockInfo.BlockExpression,
+        cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
   return Result;
 }
 
Index: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
===================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
@@ -23,6 +23,7 @@
 
 namespace clang {
 
+class BlockExpr;
 class Expr;
 class VarDecl;
 
@@ -39,8 +40,9 @@
 
   /// Structure for enqueued block information.
   struct EnqueuedBlockInfo {
-    llvm::Function *Kernel; /// Enqueued block kernel.
-    llvm::Value *BlockArg;  /// The first argument to enqueued block kernel.
+    llvm::Function *InvokeFunc; /// Block invoke function.
+    llvm::Function *Kernel;     /// Enqueued block kernel.
+    llvm::Value *BlockArg;      /// The first argument to enqueued block kernel.
   };
   /// Maps block expression to block information.
   llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
@@ -76,6 +78,15 @@
   /// \return enqueued block information for enqueued block.
   EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
                                             const Expr *E);
+
+  /// \brief Record invoke function and block literal emitted during normal
+  /// codegen for a block expression. The information is used by
+  /// emitOpenCLEnqueuedBlock to emit wrapper kernel.
+  ///
+  /// \param InvokeF invoke function emitted for the block expression.
+  /// \param Block block literal emitted for the block expression.
+  void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
+                       llvm::Value *Block);
 };
 
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to