Author: svenvh Date: Tue Oct 2 06:02:24 2018 New Revision: 343582 URL: http://llvm.org/viewvc/llvm-project?rev=343582&view=rev Log: Revert r326937 "[OpenCL] Remove block invoke function from emitted block literal struct"
This reverts r326937 as it broke block argument handling in OpenCL. See the discussion on https://reviews.llvm.org/D43783 . The next commit will add a test case that revealed the issue. Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl cfe/trunk/test/CodeGenOpenCL/blocks.cl cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl cfe/trunk/test/SemaOpenCL/block-array-capturing.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Tue Oct 2 06:02:24 2018 @@ -446,12 +446,25 @@ static void initializeForBlockHeader(Cod assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { - // The header is basically 'struct { int; int; + // The header is basically 'struct { int; int; generic void *; // custom_fields; }'. Assert that struct is packed. + auto GenericAS = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto GenPtrAlign = + CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); + auto GenPtrSize = + CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); + assert(CGM.getIntSize() <= GenPtrSize); + assert(CGM.getIntAlign() <= GenPtrAlign); + assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ - unsigned Offset = 2 * CGM.getIntSize().getQuantity(); - unsigned BlockAlign = CGM.getIntAlign().getQuantity(); + elementTypes.push_back( + CGM.getOpenCLRuntime() + .getGenericVoidPointerType()); /* invoke function */ + unsigned Offset = + 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); + unsigned BlockAlign = GenPtrAlign.getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { @@ -906,12 +919,20 @@ llvm::Value *CodeGenFunction::EmitBlockL llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; + auto GenVoidPtrTy = + IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; + LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; + auto GenVoidPtrSize = CharUnits::fromQuantity( + CGM.getTarget().getPointerWidth( + CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / + 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); + auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) @@ -987,12 +1008,11 @@ llvm::Value *CodeGenFunction::EmitBlockL llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } - if (!IsOpenCL) { - addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), - getPointerSize(), "block.invoke"); + addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); + if (!IsOpenCL) addHeaderField(descriptor, getPointerSize(), "block.descriptor"); - } else if (auto *Helper = - CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, @@ -1196,23 +1216,38 @@ llvm::Type *CodeGenModule::getBlockDescr } llvm::Type *CodeGenModule::getGenericBlockLiteralType() { - assert(!getLangOpts().OpenCL && "OpenCL does not need this"); - if (GenericBlockLiteralType) return GenericBlockLiteralType; llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); - // struct __block_literal_generic { - // void *__isa; - // int __flags; - // int __reserved; - // void (*__invoke)(void *); - // struct __block_descriptor *__descriptor; - // }; - GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, - IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + if (getLangOpts().OpenCL) { + // struct __opencl_block_literal_generic { + // int __size; + // int __align; + // __generic void *__invoke; + // /* custom fields */ + // }; + SmallVector<llvm::Type *, 8> StructFields( + {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()}); + if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) + StructFields.push_back(I); + } + GenericBlockLiteralType = llvm::StructType::create( + StructFields, "struct.__opencl_block_literal_generic"); + } else { + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // }; + GenericBlockLiteralType = + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + } return GenericBlockLiteralType; } @@ -1223,21 +1258,27 @@ RValue CodeGenFunction::EmitBlockCallExp E->getCallee()->getType()->getAs<BlockPointerType>(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - llvm::Value *FuncPtr = nullptr; - if (!CGM.getLangOpts().OpenCL) { - // Get a pointer to the generic block literal. - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0); - - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - - // Get the function pointer from the literal. - FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - } + // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); + + llvm::Type *BlockLiteralTy = + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); + + // Bitcast the callee to a block literal. + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + + // Get the function pointer from the literal. + llvm::Value *FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, + CGM.getLangOpts().OpenCL ? 2 : 3); // Add the block literal. CallArgList Args; @@ -1260,11 +1301,7 @@ RValue CodeGenFunction::EmitBlockCallExp EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); // Load the function. - llvm::Value *Func; - if (CGM.getLangOpts().OpenCL) - Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); - else - Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1380,14 +1417,14 @@ static llvm::Constant *buildGlobalBlock( // Reserved fields.addInt(CGM.IntTy, 0); - - // Function - fields.add(blockFn); } else { fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); } + // Function + fields.add(blockFn); + if (!IsOpenCL) { // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp Tue Oct 2 06:02:24 2018 @@ -118,25 +118,6 @@ llvm::PointerType *CGOpenCLRuntime::getG CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } -// Get the block literal from an expression derived from the block expression. -// OpenCL v2.0 s6.12.5: -// Block variable declarations are implicitly qualified with const. Therefore -// all block variables must be initialized at declaration time and may not be -// reassigned. -static const BlockExpr *getBlockExpr(const Expr *E) { - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - if (auto DR = dyn_cast<DeclRefExpr>(E)) { - E = cast<VarDecl>(DR->getDecl())->getInit(); - } - E = E->IgnoreImplicit(); - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - return cast<BlockExpr>(E); -} - /// Record emitted llvm invoke function and llvm block literal for the /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, @@ -151,15 +132,21 @@ void CGOpenCLRuntime::recordBlockInfo(co EnqueuedBlockMap[E].Kernel = nullptr; } -llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { - return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; -} - CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF.EmitScalarExpr(E); - const BlockExpr *Block = getBlockExpr(E); + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + E = E->IgnoreImplicit(); + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } + auto *Block = cast<BlockExpr>(E); + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && "Block expression not emitted"); Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h (original) +++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h Tue Oct 2 06:02:24 2018 @@ -91,10 +91,6 @@ public: /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); - - /// \return LLVM block invoke function emitted for an expression derived from - /// the block expression. - llvm::Function *getInvokeFunction(const Expr *E); }; } Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Tue Oct 2 06:02:24 2018 @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s --check-prefix=CHECK typedef struct {int a;} ndrange_t; @@ -36,23 +36,23 @@ kernel void test(global char *a, char b, enqueue_kernel(default_queue, flags, ndrange, block); } -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>) // CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} // CHECK: entry: -// CHECK: %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) -// CHECK: store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8 -// CHECK: %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8* +// CHECK: %1 = alloca <{ i32, i32, i8*, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) +// CHECK: store <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8 +// CHECK: %2 = addrspacecast <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8* // CHECK: call void @__test_block_invoke(i8* %2) // CHECK: ret void // CHECK:} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" } Modified: cfe/trunk/test/CodeGenOpenCL/blocks.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/blocks.cl?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/blocks.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/blocks.cl Tue Oct 2 06:02:24 2018 @@ -3,7 +3,10 @@ // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=CHECK-DEBUG %s // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=CHECK-DEBUG %s -// COMMON: @__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 } +// SPIR: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } +// AMDGCN: %struct.__opencl_block_literal_generic = type { i32, i32, i8* } +// SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) } +// AMDGCN: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8* } { i32 16, i32 8, i8* bitcast (void (i8*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) } // COMMON-NOT: .str // SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a) @@ -19,32 +22,44 @@ void foo(){ // COMMON-NOT: %block.flags // COMMON-NOT: %block.reserved // COMMON-NOT: %block.descriptor - // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block:.*]], i32 0, i32 0 - // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 0 - // SPIR: store i32 12, i32* %[[block_size]] - // AMDGCN: store i32 12, i32 addrspace(5)* %[[block_size]] - // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 1 - // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 1 + // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0 + // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 0 + // SPIR: store i32 16, i32* %[[block_size]] + // AMDGCN: store i32 20, i32 addrspace(5)* %[[block_size]] + // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1 + // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 1 // SPIR: store i32 4, i32* %[[block_align]] - // AMDGCN: store i32 4, i32 addrspace(5)* %[[block_align]] - // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2 + // AMDGCN: store i32 8, i32 addrspace(5)* %[[block_align]] + // SPIR: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2 + // SPIR: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]] + // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 // SPIR: %[[i_value:.*]] = load i32, i32* %i // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], - // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }>* %[[block]] to i32 ()* + // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], - // SPIR: %[[block_literal:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] - // SPIR: %[[blk_gen_ptr:.*]] = bitcast i32 () addrspace(4)* %[[block_literal]] to i8 addrspace(4)* - // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) - // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] + // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* + // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* + // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] + // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* + // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) + // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 + // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], - // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* + // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], - // AMDGCN: %[[block_literal:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] - // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast i32 ()* %[[block_literal]] to i8* - // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) + // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] + // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* + // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 + // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* + // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] + // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* + // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) int (^ block_B)(void) = ^{ return i; @@ -53,12 +68,12 @@ void foo(){ } // SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) -// SPIR: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 }> addrspace(4)* -// SPIR: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(4)* %[[block]], i32 0, i32 2 +// SPIR: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* +// SPIR: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3 // SPIR: %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]] // AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor) -// AMDGCN: %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i32 }>* -// AMDGCN: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2 +// AMDGCN: %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i8*, i32 }>* +// AMDGCN: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }>* %[[block]], i32 0, i32 3 // AMDGCN: %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]] // COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Tue Oct 2 06:02:24 2018 @@ -7,25 +7,27 @@ typedef void (^bl_t)(local void *); typedef struct {int a;} ndrange_t; +// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. -// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) +// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) // For anonymous blocks without captures, emit block literals as global variable. -// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } +// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVG8:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INVG9:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } // Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G -// COMMON: define internal spir_func void [[INV_G:.*]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}}) +// COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}}) const bl_t block_G = (bl_t) ^ (local void *a) {}; void callee(int id, __global int *out) { @@ -74,8 +76,9 @@ kernel void device_side_enqueue(global i // Emits block literal on stack and block kernel [[INVLK1]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* - // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* + // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, @@ -91,7 +94,8 @@ kernel void device_side_enqueue(global i // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* - // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], @@ -117,7 +121,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, @@ -142,7 +146,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, @@ -169,7 +173,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, @@ -196,7 +200,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, @@ -221,7 +225,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, @@ -253,7 +257,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, @@ -277,7 +281,7 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, // B32-SAME: i32* %[[TMP]]) // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, @@ -289,19 +293,21 @@ kernel void device_side_enqueue(global i // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. // The full type of these expressions are long (and repeated elsewhere), so we // capture it as part of the regex for convenience and clarity. - // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A + // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A void (^const block_A)(void) = ^{ return; }; // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. - // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B + // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B void (^const block_B)(local void *) = ^(local void *a) { return; }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: call spir_func void [[INVG8:.*]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) + // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* + // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -310,17 +316,19 @@ kernel void device_side_enqueue(global i // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) enqueue_kernel(default_queue, flags, ndrange, block_A); // Uses block kernel [[INVGK8]] and global block literal [[BLG8]]. // COMMON: call i32 @__get_kernel_work_group_size_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. - // COMMON: call spir_func void [[INVG8]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) + // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* + // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); void (^block_C)(void) = ^{ @@ -328,6 +336,7 @@ kernel void device_side_enqueue(global i }; // Emits block literal on stack and block kernel [[INVLK3]]. + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* @@ -337,34 +346,34 @@ kernel void device_side_enqueue(global i // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, block_C); - // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]]. + // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]]. // COMMON: call i32 @__get_kernel_work_group_size_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_work_group_size(block_B); // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted. // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_A); // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_G); // Emits global block literal [[BLG10]] and block kernel [[INVGK10]]. // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){}); // Emits global block literal [[BLG11]] and block kernel [[INVGK11]]. // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){}); } @@ -386,7 +395,7 @@ kernel void device_side_enqueue(global i // COMMON: } // COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) -// COMMON: define internal spir_func void [[INVG9:.*]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) +// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) Modified: cfe/trunk/test/SemaOpenCL/block-array-capturing.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/block-array-capturing.cl?rev=343582&r1=343581&r2=343582&view=diff ============================================================================== --- cfe/trunk/test/SemaOpenCL/block-array-capturing.cl (original) +++ cfe/trunk/test/SemaOpenCL/block-array-capturing.cl Tue Oct 2 06:02:24 2018 @@ -4,7 +4,7 @@ typedef int (^block_t)(); int block_typedef_kernel(global int* res) { - // CHECK: %{{.*}} = alloca <{ i32, i32, [3 x i32] }> + // CHECK: %{{.*}} = alloca <{ i32, i32, i8 addrspace(4)*, [3 x i32] }> int a[3] = {1, 2, 3}; // CHECK: call void @llvm.memcpy{{.*}} block_t b = ^() { return a[0]; }; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits