mshockwave removed a reviewer: bader. mshockwave updated this revision to Diff 73254. mshockwave added a comment.
@Anastasia Sorry for the late response. I've just attached a new version of the patch that fixes the use of block functions as normal lambda functions. Test code is not included in this revision; we're still working on it. Regarding the questions you asked in the previous comment, I'd like to explain them from another angle: how would one implement __enqueue_kernel_XXX? The possible implementations fall into two categories: 1. A library implementation, like libclc/libclcxx, written in OpenCL C. 2. Implementing the builtins directly in the compiler. If we choose the first one, which most people would do because of its simplicity and flexibility, and we want to fetch captured variables inside the implementation of __enqueue_kernel_XXX, a possible approach would be: void* block_as_voidptr = (void*)arg_child_kernel; block_literal_ty *block = (block_literal_ty*)block_as_voidptr; block->capA; block->capB; This seems promising, but what exactly does `block_literal_ty` look like? We all know that `block_literal_ty` would look similar to: typedef struct { /* * Fields of block header. * e.g. isa, block_descriptor... */ int capA; int capB; ... } block_literal_ty; But since we're discussing a statically typed language, the definition of this struct must be known. However, the EXACT layout of `block_literal_ty` varies among programs, or even among functions. That is the problem cap_copy_helper is meant to solve. Of course, there is another library approach: keep the child kernel's invoke_function prototype untouched and pass the block_literal variable (as a void pointer) as its first function argument. This works because the instructions for extracting captured variables have already been generated during codegen of the invoke_function body, so we don't need to handle any captured variables inside __enqueue_kernel_XXX. However, the OpenCL spec says that the global address space is the only address space shared between parent and child kernels, and the block_literal variable itself is allocated as a private (stack) variable in the parent kernel.
So we need to copy the block_literal variable (not its pointer) into some global memory. However, OpenCL doesn't allow dynamically sized memory in the global address space, so we would need to define a statically sized block of memory, perhaps an array, in our library implementation. This is where global memory management might be required, since a static size implies a potential risk of running out of pre-allocated space. The improvement we propose "flattens" the captured variables into the invoke_function argument list, so the block_literal pointer is no longer passed as the first argument (to invoke_function). The reason it doesn't require global memory management is that we can retrieve the captured variables with the cap_num field and the cap_copy_helper routine INSIDE __enqueue_kernel_XXX and pass those captures as arguments to the child kernel, rather than saving the block_literal variable globally and postponing the retrieval until invoke_function, the child kernel body. https://reviews.llvm.org/D24715 Files: lib/CodeGen/CGBlocks.cpp lib/CodeGen/CGBlocks.h lib/CodeGen/CGExpr.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.h
Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -464,6 +464,7 @@ llvm::Type *BlockDescriptorType = nullptr; llvm::Type *GenericBlockLiteralType = nullptr; + llvm::Type *GenericOCLBlockLiteralType = nullptr; struct { int GlobalUniqueCount; @@ -768,6 +769,8 @@ /// The type of a generic block literal. llvm::Type *getGenericBlockLiteralType(); + llvm::Type *getGenericOCLBlockLiteralType(); + /// Gets the address of a block which requires no captures. llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, const char *); Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -288,6 +288,7 @@ const CodeGen::CGBlockInfo *BlockInfo; llvm::Value *BlockPointer; + bool IsOCLChildKernelInvoke; llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField; @@ -1341,8 +1342,12 @@ const DeclMapTy &ldm, bool IsLambdaConversionToBlock); + llvm::Constant* GenerateBlockFunctionWrapper(const CGBlockInfo &blockInfo, + const llvm::Function *invokeFunc); + llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); + llvm::Constant *GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID); llvm::Constant *GenerateObjCAtomicGetterCopyHelperFunction( Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -46,7 +46,8 @@ SanOpts(CGM.getLangOpts().Sanitize), IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false), SawAsmBlock(false), IsOutlinedSEHHelper(false), - BlockInfo(nullptr), 
BlockPointer(nullptr), + BlockInfo(nullptr), BlockPointer(nullptr), + IsOCLChildKernelInvoke(false), LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr), NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr), EHSelectorSlot(nullptr), Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -2104,7 +2104,9 @@ if (E->refersToEnclosingVariableOrCapture()) { if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); - else if (CapturedStmtInfo) { + else if (CapturedStmtInfo && + !(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200)) { auto it = LocalDeclMap.find(VD); if (it != LocalDeclMap.end()) { if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { Index: lib/CodeGen/CGBlocks.h =================================================================== --- lib/CodeGen/CGBlocks.h +++ lib/CodeGen/CGBlocks.h @@ -189,6 +189,8 @@ return reinterpret_cast<llvm::Value*>(Data); } + ImplicitParamDecl* FunctionArgDecl; + static Capture makeIndex(unsigned index, CharUnits offset) { Capture v; v.Data = (index << 1) | 1; @@ -200,7 +202,13 @@ Capture v; v.Data = reinterpret_cast<uintptr_t>(value); return v; - } + } + + /* + ~Capture(){ + if(FunctionArgDecl) delete FunctionArgDecl; + } + */ }; /// CanBeGlobal - True if the block can be global, i.e. it has @@ -225,6 +233,8 @@ /// The mapping of allocated indexes within the block. llvm::DenseMap<const VarDecl*, Capture> Captures; + llvm::DenseMap<const ImplicitParamDecl*, llvm::Value*> FunctionArgCaptures; + Address LocalAddress; llvm::StructType *StructureType; const BlockDecl *Block; Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -45,7 +45,8 @@ /// Build the given block as a global block. 
static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, - llvm::Constant *blockFn); + llvm::Constant *blockFn, + llvm::Constant *blockInvokeWrapper); /// Build the helper function to copy a block. static llvm::Constant *buildCopyHelper(CodeGenModule &CGM, @@ -309,16 +310,28 @@ assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); info.BlockAlign = CGM.getPointerAlign(); - info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); + + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + auto blockHeaderSize = (isOCL2X)? BlockHeaderSize + 3 : BlockHeaderSize; + + info.BlockSize = (isOCL2X)? + 5 * CGM.getPointerSize() + 3 * CGM.getIntSize() : + 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); assert(elementTypes.empty()); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.getBlockDescriptorType()); + if(isOCL2X){ + elementTypes.push_back(CGM.IntTy); + elementTypes.push_back(CGM.VoidPtrTy); + elementTypes.push_back(CGM.VoidPtrTy); + } - assert(elementTypes.size() == BlockHeaderSize); + assert(elementTypes.size() == blockHeaderSize); } /// Compute the layout of the given block. 
Attempts to lay the block @@ -702,11 +715,27 @@ = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo, LocalDeclMap, isLambdaConv); + llvm::Function *targetFn = dyn_cast<llvm::Function>(blockFn); blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); + llvm::Constant *oclCapExtractFn = nullptr; + llvm::Constant *oclInvokeWrapperFn = nullptr; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + oclCapExtractFn + = CodeGenFunction(CGM, true).GenerateOCLCapturesCopyFunction(blockInfo); + oclCapExtractFn = llvm::ConstantExpr::getBitCast(oclCapExtractFn, VoidPtrTy); + + oclInvokeWrapperFn = CodeGenFunction(CGM, true) + .GenerateBlockFunctionWrapper(blockInfo, + const_cast<const llvm::Function*>(targetFn)); + oclInvokeWrapperFn = llvm::ConstantExpr::getBitCast(oclInvokeWrapperFn, + VoidPtrTy); + //llvm::ConstantExpr::getBitCast(oclInvokeWrapperFn, VoidPtrTy); + } + // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) - return buildGlobalBlock(CGM, blockInfo, blockFn); + return buildGlobalBlock(CGM, blockInfo, blockFn, oclInvokeWrapperFn); // Otherwise, we have to emit this as a local block. 
@@ -755,6 +784,24 @@ getIntSize(), "block.reserved"); addHeaderField(blockFn, getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); + if(getLangOpts().OpenCL && + getLangOpts().OpenCLVersion >= 200){ + addHeaderField(llvm::ConstantInt::get(IntTy, blockInfo.Captures.size()), + getIntSize(), "block.ocl.cap_num"); + + if(oclCapExtractFn) + addHeaderField(oclCapExtractFn, getPointerSize(), "block.ocl.cap_extract"); + else + addHeaderField(llvm::ConstantPointerNull::get(VoidPtrTy), + getPointerSize(), "block.ocl.cap_extract"); + + if(oclInvokeWrapperFn) + addHeaderField(oclInvokeWrapperFn, getPointerSize(), + "block.ocl.invoke_wrapper"); + else + addHeaderField(llvm::ConstantPointerNull::get(VoidPtrTy), + getPointerSize(), "block.ocl.invoke_wrapper"); + } } // Finally, capture all the values into the block. @@ -791,7 +838,11 @@ // special; we'll simply emit it directly. src = Address::invalid(); } else if (CI.isByRef()) { - if (BlockInfo && CI.isNested()) { + if (BlockInfo && CI.isNested() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*OpenCL 2.x doesn't set up BlockPointer. + Captured by reference is also not allowed by spec*/)) { + // We need to use the capture from the enclosing block. 
const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); @@ -960,24 +1011,63 @@ return GenericBlockLiteralType; } +llvm::Type* CodeGenModule::getGenericOCLBlockLiteralType(){ + if(GenericOCLBlockLiteralType) + return GenericOCLBlockLiteralType; + + llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); + + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // int ocl_cap_num; + // void (*ocl_cap_copy)(void*,int,void*); + // void (*ocl_invoke_wrapper)(void*,...); + // }; + GenericOCLBlockLiteralType = + llvm::StructType::create("struct.ocl.__block_literal_generic", + VoidPtrTy, IntTy, IntTy, VoidPtrTy, BlockDescPtrTy, + IntTy, VoidPtrTy, VoidPtrTy, nullptr); + return GenericOCLBlockLiteralType; +} + RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs<BlockPointerType>(); llvm::Value *Callee = EmitScalarExpr(E->getCallee()); + bool isOCL2X = (getLangOpts().OpenCL && + getLangOpts().OpenCLVersion >= 200); + // Get a pointer to the generic block literal. llvm::Type *BlockLiteralTy = - llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType()); + llvm::PointerType::getUnqual((isOCL2X)? + CGM.getGenericOCLBlockLiteralType() : + CGM.getGenericBlockLiteralType()); // Bitcast the callee to a block literal. llvm::Value *BlockLiteral = Builder.CreateBitCast(Callee, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. 
- llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockLiteral, 3); + llvm::Value *FuncPtr; + if(isOCL2X){ + // Invoke function wrapper + FuncPtr = + Builder.CreateStructGEP(CGM.getGenericOCLBlockLiteralType(), + BlockLiteral, + 7); + }else{ + FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), + BlockLiteral, + 3); + } BlockLiteral = Builder.CreateBitCast(BlockLiteral, VoidPtrTy); @@ -1012,13 +1102,29 @@ assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); + if(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200){ + + const ImplicitParamDecl* capFuncArgDecl = const_cast<const ImplicitParamDecl*>(capture.FunctionArgDecl); + auto itResult = BlockInfo->FunctionArgCaptures.find(capFuncArgDecl); + assert(itResult != BlockInfo->FunctionArgCaptures.end() && + "no entry for capture as function argument"); + llvm::Value* capValue = itResult->second; + + Address addr = CreateTempAlloca(capValue->getType(), + getContext().getDeclAlign(variable)); + Builder.CreateStore(capValue, addr); + return addr; + } + // Handle constant captures. if (capture.isConstant()) return LocalDeclMap.find(variable)->second; Address addr = Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); + if (isByRef) { // addr should be a void** right now. Load, then cast the result // to byref*. @@ -1051,45 +1157,91 @@ // Using that metadata, generate the actual block function. 
llvm::Constant *blockFn; + llvm::Constant *oclInvokeWrapper = nullptr; { CodeGenFunction::DeclMapTy LocalDeclMap; blockFn = CodeGenFunction(*this).GenerateBlockFunction(GlobalDecl(), blockInfo, LocalDeclMap, false); + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + llvm::Function *TFn = dyn_cast<llvm::Function>(blockFn); + oclInvokeWrapper = CodeGenFunction(*this) + .GenerateBlockFunctionWrapper(blockInfo, + const_cast<const llvm::Function*>(TFn)); + oclInvokeWrapper = llvm::ConstantExpr::getBitCast(oclInvokeWrapper, + VoidPtrTy); + } } blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); - return buildGlobalBlock(*this, blockInfo, blockFn); + return buildGlobalBlock(*this, blockInfo, blockFn, oclInvokeWrapper); } static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, - llvm::Constant *blockFn) { + llvm::Constant *blockFn, + llvm::Constant *blockFnWrapper) { assert(blockInfo.CanBeGlobal); + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + // Generate the constants for the block literal initializer. 
- llvm::Constant *fields[BlockHeaderSize]; - - // isa - fields[0] = CGM.getNSConcreteGlobalBlock(); - - // __flags + llvm::Constant *init; BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; - - fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); - // Reserved - fields[2] = llvm::Constant::getNullValue(CGM.IntTy); + if(!isOCL2X){ + llvm::Constant *fields[BlockHeaderSize]; + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); + + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); - // Function - fields[3] = blockFn; + // Function + fields[3] = blockFn; - // Descriptor - fields[4] = buildBlockDescriptor(CGM, blockInfo); + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + init = llvm::ConstantStruct::getAnon(fields); + }else{ + llvm::Constant *fields[BlockHeaderSize + 3]; + + assert(blockFnWrapper && "Block invoke function wrapper not built yet"); + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); + + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); - llvm::Constant *init = llvm::ConstantStruct::getAnon(fields); + // Function + fields[3] = blockFn; + + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + // Captured variables amount + fields[5] = llvm::ConstantInt::get(CGM.IntTy, 0); + + // Captured variables extraction function + fields[6] = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + + // Block invoke wrapper function + fields[7] = blockFnWrapper; + + init = llvm::ConstantStruct::getAnon(fields); + } llvm::GlobalVariable *literal = new llvm::GlobalVariable(CGM.getModule(), @@ -1111,6 +1263,21 @@ llvm::Value *arg) { assert(BlockInfo && "not emitting prologue of block invocation function?!"); + if(CGM.getLangOpts().OpenCL && + 
CGM.getLangOpts().OpenCLVersion >= 200){ + + if(IsOCLChildKernelInvoke){ + /* + * Store the llvm::Value* type captured variables, which is passed + * as arguments to the block invoke function + */ + const_cast<CGBlockInfo*>(BlockInfo)-> + FunctionArgCaptures.insert(std::make_pair(D, arg)); + } + + return; + } + llvm::Value *localAddr = nullptr; if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // Allocate a stack slot to let the debug info survive the RA. @@ -1131,6 +1298,7 @@ SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart(); ApplyDebugLocation Scope(*this, StartLoc); + // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. BlockPointer = Builder.CreateBitCast(arg, @@ -1157,6 +1325,10 @@ BlockInfo = &blockInfo; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = true; + } + // Arrange for local static and local extern declarations to appear // to be local to this function as well, in case they're directly // referenced in a block. @@ -1171,14 +1343,25 @@ // Build the argument list. FunctionArgList args; + /* // The first argument is the block pointer. Just take it as a void* // and cast it later. 
QualType selfTy = getContext().VoidPtrTy; IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), SourceLocation(), II, selfTy); args.push_back(&selfDecl); + */ + for(auto& capturePair : blockInfo.Captures) { + const VarDecl* capVarDecl = capturePair.getFirst(); + ImplicitParamDecl* capParamDecl = ImplicitParamDecl::Create(getContext(), const_cast<BlockDecl*>(blockDecl), + SourceLocation(), + capVarDecl->getIdentifier(), + capVarDecl->getType()); + auto& capture = capturePair.getSecond(); + const_cast<CGBlockInfo::Capture&>(capture).FunctionArgDecl = capParamDecl; + args.push_back( const_cast<const ImplicitParamDecl*>(capParamDecl) ); + } // Now add the rest of the parameters. args.append(blockDecl->param_begin(), blockDecl->param_end()); @@ -1207,7 +1390,8 @@ // At -O0 we generate an explicit alloca for the BlockPointer, so the RA // won't delete the dbg.declare intrinsics for captured variables. llvm::Value *BlockPointerDbgLoc = BlockPointer; - if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + if (CGM.getCodeGenOpts().OptimizationLevel == 0 && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200)) { // Allocate a stack slot for it, so we can point the debugger to it Address Alloca = CreateTempAlloca(BlockPointer->getType(), getPointerAlign(), @@ -1221,7 +1405,9 @@ // If we have a C++ 'this' reference, go ahead and force it into // existence now. 
- if (blockDecl->capturesCXXThis()) { + if (blockDecl->capturesCXXThis() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*OpenCL 2.x doesn't set up BlockPointer*/)) { Address addr = Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, "block.captured-this"); @@ -1298,6 +1484,88 @@ FinishFunction(cast<CompoundStmt>(blockDecl->getBody())->getRBracLoc()); + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = false; + } + + return fn; +} + +llvm::Constant* +CodeGenFunction::GenerateBlockFunctionWrapper(const CGBlockInfo &blockInfo, + const llvm::Function *invokeFunc){ + + const BlockDecl* blockDecl = blockInfo.Block; + + BlockInfo = &blockInfo; + + FunctionArgList args; + + QualType selfTy = getContext().VoidPtrTy; + IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); + ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), + SourceLocation(), II, selfTy); + args.push_back(&selfDecl); + + args.append(blockDecl->param_begin(), blockDecl->param_end()); + + const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); + const CGFunctionInfo &fnInfo = + CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); + + llvm::FunctionType *fnLLVMType = CGM.getTypes().GetFunctionType(fnInfo); + + llvm::Function *fn = + llvm::Function::Create(fnLLVMType, + llvm::GlobalValue::InternalLinkage, + "__ocl_block_invoke_wrapper", + &CGM.getModule()); + + IdentifierInfo *FII = &CGM.getContext().Idents.get("__ocl_block_invoke_wrapper"); + FunctionDecl *FD = FunctionDecl::Create(getContext(), + getContext().getTranslationUnitDecl(), + SourceLocation(), + SourceLocation(), FII, + fnType->getReturnType(), + nullptr, SC_Static, + false, + false); + + CGM.SetInternalFunctionAttributes(nullptr, fn, fnInfo); + + StartFunction(FD, fnType->getReturnType(), + fn, fnInfo, args); + + if(!fn->getReturnType()->isVoidTy()) + ReturnValue = 
CreateDefaultAlignTempAlloca(fn->getReturnType()); + + auto itBlockCtx = fn->arg_begin(); + // Used by LoadBlockStruct() + BlockPointer = Builder.CreateBitCast(&(*itBlockCtx), + blockInfo.StructureType->getPointerTo(), + "block"); + + llvm::SmallVector<llvm::Value*, 8> targetArgs; + for(const auto& capPair : blockInfo.Captures){ + const auto& capture = capPair.getSecond(); + Address capturePtr = Builder.CreateStructGEP(LoadBlockStruct(), + capture.getIndex(), + capture.getOffset()); + llvm::Value* captureVal = Builder.CreateLoad(capturePtr, "block_cap"); + targetArgs.push_back(captureVal); + } + auto itParam = fn->arg_begin(); + for(++itParam; itParam != fn->arg_end(); ++itParam){ + targetArgs.push_back(&(*itParam)); + } + + auto* targetInvoke = Builder.CreateCall(const_cast<llvm::Function*>(invokeFunc), + llvm::ArrayRef<llvm::Value*>(targetArgs)); + if(!fn->getReturnType()->isVoidTy()) + Builder.CreateStore(targetInvoke, ReturnValue); + + FinishFunction(); + return fn; } @@ -1634,6 +1902,134 @@ return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +// size_t __ocl_block_captures_copy_helper(void* block, uint indexOfArg, uint8* dst) +llvm::Constant * +CodeGenFunction::GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo){ + ASTContext &C = getContext(); + + BlockInfo = &blockInfo; + + FunctionArgList args; + + ImplicitParamDecl blockCtxDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&blockCtxDecl); + + ImplicitParamDecl indexDecl(getContext(), + nullptr/*Decl Ctx*/, + SourceLocation(), + nullptr/*II*/, + C.UnsignedIntTy); + args.push_back(&indexDecl); + + ImplicitParamDecl destDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&destDecl); + + auto retType = C.UnsignedIntTy; + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(retType, args); + + llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); 
+ + llvm::Function *Fn = + llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, + "__ocl_block_captures_copy_helper", &CGM.getModule()); + + IdentifierInfo *II + = &CGM.getContext().Idents.get("__ocl_block_captures_copy_helper"); + + FunctionDecl *FD = FunctionDecl::Create(C, + C.getTranslationUnitDecl(), + SourceLocation(), + SourceLocation(), II, retType, + nullptr, SC_Static, + false, + false); + + CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + + StartFunction(FD, retType, Fn, FI, args); + + ReturnValue = CreateDefaultAlignTempAlloca(IntTy); + + auto* bbRet = createBasicBlock(".ret"); + auto* bbDefault = createBasicBlock(".default"); + auto itBlock = Fn->arg_begin(); + auto itArgIndex = itBlock; + ++itArgIndex; + // Used by LoadBlockStruct() + BlockPointer = Builder.CreateBitCast(&(*itBlock), + blockInfo.StructureType->getPointerTo(), + "block"); + auto itDest = itArgIndex; + ++itDest; + llvm::Argument* destVal = &(*itDest); + + // A portable sizeof() + auto getTypeSize = [&](llvm::Type* type, QualType qualTy) -> llvm::Value* { + unsigned elementNum = 1; + llvm::Type* elementTy = type; + if(auto* vecType = dyn_cast<llvm::VectorType>(type)){ + elementNum = vecType->getNumElements(); + elementTy = vecType->getScalarType(); + } + + llvm::PointerType* typePtr + = elementTy->getPointerTo(C.getTargetAddressSpace(qualTy)); + llvm::Value* nilPtr = llvm::ConstantPointerNull::get(typePtr); + llvm::Value* ptrOffset = Builder.CreateGEP(nilPtr, + Builder.getInt32(elementNum)); + return Builder.CreatePtrToInt(ptrOffset, IntTy); + }; + + auto* switchInst = Builder.CreateSwitch(&(*itArgIndex), bbDefault, + blockInfo.Captures.size() + 1); + + unsigned int indexCounter = 0; + for(const auto& capturePair : blockInfo.Captures){ + auto* bbCase = createBasicBlock(".case"); + switchInst->addCase(Builder.getInt32(indexCounter), bbCase); + EmitBlock(bbCase); + + const auto* captureDecl = capturePair.getFirst(); + const auto& capture = capturePair.getSecond(); + Address 
capturePtr = Builder.CreateStructGEP(LoadBlockStruct(), + capture.getIndex(), + capture.getOffset()); + + llvm::Type* captureType = capturePtr.getElementType(); + llvm::Value* resultVal = getTypeSize(captureType, captureDecl->getType()); + assert(resultVal != nullptr && "Can't get size of captured variable"); + Builder.CreateMemCpy(Address(destVal, getPointerAlign()), + capturePtr, + resultVal); + + Builder.CreateStore(resultVal, ReturnValue); + EmitBranch(bbRet); + + indexCounter++; + } + + // Create default BB which returns null pointer + EmitBlock(bbDefault); + Builder.CreateStore(Builder.getInt32(0), + ReturnValue); + EmitBranch(bbRet); + + EmitBlock(bbRet, true); + + FinishFunction(); + + return Fn; +} + namespace { /// Emits the copy/dispose helper functions for a __block object of id type.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits