Author: abataev Date: Mon Jul 17 06:30:36 2017 New Revision: 308174 URL: http://llvm.org/viewvc/llvm-project?rev=308174&view=rev Log: [OPENMP] Codegen for reduction clauses in 'taskloop' directives.
Adds codegen for taskloop-based directives. Added: cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp cfe/trunk/lib/Sema/SemaOpenMP.cpp cfe/trunk/test/OpenMP/for_reduction_codegen.cpp cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp cfe/trunk/test/OpenMP/taskloop_codegen.cpp cfe/trunk/test/OpenMP/taskloop_firstprivate_codegen.cpp cfe/trunk/test/OpenMP/taskloop_lastprivate_codegen.cpp cfe/trunk/test/OpenMP/taskloop_private_codegen.cpp cfe/trunk/test/OpenMP/taskloop_simd_codegen.cpp cfe/trunk/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp cfe/trunk/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp cfe/trunk/test/OpenMP/taskloop_simd_private_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Jul 17 06:30:36 2017 @@ -643,6 +643,12 @@ enum OpenMPRTLFunction { // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); OMPRTL__kmpc_doacross_wait, + // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + OMPRTL__kmpc_task_reduction_init, + // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + OMPRTL__kmpc_task_reduction_get_th_data, // // Offloading related calls @@ -766,8 +772,8 @@ static void emitInitWithReductionInitial /// \param SrcAddr Address of the original array. 
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, const Expr *Init, + const OMPDeclareReductionDecl *DRD, Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); // Perform element-by-element initialization. QualType ElementTy; @@ -869,19 +875,17 @@ LValue ReductionCodeGen::emitSharedLValu return LValue(); } -void ReductionCodeGen::emitAggregateInitialization(CodeGenFunction &CGF, - unsigned N, - Address PrivateAddr, - LValue SharedLVal) { +void ReductionCodeGen::emitAggregateInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - auto *DRD = getReductionInit(ClausesData[N].ReductionOp); EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), DRD ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - SharedLVal.getAddress()); + DRD, SharedLVal.getAddress()); } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -890,6 +894,7 @@ ReductionCodeGen::ReductionCodeGen(Array ClausesData.reserve(Shareds.size()); SharedAddresses.reserve(Shareds.size()); Sizes.reserve(Shareds.size()); + BaseDecls.reserve(Shareds.size()); auto IPriv = Privates.begin(); auto IRed = ReductionOps.begin(); for (const auto *Ref : Shareds) { @@ -912,20 +917,30 @@ void ReductionCodeGen::emitAggregateType QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { - Sizes.emplace_back(nullptr); + Sizes.emplace_back( + CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()), + nullptr); return; } llvm::Value *Size; + llvm::Value *SizeInChars; + llvm::Type *ElemType = + cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) + ->getElementType(); + auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), SharedAddresses[N].first.getPointer()); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); } else { - Size = CGF.getTypeSize( + SizeInChars = CGF.getTypeSize( SharedAddresses[N].first.getType().getNonReferenceType()); + Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); } - Sizes.emplace_back(Size); + Sizes.emplace_back(SizeInChars, Size); CodeGenFunction::OpaqueValueMapping OpaqueMap( CGF, cast<OpaqueValueExpr>( @@ -941,7 +956,7 @@ void ReductionCodeGen::emitAggregateType QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { - assert(!Size && 
!Sizes[N] && + assert(!Size && !Sizes[N].second && "Size should be nullptr for non-variably modified redution " "items."); return; @@ -971,7 +986,7 @@ void ReductionCodeGen::emitInitializatio SharedType, SharedAddresses[N].first.getBaseInfo()); if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || CGF.getContext().getAsArrayType(PrivateVD->getType())) { - emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal); + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, PrivateAddr, SharedLVal.getAddress(), @@ -1091,6 +1106,11 @@ Address ReductionCodeGen::adjustPrivateA return PrivateAddr; } +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + return DRD && DRD->getInitializer(); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -1948,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(u RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } + case OMPRTL__kmpc_task_reduction_init: { + // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); + break; + } + case OMPRTL__kmpc_task_reduction_get_th_data: { + // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, 
/*Name=*/"__kmpc_task_reduction_get_th_data"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -2298,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThr return nullptr; } +Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) { + llvm::Twine VarName(Name, ".artificial."); + llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); + llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, SourceLocation()), + getThreadID(CGF, SourceLocation()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), + CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, + /*IsSigned=*/false), + getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + return Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VarLVType->getPointerTo(/*AddrSpace=*/0)), + CGM.getPointerAlign()); +} + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -3093,6 +3154,8 @@ enum KmpTaskTFields { KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, + /// (Taskloops only) Reduction data. 
+ KmpTaskTReductions, }; } // anonymous namespace @@ -3644,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule & // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; + // void * reductions; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); @@ -3667,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule & addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); } RD->completeDefinition(); return RD; @@ -3697,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(Cod /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, -/// tt->shareds); +/// tt->reductions, tt->shareds); /// return 0; /// } /// \endcode @@ -3783,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); + auto RLVal = CGF.EmitLValueForField(Base, *RFI); + auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); + CallArgs.push_back(RParam); } CallArgs.push_back(SharedsParam); @@ -4549,6 +4618,16 @@ void CGOpenMPRuntime::emitTaskLoopCall(C cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + // Store reductions address. 
+ LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { UpLoc, @@ -5074,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(Code CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } +/// Generates unique name for artificial threadprivate variables. +/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> +static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, + unsigned N) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << "." << Loc.getRawEncoding() << "_" << N; + return Out.str(); +} + +/// Emits reduction initializer function: +/// \code +/// void @.red_init(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// store <type> <init>, <type>* %0 +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_init.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is 
non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + LValue SharedLVal; + // If initializer uses initializer from declare reduction construct, emit a + // pointer to the address of the original reduction item (required by reduction + // initializer) + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = + CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); + } else { + SharedLVal = CGF.MakeNaturalAlignAddrLValue( + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGM.getContext().VoidPtrTy); + } + // Emit the initializer: + // %0 = bitcast void* %arg to <type>* + // store <type> <init>, <type>* %0 + RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, + [](CodeGenFunction &) { return false; }); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction combiner function: +/// \code +/// void @.red_comb(void* %arg0, void* %arg1) { +/// %lhs = bitcast void* %arg0 to <type>* +/// %rhs = bitcast void* %arg1 to <type>* +/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) +/// store <type> %2, <type>* %lhs +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N, + const Expr *ReductionOp, + const Expr *LHS, const Expr *RHS, + const Expr *PrivateRef) { + auto &C = CGM.getContext(); + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + FunctionArgList Args; + 
ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&ParamInOut); + Args.emplace_back(&ParamIn); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_comb.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Remap lhs and rhs variables to the addresses of the function arguments. + // %lhs = bitcast void* %arg0 to <type>* + // %rhs = bitcast void* %arg1 to <type>* + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); + }); + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + // Pull out the pointer to the variable. 
+ Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); + }); + PrivateScope.Privatize(); + // Emit the combiner body: + // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) + // store <type> %2, <type>* %lhs + CGM.getOpenMPRuntime().emitSingleReductionCombiner( + CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), + cast<DeclRefExpr>(RHS)); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction finalizer function: +/// \code +/// void @.red_fini(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// <destroy>(<type>* %0) +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + if (!RCG.needCleanups(N)) + return nullptr; + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_fini.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. 
+ if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Emit the finalizer body: + // <destroy>(<type>* %0) + RCG.emitCleanups(CGF, N, PrivateAddr); + CGF.FinishFunction(); + return Fn; +} + +llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) + return nullptr; + + // Build typedef struct: + // kmp_task_red_input { + // void *reduce_shar; // shared reduction item + // size_t reduce_size; // size of data item + // void *reduce_init; // data initialization routine + // void *reduce_fini; // data finalization routine + // void *reduce_comb; // data combiner routine + // kmp_task_red_flags_t flags; // flags for additional info from compiler + // } kmp_task_red_input_t; + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RD->startDefinition(); + const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FlagsFD = addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); + RD->completeDefinition(); + QualType RDType = C.getRecordType(RD); + unsigned Size = Data.ReductionVars.size(); + llvm::APInt ArraySize(/*numBits=*/64, Size); + QualType ArrayRDType = C.getConstantArrayType( + RDType, 
ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + // kmp_task_red_input_t .rd_input.[Size]; + Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; + llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), + llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; + llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( + TaskRedInput.getPointer(), Idxs, + /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, + ".rd_input.gep."); + LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + // ElemLVal.reduce_shar = &Shareds[Cnt]; + LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); + RCG.emitSharedLValue(CGF, Cnt); + llvm::Value *CastedShared = + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + RCG.emitAggregateType(CGF, Cnt); + llvm::Value *SizeValInChars; + llvm::Value *SizeVal; + std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); + // We use delayed creation/initialization for VLAs, array sections and + // custom reduction initializations. It is required because runtime does not + // provide the way to pass the sizes of VLAs/array sections to + // initializer/combiner/finalizer functions and does not pass the pointer to + // original reduction item to the initializer. Instead threadprivate global + // variables are used to store these values and use them in the functions. 
+ bool DelayedCreation = !!SizeVal; + SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, + /*isSigned=*/false); + LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); + CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); + // ElemLVal.reduce_init = init; + LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); + llvm::Value *InitAddr = + CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + CGF.EmitStoreOfScalar(InitAddr, InitLVal); + DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); + // ElemLVal.reduce_fini = fini; + LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); + llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); + llvm::Value *FiniAddr = Fini + ? CGF.EmitCastToVoidPtr(Fini) + : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); + // ElemLVal.reduce_comb = comb; + LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); + llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], + RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + CGF.EmitStoreOfScalar(CombAddr, CombLVal); + // ElemLVal.flags = 0; + LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); + if (DelayedCreation) { + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + FlagsLVal); + } else + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + } + // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), + CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); +} + +void 
CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + auto Sizes = RCG.getSizes(N); + // Emit threadprivate global variable if the type is non-constant + // (Sizes.second != nullptr). + if (Sizes.second) { + llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, + /*isSigned=*/false); + Address SizeAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); + } + // Store address of the original reduction item if custom initializer is used. + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + SharedAddr, /*IsVolatile=*/false); + } +} + +Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), + CGM.VoidPtrTy)}; + return Address( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + SharedLVal.getAlignment()); +} + void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== 
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Jul 17 06:30:36 2017 @@ -96,10 +96,14 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> FirstprivateInits; SmallVector<const Expr *, 4> LastprivateVars; SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<const Expr *, 4> ReductionVars; + SmallVector<const Expr *, 4> ReductionCopies; + SmallVector<const Expr *, 4> ReductionOps; SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + llvm::Value *Reductions = nullptr; unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; @@ -125,9 +129,10 @@ private: /// List of addresses of original shared variables/expressions. SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses; /// Sizes of the reduction items in chars. - SmallVector<llvm::Value *, 4> Sizes; + SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes; /// Base declarations for the reduction items. SmallVector<const VarDecl *, 4> BaseDecls; + /// Emits lvalue for shared expresion. LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); /// Emits upper bound for shared expression (if array section). @@ -135,9 +140,11 @@ private: /// Performs aggregate initialization. /// \param N Number of reduction item in the common list. /// \param PrivateAddr Address of the corresponding private item. - /// \param SharedLVal Addreiss of the original shared variable. + /// \param SharedLVal Address of the original shared variable. + /// \param DRD Declare reduction construct used for reduction item. 
void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr, LValue SharedLVal); + Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD); public: ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -177,11 +184,16 @@ public: Address PrivateAddr); /// Returns LValue for the reduction item. LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } - /// Returns the size of the reduction item in chars, or nullptr, if the size - /// is a constant. - llvm::Value *getSizeInChars(unsigned N) const { return Sizes[N]; } + /// Returns the size of the reduction item (in chars and total number of + /// elements in the item), or nullptr, if the size is a constant. + std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const { + return Sizes[N]; + } /// Returns the base declaration of the reduction item. const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns true if the initialization of the reduction item uses initializer + /// from declare reduction construct. + bool usesReductionInitializer(unsigned N) const; }; class CGOpenMPRuntime { @@ -923,6 +935,14 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name); + /// \brief Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, @@ -1081,6 +1101,51 @@ public: ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options); + /// Emit a code for initialization of task reduction clause. 
Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, + SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + const OMPTaskDataTy &Data); + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows reusing existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N); + + /// Get the address of `void *` type of the private copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. + /// \param SharedLVal Address of the original reduction item. 
+ virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal); + /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original) +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Jul 17 06:30:36 2017 @@ -2697,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDi ++ID; } } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. 
OMPPrivateScope Scope(CGF); @@ -2756,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDi Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must be removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initializer/combiner/finalizer. 
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } (void)Scope.Privatize(); Action.Enter(CGF); Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original) +++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Jul 17 06:30:36 2017 @@ -1807,6 +1807,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMP std::make_pair(".lb.", KmpUInt64Ty), std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty), std::make_pair(".liter.", KmpInt32Ty), + std::make_pair(".reductions.", + Context.VoidPtrTy.withConst().withRestrict()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Modified: cfe/trunk/test/OpenMP/for_reduction_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/for_reduction_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/for_reduction_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/for_reduction_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -701,7 +701,7 @@ int main() { // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[ARR_SIZE:%.+]] = mul nuw i64 %{{.+}}, 4 +// CHECK: [[ARR_SIZE:%.+]] = udiv exact i64 // CHECK: call i8* @llvm.stacksave() // CHECK: [[ARR_PRIV:%.+]] = alloca i32, i64 [[ARR_SIZE]], Modified: cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp (original) +++ 
cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp Mon Jul 17 06:30:36 2017 @@ -505,7 +505,7 @@ int main() { // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[ARR_SIZE:%.+]] = mul nuw i64 %{{.+}}, 4 +// CHECK: [[ARR_SIZE:%.+]] = udiv exact i64 // CHECK: call i8* @llvm.stacksave() // CHECK: [[ARR_PRIV:%.+]] = alloca i32, i64 [[ARR_SIZE]], Modified: cfe/trunk/test/OpenMP/taskloop_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -9,7 +9,7 @@ int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) // CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 @@ -24,7 +24,7 @@ int main(int argc, char **argv) { #pragma omp taskloop priority(argc) for (int i = 0; i < 10; ++i) ; -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) +// CHECK: 
[[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 @@ -40,7 +40,7 @@ int main(int argc, char **argv) { for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 @@ -144,7 +144,7 @@ struct S { int a; S(int c) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds 
[[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 Modified: cfe/trunk/test/OpenMP/taskloop_firstprivate_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_firstprivate_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_firstprivate_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_firstprivate_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type {{.*}}{ [2 x i32]*, i32, {{.*}}[2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}} @@ -57,7 +57,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 // LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 @@ -101,7 
+101,7 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 @@ -190,7 +190,7 @@ int main() { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. 
Modified: cfe/trunk/test/OpenMP/taskloop_lastprivate_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_lastprivate_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_lastprivate_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_lastprivate_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* } @@ -57,7 +57,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) @@ -98,7 +98,7 @@ int main() { // 
BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) // BLOCKS: ret @@ -182,7 +182,7 @@ int main() { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. 
Modified: cfe/trunk/test/OpenMP/taskloop_private_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_private_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_private_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_private_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { i8 } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] @@ -55,7 +55,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) // LAMBDA: ret @@ -91,7 +91,7 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 
%{{[^,]+}}, i32 1, i64 88, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) // BLOCKS: ret @@ -157,7 +157,7 @@ int main() { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Added: cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp?rev=308174&view=auto ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp (added) +++ cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -0,0 +1,235 @@ +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s +// expected-no-diagnostics + +struct S { + float a; + S() : a(0.0f) {} + ~S() {} +}; + 
+#pragma omp declare reduction(+:S:omp_out.a += omp_in.a) initializer(omp_priv = omp_orig) + +float g; + +int a; +#pragma omp threadprivate(a) +int main (int argc, char *argv[]) +{ +int i, n; +float a[100], b[100], sum, e[argc + 100]; +S c[100]; +float &d = g; + +/* Some initializations */ +n = 100; +for (i=0; i < n; i++) + a[i] = b[i] = i * 1.0; +sum = 0.0; + +#pragma omp taskloop reduction(+:sum, c[:n], d, e) + for (i=0; i < n; i++) { + sum = sum + (a[i] * b[i]); + c[i].a = i*i; + d += i*i; + e[i] = i; + } + +} + +// CHECK-LABEL: @main( +// CHECK: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK: [[I:%.*]] = alloca i32, align 4 +// CHECK: [[N:%.*]] = alloca i32, align 4 +// CHECK: [[A:%.*]] = alloca [100 x float], align 4 +// CHECK: [[B:%.*]] = alloca [100 x float], align 4 +// CHECK: [[SUM:%.*]] = alloca float, align 4 +// CHECK: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK: [[C:%.*]] = alloca [100 x %struct.S], align 4 +// CHECK: [[D:%.*]] = alloca float*, align 8 +// CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t* +// CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t], align 8 +// CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK: store i32 0, i32* [[RETVAL]], align 4 +// CHECK: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4 +// CHECK: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8 +// CHECK: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK: [[ADD:%.*]] = add nsw i32 [[TMP1]], 100 +// CHECK: [[TMP2:%.*]] = zext i32 [[ADD]] to i64 +// CHECK: [[VLA:%.+]] = alloca float, i64 % + +// CHECK: call void @__kmpc_taskgroup(%ident_t* +// CHECK: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x 
%struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.*]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK: [[TMP21:%.*]] = bitcast float* [[SUM]] to i8* +// CHECK: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK: store i64 4, i64* [[TMP22]], align 8 +// CHECK: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT1:@.+]] to i8*), i8** [[TMP23]], align 8 +// CHECK: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP24]], align 8 +// CHECK: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]], align 8 +// CHECK: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 +// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0 +// CHECK: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK: 
[[TMP30:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP30]] +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK: [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK: store i8* [[TMP31]], i8** [[TMP28]], align 8 +// CHECK: [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK: [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64 +// CHECK: [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]] +// CHECK: [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1 +// CHECK: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 +// CHECK: store i64 [[TMP37]], i64* [[TMP38]], align 8 +// CHECK: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT2:@.+]] to i8*), i8** [[TMP39]], align 8 +// CHECK: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*)* [[RED_FINI2:@.+]] to i8*), i8** [[TMP40]], align 8 +// CHECK: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB2:@.+]] to i8*), i8** [[TMP41]], align 8 +// CHECK: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5 +// CHECK: store i32 1, i32* [[TMP42]], align 8 +// CHECK: [[DOTRD_INPUT_GEP_7:%.*]] 
= getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 +// CHECK: [[TMP44:%.*]] = load float*, float** [[D]], align 8 +// CHECK: [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8* +// CHECK: store i8* [[TMP45]], i8** [[TMP43]], align 8 +// CHECK: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 +// CHECK: store i64 4, i64* [[TMP46]], align 8 +// CHECK: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT3:@.+]] to i8*), i8** [[TMP47]], align 8 +// CHECK: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP48]], align 8 +// CHECK: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]], align 8 +// CHECK: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false) +// CHECK: [[DOTRD_INPUT_GEP_8:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 3 +// CHECK: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 0 +// CHECK: [[TMP53:%.*]] = bitcast float* [[VLA]] 
to i8* +// CHECK: store i8* [[TMP53]], i8** [[TMP52]], align 8 +// CHECK: [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK: [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1 +// CHECK: store i64 [[TMP54]], i64* [[TMP56]], align 8 +// CHECK: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT4:@.+]] to i8*), i8** [[TMP57]], align 8 +// CHECK: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP58]], align 8 +// CHECK: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB4:@.+]] to i8*), i8** [[TMP59]], align 8 +// CHECK: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5 +// CHECK: store i32 1, i32* [[TMP60]], align 8 +// CHECK: [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK: [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]]) +// CHECK: [[TMP63:%.*]] = load i32, i32* [[N]], align 4 +// CHECK: store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK: [[SUB:%.*]] = sub nsw i32 [[TMP64]], 0 +// CHECK: [[SUB10:%.*]] = sub nsw i32 [[SUB]], 1 +// CHECK: [[ADD11:%.*]] = add nsw i32 [[SUB10]], 1 +// CHECK: [[DIV:%.*]] = sdiv i32 [[ADD11]], 1 +// CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK: store i32 [[SUB12]], 
i32* [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates* +// CHECK: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 0 +// CHECK: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0 +// CHECK: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8 +// CHECK: [[TMP70:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP69]], i8* [[TMP70]], i64 72, i32 8, i1 false) +// CHECK: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 1 +// CHECK: [[TMP72:%.*]] = bitcast i8* [[TMP69]] to %struct.anon* +// CHECK: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 0 +// CHECK: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 1 +// CHECK: [[REF:%.*]] = load i32*, i32** [[TMP74]], align 8 +// CHECK: [[TMP75:%.*]] = load i32, i32* [[REF]], align 4 +// CHECK: store i32 [[TMP75]], i32* [[TMP73]], align 8 +// CHECK: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 1 +// CHECK: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 3 +// CHECK: [[REF13:%.*]] = load [100 x float]*, [100 x float]** [[TMP77]], align 8 +// CHECK: [[TMP78:%.*]] = bitcast [100 x float]* [[TMP76]] to i8* +// CHECK: [[TMP79:%.*]] = bitcast [100 x float]* [[REF13]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP78]], i8* [[TMP79]], i64 
400, i32 4, i1 false) +// CHECK: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 2 +// CHECK: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 4 +// CHECK: [[REF14:%.*]] = load [100 x float]*, [100 x float]** [[TMP81]], align 8 +// CHECK: [[TMP82:%.*]] = bitcast [100 x float]* [[TMP80]] to i8* +// CHECK: [[TMP83:%.*]] = bitcast [100 x float]* [[REF14]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP82]], i8* [[TMP83]], i64 400, i32 4, i1 false) +// CHECK: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[TMP84]], align 8 +// CHECK: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 6 +// CHECK: [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK: [[CONV15:%.*]] = sext i32 [[TMP86]] to i64 +// CHECK: store i64 [[CONV15]], i64* [[TMP85]], align 8 +// CHECK: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[TMP87]], align 8 +// CHECK: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 9 +// CHECK: store i8* [[TMP62]], i8** [[TMP88]], align 8 +// CHECK: [[TMP89:%.*]] = load i64, i64* [[TMP87]], align 8 +// CHECK: call void @__kmpc_taskloop(%ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* [[TMP84]], i64* [[TMP85]], i64 [[TMP89]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* + +// CHECK: ret i32 + +// CHECK: define internal void [[RED_INIT1]](i8*) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB1]](i8*, i8*) +// CHECK: fadd float %6, %7 +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void 
[[RED_INIT2]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( +// CHECK: ret void + +// CHECK: define internal void [[RED_FINI2]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void @ +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB2]](i8*, i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_INIT3]](i8*) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB3]](i8*, i8*) +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_INIT4]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB4]](i8*, i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + Modified: cfe/trunk/test/OpenMP/taskloop_simd_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_simd_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_simd_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -9,7 +9,7 @@ int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) // CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) +// CHECK: 
[[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 @@ -24,7 +24,7 @@ int main(int argc, char **argv) { #pragma omp taskloop simd priority(argc) for (int i = 0; i < 10; ++i) ; -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 @@ -40,7 +40,7 @@ int main(int argc, char **argv) { for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: 
[[IF:%.+]] = icmp ne i32 %{{.+}}, 0 @@ -146,7 +146,7 @@ struct S { int a; S(int c) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 Modified: cfe/trunk/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type {{.*}}{ [2 x i32]*, i32, {{.*}}[2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}} @@ -57,7 +57,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // 
LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 // LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 @@ -101,7 +101,7 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 @@ -190,7 +190,7 @@ int main() { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = 
call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. Modified: cfe/trunk/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* } @@ -57,7 +57,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr 
inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) @@ -98,7 +98,7 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) // BLOCKS: ret @@ -182,7 +182,7 @@ int main() { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. 
Modified: cfe/trunk/test/OpenMP/taskloop_simd_private_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_private_codegen.cpp?rev=308174&r1=308173&r2=308174&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_simd_private_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/taskloop_simd_private_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -23,7 +23,7 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32 } +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } // CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { i8 } // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] @@ -55,7 +55,7 @@ int main() { // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) // LAMBDA: ret @@ -91,7 +91,7 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ 
]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 88, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) // BLOCKS: ret @@ -157,7 +157,7 @@ int main() { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 112, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Added: cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp?rev=308174&view=auto ============================================================================== --- cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp (added) +++ cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp Mon Jul 17 06:30:36 2017 @@ -0,0 +1,235 @@ +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s +// expected-no-diagnostics + +struct S { + float 
a; + S() : a(0.0f) {} + ~S() {} +}; + +#pragma omp declare reduction(+:S:omp_out.a += omp_in.a) initializer(omp_priv = omp_orig) + +float g; + +int a; +#pragma omp threadprivate(a) +int main (int argc, char *argv[]) +{ +int i, n; +float a[100], b[100], sum, e[argc + 100]; +S c[100]; +float &d = g; + +/* Some initializations */ +n = 100; +for (i=0; i < n; i++) + a[i] = b[i] = i * 1.0; +sum = 0.0; + +#pragma omp taskloop simd reduction(+:sum, c[:n], d, e) + for (i=0; i < n; i++) { + sum = sum + (a[i] * b[i]); + c[i].a = i*i; + d += i*i; + e[i] = i; + } + +} + +// CHECK-LABEL: @main( +// CHECK: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK: [[I:%.*]] = alloca i32, align 4 +// CHECK: [[N:%.*]] = alloca i32, align 4 +// CHECK: [[A:%.*]] = alloca [100 x float], align 4 +// CHECK: [[B:%.*]] = alloca [100 x float], align 4 +// CHECK: [[SUM:%.*]] = alloca float, align 4 +// CHECK: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK: [[C:%.*]] = alloca [100 x %struct.S], align 4 +// CHECK: [[D:%.*]] = alloca float*, align 8 +// CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t* +// CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t], align 8 +// CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK: store i32 0, i32* [[RETVAL]], align 4 +// CHECK: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4 +// CHECK: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8 +// CHECK: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK: [[ADD:%.*]] = add nsw i32 [[TMP1]], 100 +// CHECK: [[TMP2:%.*]] = zext i32 [[ADD]] to i64 +// CHECK: [[VLA:%.+]] = alloca float, i64 % + +// CHECK: call void @__kmpc_taskgroup(%ident_t* +// CHECK: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [4 x 
%struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.*]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK: [[TMP21:%.*]] = bitcast float* [[SUM]] to i8* +// CHECK: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK: store i64 4, i64* [[TMP22]], align 8 +// CHECK: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT1:@.+]] to i8*), i8** [[TMP23]], align 8 +// CHECK: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP24]], align 8 +// CHECK: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]], align 8 +// CHECK: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 +// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0 +// CHECK: [[TMP29:%.*]] = load 
i32, i32* [[N]], align 4 +// CHECK: [[TMP30:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP30]] +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK: [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK: store i8* [[TMP31]], i8** [[TMP28]], align 8 +// CHECK: [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK: [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64 +// CHECK: [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]] +// CHECK: [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1 +// CHECK: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 +// CHECK: store i64 [[TMP37]], i64* [[TMP38]], align 8 +// CHECK: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT2:@.+]] to i8*), i8** [[TMP39]], align 8 +// CHECK: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*)* [[RED_FINI2:@.+]] to i8*), i8** [[TMP40]], align 8 +// CHECK: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB2:@.+]] to i8*), i8** [[TMP41]], align 8 +// CHECK: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5 +// CHECK: store i32 1, i32* [[TMP42]], align 8 
+// CHECK: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 +// CHECK: [[TMP44:%.*]] = load float*, float** [[D]], align 8 +// CHECK: [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8* +// CHECK: store i8* [[TMP45]], i8** [[TMP43]], align 8 +// CHECK: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 +// CHECK: store i64 4, i64* [[TMP46]], align 8 +// CHECK: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT3:@.+]] to i8*), i8** [[TMP47]], align 8 +// CHECK: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP48]], align 8 +// CHECK: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]], align 8 +// CHECK: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false) +// CHECK: [[DOTRD_INPUT_GEP_8:%.*]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 3 +// CHECK: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 0 +// CHECK: 
[[TMP53:%.*]] = bitcast float* [[VLA]] to i8* +// CHECK: store i8* [[TMP53]], i8** [[TMP52]], align 8 +// CHECK: [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK: [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1 +// CHECK: store i64 [[TMP54]], i64* [[TMP56]], align 8 +// CHECK: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2 +// CHECK: store i8* bitcast (void (i8*)* [[RED_INIT4:@.+]] to i8*), i8** [[TMP57]], align 8 +// CHECK: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3 +// CHECK: store i8* null, i8** [[TMP58]], align 8 +// CHECK: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[RED_COMB4:@.+]] to i8*), i8** [[TMP59]], align 8 +// CHECK: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5 +// CHECK: store i32 1, i32* [[TMP60]], align 8 +// CHECK: [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK: [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]]) +// CHECK: [[TMP63:%.*]] = load i32, i32* [[N]], align 4 +// CHECK: store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK: [[SUB:%.*]] = sub nsw i32 [[TMP64]], 0 +// CHECK: [[SUB10:%.*]] = sub nsw i32 [[SUB]], 1 +// CHECK: [[ADD11:%.*]] = add nsw i32 [[SUB10]], 1 +// CHECK: [[DIV:%.*]] = sdiv i32 [[ADD11]], 1 +// CHECK: [[SUB12:%.*]] = sub nsw i32 
[[DIV]], 1 +// CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates* +// CHECK: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 0 +// CHECK: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0 +// CHECK: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8 +// CHECK: [[TMP70:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP69]], i8* [[TMP70]], i64 72, i32 8, i1 false) +// CHECK: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 1 +// CHECK: [[TMP72:%.*]] = bitcast i8* [[TMP69]] to %struct.anon* +// CHECK: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 0 +// CHECK: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 1 +// CHECK: [[REF:%.*]] = load i32*, i32** [[TMP74]], align 8 +// CHECK: [[TMP75:%.*]] = load i32, i32* [[REF]], align 4 +// CHECK: store i32 [[TMP75]], i32* [[TMP73]], align 8 +// CHECK: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 1 +// CHECK: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 3 +// CHECK: [[REF13:%.*]] = load [100 x float]*, [100 x float]** [[TMP77]], align 8 +// CHECK: [[TMP78:%.*]] = bitcast [100 x float]* [[TMP76]] to i8* +// CHECK: [[TMP79:%.*]] = bitcast [100 x float]* [[REF13]] to i8* +// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP78]], i8* [[TMP79]], i64 400, i32 4, i1 false) +// CHECK: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 2 +// CHECK: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP72]], i32 0, i32 4 +// CHECK: [[REF14:%.*]] = load [100 x float]*, [100 x float]** [[TMP81]], align 8 +// CHECK: [[TMP82:%.*]] = bitcast [100 x float]* [[TMP80]] to i8* +// CHECK: [[TMP83:%.*]] = bitcast [100 x float]* [[REF14]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP82]], i8* [[TMP83]], i64 400, i32 4, i1 false) +// CHECK: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[TMP84]], align 8 +// CHECK: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 6 +// CHECK: [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK: [[CONV15:%.*]] = sext i32 [[TMP86]] to i64 +// CHECK: store i64 [[CONV15]], i64* [[TMP85]], align 8 +// CHECK: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[TMP87]], align 8 +// CHECK: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 9 +// CHECK: store i8* [[TMP62]], i8** [[TMP88]], align 8 +// CHECK: [[TMP89:%.*]] = load i64, i64* [[TMP87]], align 8 +// CHECK: call void @__kmpc_taskloop(%ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* [[TMP84]], i64* [[TMP85]], i64 [[TMP89]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* + +// CHECK: ret i32 + +// CHECK: define internal void [[RED_INIT1]](i8*) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB1]](i8*, i8*) +// CHECK: fadd float %6, %7 +// CHECK: store float %{{.+}}, float* % +// 
CHECK: ret void + +// CHECK: define internal void [[RED_INIT2]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( +// CHECK: ret void + +// CHECK: define internal void [[RED_FINI2]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void @ +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB2]](i8*, i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_INIT3]](i8*) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB3]](i8*, i8*) +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_INIT4]](i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void [[RED_COMB4]](i8*, i8*) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits