llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: This patch attempts to remove the dependency on `libffi` by instead emitting the host / CPU kernels using an aggregate struct made from the captured context. This callows us to have a fixed function prototype we can call directly rather than requiring an extra library to decode the ABI to call a function with N (non variadic) arguments. NOTE: This currently fails for tests using a non-constant value for `num_teams` on the CPU. It seems that these use a method called `CGF.EmitScalarExpr(NumTeams)` which doesn't seem to work correctly with the created aggregate struct. --- Patch is 1.07 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91264.diff 38 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+6-2) - (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+126) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+3) - (modified) clang/test/OpenMP/declare_target_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/declare_target_link_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/distribute_codegen.cpp (+90-76) - (modified) clang/test/OpenMP/distribute_simd_codegen.cpp (+196-160) - (modified) clang/test/OpenMP/openmp_offload_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_firstprivate_codegen.cpp (+704-644) - (modified) clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp (+170-102) - (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+264-210) - (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+306-240) - (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+638-498) - (modified) clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp (+48-28) - (modified) clang/test/OpenMP/target_parallel_if_codegen.cpp (+178-106) - (modified) clang/test/OpenMP/target_parallel_num_threads_codegen.cpp (+154-94) - (modified) clang/test/OpenMP/target_private_codegen.cpp (+361-116) - (modified) clang/test/OpenMP/target_reduction_codegen.cpp (+290-106) - (modified) clang/test/OpenMP/target_task_affinity_codegen.cpp (+72-70) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+402-298) - (modified) clang/test/OpenMP/target_teams_distribute_codegen.cpp (+330-256) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp (+82-60) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp (+160-139) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp (+56-41) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp (+110-80) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+160-139) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp (+56-41) - (modified) clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp (+652-504) - (modified) clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp (+82-60) - (modified) clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp (+40-25) - (modified) clang/test/OpenMP/target_teams_map_codegen.cpp (+170-142) - (modified) clang/test/OpenMP/target_teams_num_teams_codegen.cpp (+154-94) - (modified) clang/test/OpenMP/target_teams_thread_limit_codegen.cpp (+164-100) - (modified) clang/test/OpenMP/teams_codegen.cpp (+104-60) - (modified) offload/plugins-nextgen/host/CMakeLists.txt (-13) - (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp (-75) - (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.h (-78) - (modified) offload/plugins-nextgen/host/src/rtl.cpp (+23-18) ``````````diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e39c7c58d2780e..3cd4bcff2f5852 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -5932,12 +5932,16 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( CodeGenFunction CGF(CGM, true); llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = - [&CGF, &D, &CodeGen](StringRef EntryFnName) { + [&CGF, &D, &CodeGen, this](StringRef EntryFnName) { const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU()) + return CGF.GenerateOpenMPCapturedStmtFunctionAggregate( + CS, D.getBeginLoc()); + else + return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); }; OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ef3aa3a8e0dc61..b9d27815a8ae24 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -613,6 +613,102 @@ static llvm::Function *emitOutlinedFunctionPrologue( return F; } +static llvm::Function *emitOutlinedFunctionPrologueAggregate( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> + &LocalAddrs, + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> + &VLASizes, + llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc, + StringRef FunctionName) { + const CapturedDecl *CD = CS.getCapturedDecl(); + const RecordDecl *RD = CS.getCapturedRecordDecl(); + + CXXThisValue = nullptr; + // Build the argument list. + CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGM.getContext(); + Args.append(CD->param_begin(), CD->param_end()); + + // Create the function declaration. + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + auto *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FunctionName, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->setDoesNotThrow(); + F->setDoesNotRecurse(); + + // Generate the function. + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc); + Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam()); + llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr); + LValue ContextLV = CGF.MakeNaturalAlignAddrLValue( + ContextV, CGM.getContext().getTagDeclType(RD)); + auto I = CS.captures().begin(); + for (const FieldDecl *FD : RD->fields()) { + LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD); + // Do not map arguments if we emit function with non-original types. + Address LocalAddr = FieldLV.getAddress(CGF); + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + const VarDecl *CurVD = I->getCapturedVar(); + LocalAddrs.insert({FD, {CurVD, LocalAddr}}); + ++I; + continue; + } + + LValue ArgLVal = + CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + const VariableArrayType *VAT = FD->getCapturedVLAType(); + VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg); + } else if (I->capturesVariable()) { + const VarDecl *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(CGF); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = CGF.EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs<PointerType>()); + } + LocalAddrs.insert( + {FD, + {Var, Address(ArgAddr.getBasePointer(), ArgAddr.getElementType(), + Ctx.getDeclAlign(Var))}}); + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + const VarDecl *Var = I->getCapturedVar(); + Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD), + Var->getName()); + LValue CopyLVal = + CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl); + + RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation()); + CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal); + + LocalAddrs.insert({FD, {Var, CopyAddr}}); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}}); + } + ++I; + } + + return F; +} + llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc) { @@ -695,6 +791,36 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, return WrapperF; } +llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( + const CapturedStmt &S, SourceLocation Loc) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. + FunctionArgList Args; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + StringRef FunctionName = CapturedStmtInfo->getHelperName(); + llvm::Function *F = emitOutlinedFunctionPrologueAggregate( + *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName); + CodeGenFunction::OMPPrivateScope LocalScope(*this); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + LocalScope.addPrivate(LocalAddrPair.second.first, + LocalAddrPair.second.second); + } + } + (void)LocalScope.Privatize(); + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; + PGO.assignRegionCounters(GlobalDecl(CD), F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + (void)LocalScope.ForceCleanup(); + FinishFunction(CD->getBodyRBrace()); + return F; +} + //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e1e687af6a781b..4ad4b96767f795 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3639,6 +3639,9 @@ class CodeGenFunction : public CodeGenTypeCache { Address GenerateCapturedStmtArgument(const CapturedStmt &S); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc); + llvm::Function * + GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, + SourceLocation Loc); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp index ba93772ede3e8e..81116c6617b5bd 100644 --- a/clang/test/OpenMP/declare_target_codegen.cpp +++ b/clang/test/OpenMP/declare_target_codegen.cpp @@ -150,7 +150,7 @@ int bar() { return 1 + foo() + bar() + baz1() + baz2(); } int maini1() { int a; static long aa = 32 + bbb + ccc + fff + ggg; -// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}}) +// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}}) #pragma omp target map(tofrom \ : a, b) { @@ -163,7 +163,7 @@ int maini1() { int baz3() { return 2 + baz2(); } int baz2() { -// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}}) +// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}}) #pragma omp target parallel ++c; return 2 + baz3(); @@ -175,7 +175,7 @@ static __typeof(create) __t_create __attribute__((__weakref__("__create"))); int baz5() { bool a; -// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}}) +// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}}) #pragma omp target a = __extension__(void *) & __t_create != 0; return a; diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp index 189c9ac59c153c..ba63a4bc543476 100644 --- a/clang/test/OpenMP/declare_target_link_codegen.cpp +++ b/clang/test/OpenMP/declare_target_link_codegen.cpp @@ -52,7 +52,7 @@ int maini1() { return 0; } -// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} +// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr {{[^,]*}} // DEVICE: [[C_REF:%.+]] = load ptr, ptr @c_decl_tgt_ref_ptr, // DEVICE: [[C:%.+]] = load i32, ptr [[C_REF]], // DEVICE: store i32 [[C]], ptr % diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index 34d14c89fedaed..aaa28980839668 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -1947,19 +1947,18 @@ int fint(void) { return ftemplate<int>(); } // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56 -// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -2058,19 +2057,18 @@ int fint(void) { return ftemplate<int>(); } // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68 -// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -2169,19 +2167,18 @@ int fint(void) { return ftemplate<int>(); } // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80 -// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -2297,13 +2294,18 @@ int fint(void) { return ftemplate<int>(); } // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92 -// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A_ADDR]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK17-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A]]) // CHECK17-NEXT: ret void // // @@ -2401,13 +2403,18 @@ int fint(void) { return ftemplate<int>(); } // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108 -// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.omp_outlined, ptr [[AA_ADDR]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// C... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/91264 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits