Author: Alexey Bataev Date: 2020-01-27T14:53:25-05:00 New Revision: e6d2583e45ef3af7646d4a58bada23333e639121
URL: https://github.com/llvm/llvm-project/commit/e6d2583e45ef3af7646d4a58bada23333e639121 DIFF: https://github.com/llvm/llvm-project/commit/e6d2583e45ef3af7646d4a58bada23333e639121.diff LOG: [OPENMP50]Track changes of lastprivate conditional in parallel-based regions with reductions, lastprivates or linears clauses. If the lastprivate conditional variable is updated in an inner parallel region with reduction, lastprivate or linear clause, the value must be considered as a candidate for lastprivate conditional. Also, tracking in inner parallel regions is not required. Added: clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp Modified: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/for_lastprivate_codegen.cpp clang/test/OpenMP/sections_lastprivate_codegen.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 896cf378c16b..aecf150401bb 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -11357,25 +11357,7 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( } } Data.IVLVal = IVLVal; - // In simd only mode or for simd directives no need to generate threadprivate - // references for the loop iteration counter, we can use the original one - // since outlining cannot happen in simd regions. 
- if (CGF.getLangOpts().OpenMPSimd || - isOpenMPSimdDirective(S.getDirectiveKind())) { - Data.UseOriginalIV = true; - return; - } - PresumedLoc PLoc = - CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - CGM.getDiags().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - Data.IVName = CGM.getOpenMPRuntime().getName( - {"pl_cond", llvm::utostr(ID.getDevice()), llvm::utostr(ID.getFile()), - llvm::utostr(PLoc.getLine()), llvm::utostr(PLoc.getColumn()), "iv"}); + Data.CGF = &CGF; } CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { @@ -11384,27 +11366,6 @@ CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); } -void CGOpenMPRuntime::initLastprivateConditionalCounter( - CodeGenFunction &CGF, const OMPExecutableDirective &S) { - if (CGM.getLangOpts().OpenMPSimd || - !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), - [](const OMPLastprivateClause *C) { - return C->getKind() == OMPC_LASTPRIVATE_conditional; - })) - return; - const CGOpenMPRuntime::LastprivateConditionalData &Data = - LastprivateConditionalStack.back(); - if (Data.UseOriginalIV) - return; - // Global loop counter. Required to handle inner parallel-for regions. - // global_iv = iv; - Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( - CGF, Data.IVLVal.getType(), Data.IVName); - LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType()); - llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc()); - CGF.EmitStoreOfScalar(IVVal, GlobIVLVal); -} - namespace { /// Checks if the lastprivate conditional variable is referenced in LHS. 
class LastprivateConditionalRefChecker final @@ -11415,9 +11376,7 @@ class LastprivateConditionalRefChecker final const Decl *FoundD = nullptr; StringRef UniqueDeclName; LValue IVLVal; - StringRef IVName; SourceLocation Loc; - bool UseOriginalIV = false; public: bool VisitDeclRefExpr(const DeclRefExpr *E) { @@ -11430,8 +11389,6 @@ class LastprivateConditionalRefChecker final FoundD = E->getDecl()->getCanonicalDecl(); UniqueDeclName = It->getSecond(); IVLVal = D.IVLVal; - IVName = D.IVName; - UseOriginalIV = D.UseOriginalIV; break; } return FoundE == E; @@ -11448,8 +11405,6 @@ class LastprivateConditionalRefChecker final FoundD = E->getMemberDecl()->getCanonicalDecl(); UniqueDeclName = It->getSecond(); IVLVal = D.IVLVal; - IVName = D.IVName; - UseOriginalIV = D.UseOriginalIV; break; } return FoundE == E; @@ -11470,17 +11425,17 @@ class LastprivateConditionalRefChecker final CodeGenFunction &CGF, ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) : CGF(CGF), LPM(LPM) {} - std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool> + std::tuple<const Expr *, const Decl *, StringRef, LValue> getFoundData() const { - return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, - UseOriginalIV); + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal); } }; } // namespace void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS) { - if (CGF.getLangOpts().OpenMP < 50) + if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty() || + LastprivateConditionalStack.back().CGF != &CGF) return; LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); if (!Checker.Visit(LHS)) @@ -11489,10 +11444,7 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Decl *FoundD; StringRef UniqueDeclName; LValue IVLVal; - StringRef IVName; - bool UseOriginalIV; - std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = - 
Checker.getFoundData(); + std::tie(FoundE, FoundD, UniqueDeclName, IVLVal) = Checker.getFoundData(); // Last updated loop counter for the lastprivate conditional var. // int<xx> last_iv = 0; @@ -11517,11 +11469,6 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, // Global loop counter. Required to handle inner parallel-for regions. // global_iv - if (!UseOriginalIV) { - Address IVAddr = - getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName); - IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType()); - } llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); // #pragma omp critical(a) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 8159f5e8b790..dbbf253c2dd0 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -233,11 +233,7 @@ class CGOpenMPRuntime { llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>> DeclToUniqeName; LValue IVLVal; - SmallString<16> IVName; - /// True if original lvalue for loop counter can be used in codegen (simd - /// region or simd only mode) and no need to create threadprivate - /// references. - bool UseOriginalIV = false; + CodeGenFunction *CGF = nullptr; }; /// Manages list of lastprivate conditional decls for the specified directive. class LastprivateConditionalRAII { @@ -1692,11 +1688,6 @@ class CGOpenMPRuntime { /// current context. bool isNontemporalDecl(const ValueDecl *VD) const; - /// Initializes global counter for lastprivate conditional. - virtual void - initLastprivateConditionalCounter(CodeGenFunction &CGF, - const OMPExecutableDirective &S); - /// Checks if the provided \p LVal is lastprivate conditional and emits the /// code to update the value of the original variable. 
/// \code diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f44405a03622..0e41d520da20 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,6 +18,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/Basic/PrettyStackTrace.h" @@ -1332,6 +1333,19 @@ static void emitCommonOMPParallelDirective( CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars, IfCond); + // Check for outer lastprivate conditional update. + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *Ref : C->varlists()) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *Ref : C->varlists()) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *Ref : C->varlists()) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } } static void emitEmptyBoundParameters(CodeGenFunction &, @@ -1890,7 +1904,6 @@ void CodeGenFunction::EmitOMPSimdFinal( static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); } @@ -2011,8 +2024,6 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( - CGF, S); CGF.EmitOMPLoopBody(S, 
CodeGenFunction::JumpDest()); CGF.EmitStopPoint(&S); }, @@ -2667,8 +2678,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( : S.getCond(), StaticChunkedOne ? S.getDistInc() : S.getInc(), [&S, LoopExit](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime() - .initLastprivateConditionalCounter(CGF, S); CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); }, @@ -2851,7 +2860,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // break; // } // .omp.sections.exit: - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); llvm::SwitchInst *SwitchStmt = CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp index b310055447be..6a2f4d6a0f95 100644 --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -176,7 +176,6 @@ char cnt; // CHECK-DAG: [[X:@.+]] = global double 0.0 // CHECK-DAG: [[F:@.+]] = global float 0.0 // CHECK-DAG: [[CNT:@.+]] = global i8 0 -// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32 0 // OMP50-DAG: [[LAST_IV_F:@.+]] = {{.*}}common global i32 0 // OMP50-DAG: [[LAST_F:@.+]] = {{.*}}common global float 0.000000e+00, @@ -674,16 +673,10 @@ int main() { // CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]] // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]] // <Skip loop body> -// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}}) -// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32* -// OMP50: store i32 %{{.+}}, i32* [[BC]], // CHECK: store float 0.000000e+00, float* [[F_PRIV:%.+]], -// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** 
@{{.+}}) -// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32* -// OMP50: [[IV:%.+]] = load i32, i32* [[BC]], // OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION:@.+]]) // OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_F]], -// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]] +// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]] // OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]] // OMP50: [[LP_THEN]]: diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp new file mode 100644 index 000000000000..e05a5b977a4c --- /dev/null +++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +int main() { + int a = 0; +#pragma omp parallel for lastprivate(conditional: a) + for (int i = 0; i < 10; ++i) { + if (i < 5) { + a = 0; +#pragma omp parallel reduction(+:a) num_threads(10) + a += i; + } + } 
+ return 0; +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}) + +// CHECK: define internal void [[OUTLINED]]( +// CHECK: call void @__kmpc_push_num_threads(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 10) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @{{.+}} to void (i32*, i32*, ...)*), i32* {{.+}} i32* %{{.+}}) +// CHECK: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}}) +// CHECK: [[LAST_IV_VAL:%.+]] = load i32, i32* [[LAST_IV:@.+]], +// CHECK: [[RES:%.+]] = icmp sle i32 [[LAST_IV_VAL]], [[IV:%.+]] +// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]] +// CHECK: [[THEN]]: +// CHECK: store i32 [[IV]], i32* [[LAST_IV]], +// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV:%.+]], +// CHECK: store i32 [[A_VAL]], i32* [[A_GLOB:@.+]], +// CHECK: br label %[[DONE]] +// CHECK: [[DONE]]: +// CHECK: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}}) +// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}) +// CHECK: [[IS_LAST:%.+]] = load i32, i32* %{{.+}}, +// CHECK: [[RES:%.+]] = icmp ne i32 [[IS_LAST]], 0 +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) +// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]] +// CHECK: [[THEN]]: +// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_GLOB]], +// CHECK: store i32 [[A_VAL]], i32* [[A_PRIV]], +// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV]], +// CHECK: store i32 [[A_VAL]], i32* %{{.+}}, +// CHECK: br label %[[DONE]] +// CHECK: [[DONE]]: +// CHECK: ret void + +#endif // HEADER diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp 
b/clang/test/OpenMP/sections_lastprivate_codegen.cpp index 93b417ad0293..5e323ad9f00f 100644 --- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp @@ -46,7 +46,6 @@ volatile int g = 1212; // CHECK: [[S_INT_TY:%.+]] = type { i32 } // CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* // CHECK-DAG: [[X:@.+]] = global double 0.0 -// OMP50-DAG: [[IV_REF:@.+]] = common global i32 0 // OMP50-DAG: [[LAST_IV_X:@.+]] = {{.*}}common global i32 0 // OMP50-DAG: [[LAST_X:@.+]] = {{.*}}common global double 0.000000e+00, template <typename T> @@ -294,15 +293,9 @@ int main() { // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] // CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) // <Skip loop body> -// OMP50: [[IV_GLOB_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}}) -// OMP50: [[BC:%.+]] = bitcast i8* [[IV_GLOB_REF]] to i32* -// OMP50: store i32 %{{.+}}, i32* [[BC]], -// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}}) -// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32* -// OMP50: [[IV:%.+]] = load i32, i32* [[BC]], // OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[X_REGION:@.+]]) // OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_X]], -// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]] +// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]] // OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]] // OMP50: [[LP_THEN]]: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits