================ @@ -4899,6 +4899,234 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, } } +void CGOpenMPRuntime::emitPrivateReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { + if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty()) + return; + + if (LHSExprs.size() != Privates.size() || + LHSExprs.size() != ReductionOps.size()) + return; + + // Create a shared global variable (__shared_reduction_var) to accumulate the + // final result. + // + // Call __kmpc_barrier to synchronize threads before initialization. + // + // The master thread (thread_id == 0) initializes __shared_reduction_var + // with the identity value or initializer. + // + // Call __kmpc_barrier to synchronize before combining. + // For each i: + // - Thread enters critical section. + // - Reads its private value from LHSExprs[i]. + // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i], + // LHSExprs[i]). + // - Exits critical section. + // + // Call __kmpc_barrier after combining. ---------------- chandraghale wrote:
Agreed. Handling simple cases with atomics for performance is a good idea. I would like to address this in a follow-up patch. Currently, the synchronization generated here is independent of the `nowait` clause. https://github.com/llvm/llvm-project/pull/134709 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits