================ @@ -4899,6 +4899,234 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, } } +void CGOpenMPRuntime::emitPrivateReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { + if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty()) + return; + + if (LHSExprs.size() != Privates.size() || + LHSExprs.size() != ReductionOps.size()) + return; + + // Create a shared global variable (__shared_reduction_var) to accumulate the + // final result. + // + // Call __kmpc_barrier to synchronize threads before initialization. + // + // The master thread (thread_id == 0) initializes __shared_reduction_var + // with the identity value or initializer. + // + // Call __kmpc_barrier to synchronize before combining. + // For each i: + // - Thread enters critical section. + // - Reads its private value from LHSExprs[i]. + // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i], + // LHSExprs[i]). + // - Exits critical section. + // + // Call __kmpc_barrier after combining. + // + // Each thread copies __shared_reduction_var[i] back to LHSExprs[i]. 
+ // + // Final __kmpc_barrier to synchronize after broadcasting + QualType PrivateType = Privates[0]->getType(); + llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType); + + llvm::Constant *InitVal = nullptr; + const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps[0]); + // Determine the initial value for the shared reduction variable + if (!UDR) { + InitVal = llvm::Constant::getNullValue(LLVMType); + if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates[0])) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + const Expr *InitExpr = VD->getInit(); + if (InitExpr && !PrivateType->isAggregateType() && + !PrivateType->isAnyComplexType()) { + Expr::EvalResult Result; + if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) { + APValue &InitValue = Result.Val; + if (InitValue.isInt()) + InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt()); + } + } + } + } + } else { + InitVal = llvm::Constant::getNullValue(LLVMType); + } + + // Create an internal shared variable + std::string SharedName = getName({"internal_private_var"}); + llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable( + CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage, + InitVal, ".omp.reduction." + SharedName, nullptr, + llvm::GlobalVariable::NotThreadLocal); + + SharedVar->setAlignment( + llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8)); + + Address SharedResult(SharedVar, SharedVar->getValueType(), + CGF.getContext().getTypeAlignInChars(PrivateType)); + + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); + llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId}; + + // First barrier to ensure all threads are ready. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + BarrierArgs); + // Initialize the shared variable by the master thread. 
+ llvm::BasicBlock *InitBB = CGF.createBasicBlock("init"); + llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end"); + + llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ( + ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0)); + CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB); + + CGF.EmitBlock(InitBB); + + auto EmitSharedInit = [&]() { + if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates[0])) { ---------------- chandraghale wrote:
Yes, I guess I hadn't handled this correctly. I've updated the code to iterate over all privates to support multiple reduced variables. I tested it with a few cases, and it worked as expected. https://github.com/llvm/llvm-project/pull/134709 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits