================ @@ -4645,12 +4789,23 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal)) combinedMaxThreadsVal = maxThreadsVal; + // Calculate reduction data size, limited to single reduction variable for + // now. + int32_t reductionDataSize = 0; + if (isGPU && capturedOp) { + if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) + reductionDataSize = getReductionDataSize(teamsOp); + } + // Update kernel bounds structure for the `OpenMPIRBuilder` to use. attrs.ExecFlags = targetOp.getKernelExecFlags(capturedOp); attrs.MinTeams = minTeamsVal; attrs.MaxTeams.front() = maxTeamsVal; attrs.MinThreads = 1; attrs.MaxThreads.front() = combinedMaxThreadsVal; + attrs.ReductionDataSize = reductionDataSize; + if (attrs.ReductionDataSize != 0) + attrs.ReductionBufferLength = 1024; ---------------- skatrak wrote:
Nit: Could you add a comment documenting the reasoning behind this size, or add a TODO to calculate it from the actual reductions? https://github.com/llvm/llvm-project/pull/133310 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits