================
@@ -4645,12 +4789,23 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, 
Operation *capturedOp,
       (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
     combinedMaxThreadsVal = maxThreadsVal;
 
+  // Calculate reduction data size, limited to single reduction variable for
+  // now.
+  int32_t reductionDataSize = 0;
+  if (isGPU && capturedOp) {
+    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
+      reductionDataSize = getReductionDataSize(teamsOp);
+  }
+
   // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
   attrs.ExecFlags = targetOp.getKernelExecFlags(capturedOp);
   attrs.MinTeams = minTeamsVal;
   attrs.MaxTeams.front() = maxTeamsVal;
   attrs.MinThreads = 1;
   attrs.MaxThreads.front() = combinedMaxThreadsVal;
+  attrs.ReductionDataSize = reductionDataSize;
+  if (attrs.ReductionDataSize != 0)
+    attrs.ReductionBufferLength = 1024;
----------------
jsjodin wrote:

I looked into this a bit more and it is basically a fixed value in clang that 
can be modified with the fopenmp-cuda-teams-reduction-recs-num flang. I added a 
TODO for now.

https://github.com/llvm/llvm-project/pull/133310
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to