Author: Erich Keane
Date: 2025-05-19T10:40:10-07:00
New Revision: db4c94f96410fd4432f8de9e9b1dec44ca07b6d8

URL: 
https://github.com/llvm/llvm-project/commit/db4c94f96410fd4432f8de9e9b1dec44ca07b6d8
DIFF: 
https://github.com/llvm/llvm-project/commit/db4c94f96410fd4432f8de9e9b1dec44ca07b6d8.diff

LOG: [OpenACC][CIR] Implement beginning of 'copy' lowering for compute con… 
(#140304)

…structs

This is a partial implementation of the 'copy' lowering. It is missing 3
things, which are coming in future patches:

1- does not handle subscript/subarrays for emission as variables 2- does
not handle member expressions for emissions as variables 3- does not
handle modifier-list

1 and 2 are because of the complexity and should be split off into a
separate patch. 3 is because it isn't clear how the IR is going to
handle this, and I'd like to make sure it gets done 'all at once' when
the IR is updated to handle these, so I'm pushing that off to the
future.

This DOES however handle the complexity of having a acc.copyin and
acc.copyout, plus the additional complexity of the 'async' clause.

Added: 
    clang/test/CIR/CodeGenOpenACC/compute-copy.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
    clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
    clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h 
b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 9adbe6a497214..ecbc8ce6b525a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -14,6 +14,7 @@
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "llvm/ADT/TypeSwitch.h"
 namespace clang {
 // Simple type-trait to see if the first template arg is one of the list, so we
 // can tell whether to `if-constexpr` a bunch of stuff.
@@ -36,6 +37,72 @@ template <typename ToTest> constexpr bool isCombinedType = 
false;
 template <typename T>
 constexpr bool isCombinedType<CombinedConstructClauseInfo<T>> = true;
 
+namespace {
+struct DataOperandInfo {
+  mlir::Location beginLoc;
+  mlir::Value varValue;
+  llvm::StringRef name;
+};
+
+inline mlir::Value emitOpenACCIntExpr(CIRGen::CIRGenFunction &cgf,
+                                      CIRGen::CIRGenBuilderTy &builder,
+                                      const Expr *intExpr) {
+  mlir::Value expr = cgf.emitScalarExpr(intExpr);
+  mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc());
+
+  mlir::IntegerType targetType = mlir::IntegerType::get(
+      &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()),
+      intExpr->getType()->isSignedIntegerOrEnumerationType()
+          ? mlir::IntegerType::SignednessSemantics::Signed
+          : mlir::IntegerType::SignednessSemantics::Unsigned);
+
+  auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
+      exprLoc, targetType, expr);
+  return conversionOp.getResult(0);
+}
+
+// A helper function that gets the information from an operand to a data
+// clause, so that it can be used to emit the data operations.
+inline DataOperandInfo getDataOperandInfo(CIRGen::CIRGenFunction &cgf,
+                                          CIRGen::CIRGenBuilderTy &builder,
+                                          OpenACCDirectiveKind dk,
+                                          const Expr *e) {
+  // TODO: OpenACC: Cache was 
diff erent enough as to need a separate
+  // `ActOnCacheVar`, so we are going to need to do some investigations here
+  // when it comes to implement this for cache.
+  if (dk == OpenACCDirectiveKind::Cache) {
+    cgf.cgm.errorNYI(e->getSourceRange(),
+                     "OpenACC data operand for 'cache' directive");
+    return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}};
+  }
+
+  const Expr *curVarExpr = e->IgnoreParenImpCasts();
+
+  mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
+
+  // TODO: OpenACC: Assemble the list of bounds.
+  if (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
+    cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
+                     "OpenACC data clause array subscript/section");
+    return {exprLoc, {}, {}};
+  }
+
+  // TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly.
+  if (isa<MemberExpr>(curVarExpr)) {
+    cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
+                     "OpenACC Data clause member expr");
+    return {exprLoc, {}, {}};
+  }
+
+  // Sema has made sure that only 4 types of things can get here, array
+  // subscript, array section, member expr, or DRE to a var decl (or the former
+  // 3 wrapping a var-decl), so we should be able to assume this is right.
+  const auto *dre = cast<DeclRefExpr>(curVarExpr);
+  const auto *vd = cast<VarDecl>(dre->getFoundDecl()->getCanonicalDecl());
+  return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName()};
+}
+} //  namespace
+
 template <typename OpTy>
 class OpenACCClauseCIREmitter final
     : public OpenACCClauseVisitor<OpenACCClauseCIREmitter<OpTy>> {
@@ -54,6 +121,11 @@ class OpenACCClauseCIREmitter final
   SourceLocation dirLoc;
 
   llvm::SmallVector<mlir::acc::DeviceType> lastDeviceTypeValues;
+  // Keep track of the async-clause so that we can shortcut updating the data
+  // operands async clauses.
+  bool hasAsyncClause = false;
+  // Keep track of the data operands so that we can update their async clauses.
+  llvm::SmallVector<mlir::Operation *> dataOperands;
 
   void setLastDeviceTypeClause(const OpenACCDeviceTypeClause &clause) {
     lastDeviceTypeValues.clear();
@@ -69,19 +141,8 @@ class OpenACCClauseCIREmitter final
     cgf.cgm.errorNYI(c.getSourceRange(), "OpenACC Clause", c.getClauseKind());
   }
 
-  mlir::Value createIntExpr(const Expr *intExpr) {
-    mlir::Value expr = cgf.emitScalarExpr(intExpr);
-    mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc());
-
-    mlir::IntegerType targetType = mlir::IntegerType::get(
-        &cgf.getMLIRContext(), 
cgf.getContext().getIntWidth(intExpr->getType()),
-        intExpr->getType()->isSignedIntegerOrEnumerationType()
-            ? mlir::IntegerType::SignednessSemantics::Signed
-            : mlir::IntegerType::SignednessSemantics::Unsigned);
-
-    auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
-        exprLoc, targetType, expr);
-    return conversionOp.getResult(0);
+  mlir::Value emitOpenACCIntExpr(const Expr *intExpr) {
+    return clang::emitOpenACCIntExpr(cgf, builder, intExpr);
   }
 
   // 'condition' as an OpenACC grammar production is used for 'if' and (some
@@ -157,6 +218,104 @@ class OpenACCClauseCIREmitter final
     computeEmitter.Visit(&c);
   }
 
+  template <typename BeforeOpTy, typename AfterOpTy>
+  void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
+                      bool structured, bool implicit) {
+    DataOperandInfo opInfo =
+        getDataOperandInfo(cgf, builder, dirKind, varOperand);
+    mlir::ValueRange bounds;
+
+    // TODO: OpenACC: we should comprehend the 'modifier-list' here for the 
data
+    // operand. At the moment, we don't have a uniform way to assign these
+    // properly, and the dialect cannot represent anything other than 
'readonly'
+    // and 'zero' on copyin/copyout/create, so for now, we skip it.
+
+    auto beforeOp =
+        builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, 
structured,
+                                   implicit, opInfo.name, bounds);
+    operation.getDataClauseOperandsMutable().append(beforeOp.getResult());
+
+    AfterOpTy afterOp;
+    {
+      mlir::OpBuilder::InsertionGuard guardCase(builder);
+      builder.setInsertionPointAfter(operation);
+      afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, 
beforeOp.getResult(),
+                                          opInfo.varValue, structured, 
implicit,
+                                          opInfo.name, bounds);
+    }
+
+    // Set the 'rest' of the info for both operations.
+    beforeOp.setDataClause(dataClause);
+    afterOp.setDataClause(dataClause);
+
+    // Make sure we record these, so 'async' values can be updated later.
+    dataOperands.push_back(beforeOp.getOperation());
+    dataOperands.push_back(afterOp.getOperation());
+  }
+
+  // Helper function that covers for the fact that we don't have this function
+  // on all operation types.
+  mlir::ArrayAttr getAsyncOnlyAttr() {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, 
mlir::acc::SerialOp,
+                               mlir::acc::KernelsOp, mlir::acc::DataOp>)
+      return operation.getAsyncOnlyAttr();
+
+    // Note: 'wait' has async as well, but it cannot have data clauses, so we
+    // don't have to handle them here.
+
+    llvm_unreachable("getting asyncOnly when clause not valid on operation?");
+  }
+
+  // Helper function that covers for the fact that we don't have this function
+  // on all operation types.
+  mlir::ArrayAttr getAsyncOperandsDeviceTypeAttr() {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, 
mlir::acc::SerialOp,
+                               mlir::acc::KernelsOp, mlir::acc::DataOp>)
+      return operation.getAsyncOperandsDeviceTypeAttr();
+
+    // Note: 'wait' has async as well, but it cannot have data clauses, so we
+    // don't have to handle them here.
+
+    llvm_unreachable(
+        "getting asyncOperandsDeviceType when clause not valid on operation?");
+  }
+
+  // Helper function that covers for the fact that we don't have this function
+  // on all operation types.
+  mlir::OperandRange getAsyncOperands() {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, 
mlir::acc::SerialOp,
+                               mlir::acc::KernelsOp, mlir::acc::DataOp>)
+      return operation.getAsyncOperands();
+
+    // Note: 'wait' has async as well, but it cannot have data clauses, so we
+    // don't have to handle them here.
+
+    llvm_unreachable(
+        "getting asyncOperandsDeviceType when clause not valid on operation?");
+  }
+
+  // The 'data' clauses all require that we add the 'async' values from the
+  // operation to them. We've collected the data operands along the way, so use
+  // that list to get the current 'async' values.
+  void updateDataOperandAsyncValues() {
+    if (!hasAsyncClause || dataOperands.empty())
+      return;
+
+    // TODO: OpenACC: Handle this correctly for combined constructs.
+
+    for (mlir::Operation *dataOp : dataOperands) {
+      llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
+          .Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto op) {
+            op.setAsyncOnlyAttr(getAsyncOnlyAttr());
+            
op.setAsyncOperandsDeviceTypeAttr(getAsyncOperandsDeviceTypeAttr());
+            op.getAsyncOperandsMutable().assign(getAsyncOperands());
+          })
+          .Default([&](mlir::Operation *) {
+            llvm_unreachable("Not a data operation?");
+          });
+    }
+  }
+
 public:
   OpenACCClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf,
                           CIRGen::CIRGenBuilderTy &builder,
@@ -168,6 +327,14 @@ class OpenACCClauseCIREmitter final
     clauseNotImplemented(clause);
   }
 
+  // The entry point for the CIR emitter. All users should use this rather than
+  // 'visitClauseList', as this also handles the things that have to happen
+  // 'after' the clauses are all visited.
+  void emitClauses(ArrayRef<const OpenACCClause *> clauses) {
+    this->VisitClauseList(clauses);
+    updateDataOperandAsyncValues();
+  }
+
   void VisitDefaultClause(const OpenACCDefaultClause &clause) {
     // This type-trait checks if 'op'(the first arg) is one of the mlir::acc
     // operations listed in the rest of the arguments.
@@ -227,7 +394,7 @@ class OpenACCClauseCIREmitter final
     if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp,
                                mlir::acc::KernelsOp>) {
       operation.addNumWorkersOperand(builder.getContext(),
-                                     createIntExpr(clause.getIntExpr()),
+                                     emitOpenACCIntExpr(clause.getIntExpr()),
                                      lastDeviceTypeValues);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
@@ -240,7 +407,7 @@ class OpenACCClauseCIREmitter final
     if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp,
                                mlir::acc::KernelsOp>) {
       operation.addVectorLengthOperand(builder.getContext(),
-                                       createIntExpr(clause.getIntExpr()),
+                                       emitOpenACCIntExpr(clause.getIntExpr()),
                                        lastDeviceTypeValues);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
@@ -250,14 +417,26 @@ class OpenACCClauseCIREmitter final
   }
 
   void VisitAsyncClause(const OpenACCAsyncClause &clause) {
+    hasAsyncClause = true;
     if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, 
mlir::acc::SerialOp,
                                mlir::acc::KernelsOp, mlir::acc::DataOp>) {
       if (!clause.hasIntExpr())
         operation.addAsyncOnly(builder.getContext(), lastDeviceTypeValues);
-      else
-        operation.addAsyncOperand(builder.getContext(),
-                                  createIntExpr(clause.getIntExpr()),
+      else {
+
+        mlir::Value intExpr;
+        {
+          // Async int exprs can be referenced by the data operands, which 
means
+          // that the int-exprs have to appear before them.  IF there is a data
+          // operand already, set the insertion point to 'before' it.
+          mlir::OpBuilder::InsertionGuard guardCase(builder);
+          if (!dataOperands.empty())
+            builder.setInsertionPoint(dataOperands.front());
+          intExpr = emitOpenACCIntExpr(clause.getIntExpr());
+        }
+        operation.addAsyncOperand(builder.getContext(), intExpr,
                                   lastDeviceTypeValues);
+      }
     } else if constexpr (isOneOfTypes<OpTy, mlir::acc::WaitOp>) {
       // Wait doesn't have a device_type, so its handling here is slightly
       // 
diff erent.
@@ -265,7 +444,7 @@ class OpenACCClauseCIREmitter final
         operation.setAsync(true);
       else
         operation.getAsyncOperandMutable().append(
-            createIntExpr(clause.getIntExpr()));
+            emitOpenACCIntExpr(clause.getIntExpr()));
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
@@ -321,7 +500,7 @@ class OpenACCClauseCIREmitter final
     if constexpr (isOneOfTypes<OpTy, mlir::acc::InitOp, mlir::acc::ShutdownOp,
                                mlir::acc::SetOp>) {
       operation.getDeviceNumMutable().append(
-          createIntExpr(clause.getIntExpr()));
+          emitOpenACCIntExpr(clause.getIntExpr()));
     } else {
       llvm_unreachable(
           "init, shutdown, set, are only valid device_num constructs");
@@ -333,7 +512,7 @@ class OpenACCClauseCIREmitter final
                                mlir::acc::KernelsOp>) {
       llvm::SmallVector<mlir::Value> values;
       for (const Expr *E : clause.getIntExprs())
-        values.push_back(createIntExpr(E));
+        values.push_back(emitOpenACCIntExpr(E));
 
       operation.addNumGangsOperands(builder.getContext(), values,
                                     lastDeviceTypeValues);
@@ -352,9 +531,9 @@ class OpenACCClauseCIREmitter final
       } else {
         llvm::SmallVector<mlir::Value> values;
         if (clause.hasDevNumExpr())
-          values.push_back(createIntExpr(clause.getDevNumExpr()));
+          values.push_back(emitOpenACCIntExpr(clause.getDevNumExpr()));
         for (const Expr *E : clause.getQueueIdExprs())
-          values.push_back(createIntExpr(E));
+          values.push_back(emitOpenACCIntExpr(E));
         operation.addWaitOperands(builder.getContext(), clause.hasDevNumExpr(),
                                   values, lastDeviceTypeValues);
       }
@@ -370,7 +549,7 @@ class OpenACCClauseCIREmitter final
   void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) {
     if constexpr (isOneOfTypes<OpTy, mlir::acc::SetOp>) {
       operation.getDefaultAsyncMutable().append(
-          createIntExpr(clause.getIntExpr()));
+          emitOpenACCIntExpr(clause.getIntExpr()));
     } else {
       llvm_unreachable("set, is only valid device_num constructs");
     }
@@ -460,7 +639,7 @@ class OpenACCClauseCIREmitter final
     if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
       if (clause.hasIntExpr())
         operation.addWorkerNumOperand(builder.getContext(),
-                                      createIntExpr(clause.getIntExpr()),
+                                      emitOpenACCIntExpr(clause.getIntExpr()),
                                       lastDeviceTypeValues);
       else
         operation.addEmptyWorker(builder.getContext(), lastDeviceTypeValues);
@@ -478,7 +657,7 @@ class OpenACCClauseCIREmitter final
     if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
       if (clause.hasIntExpr())
         operation.addVectorOperand(builder.getContext(),
-                                   createIntExpr(clause.getIntExpr()),
+                                   emitOpenACCIntExpr(clause.getIntExpr()),
                                    lastDeviceTypeValues);
       else
         operation.addEmptyVector(builder.getContext(), lastDeviceTypeValues);
@@ -514,7 +693,7 @@ class OpenACCClauseCIREmitter final
           } else if (isa<OpenACCAsteriskSizeExpr>(expr)) {
             values.push_back(createConstantInt(exprLoc, 64, -1));
           } else {
-            values.push_back(createIntExpr(expr));
+            values.push_back(emitOpenACCIntExpr(expr));
           }
         }
 
@@ -527,6 +706,20 @@ class OpenACCClauseCIREmitter final
       llvm_unreachable("Unknown construct kind in VisitGangClause");
     }
   }
+
+  void VisitCopyClause(const OpenACCCopyClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, 
mlir::acc::SerialOp,
+                               mlir::acc::KernelsOp>) {
+      for (auto var : clause.getVarList())
+        addDataOperand<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
+            var, mlir::acc::DataClause::acc_copy, /*structured=*/true,
+            /*implicit=*/false);
+    } else {
+      // TODO: When we've implemented this for everything, switch this to an
+      // unreachable. data, declare, combined constructs remain.
+      return clauseNotImplemented(clause);
+    }
+  }
 };
 
 template <typename OpTy>

diff  --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp 
b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index fc76f57ce7c29..3c18f5d9e205c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -39,8 +39,7 @@ mlir::LogicalResult 
CIRGenFunction::emitOpenACCOpAssociatedStmt(
     // Sets insertion point before the 'op', since every new expression needs 
to
     // be before the operation.
     builder.setInsertionPoint(op);
-    makeClauseEmitter(op, *this, builder, dirKind, dirLoc)
-        .VisitClauseList(clauses);
+    makeClauseEmitter(op, *this, builder, dirKind, 
dirLoc).emitClauses(clauses);
   }
 
   {
@@ -115,7 +114,7 @@ mlir::LogicalResult 
CIRGenFunction::emitOpenACCOpCombinedConstruct(
       // We don't bother setting the insertion point, since the clause emitter
       // is going to have to do this correctly.
       makeClauseEmitter(inf, *this, builder, dirKind, dirLoc)
-          .VisitClauseList(clauses);
+          .emitClauses(clauses);
     }
 
     builder.create<TermOp>(end);
@@ -137,8 +136,7 @@ Op CIRGenFunction::emitOpenACCOp(
     // Sets insertion point before the 'op', since every new expression needs 
to
     // be before the operation.
     builder.setInsertionPoint(op);
-    makeClauseEmitter(op, *this, builder, dirKind, dirLoc)
-        .VisitClauseList(clauses);
+    makeClauseEmitter(op, *this, builder, dirKind, 
dirLoc).emitClauses(clauses);
   }
   return op;
 }

diff  --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp 
b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
index 2b78bc1a6d4a5..8a868fdc96350 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
@@ -96,7 +96,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const 
OpenACCLoopConstruct &s) {
     builder.setInsertionPoint(op);
     makeClauseEmitter(op, *this, builder, s.getDirectiveKind(),
                       s.getDirectiveLoc())
-        .VisitClauseList(s.clauses());
+        .emitClauses(s.clauses());
   }
 
   mlir::LogicalResult stmtRes = mlir::success();

diff  --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c 
b/clang/test/CIR/CodeGenOpenACC/compute-copy.c
new file mode 100644
index 0000000000000..a542409f07152
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c
@@ -0,0 +1,213 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir 
-fclangir %s -o - | FileCheck %s
+
+int global;
+void acc_compute(int parmVar) {
+  // CHECK: cir.func @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) {
+  // CHECK-NEXT: %[[PARM:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["parmVar", 
init]
+  int localVar1;
+  short localVar2;
+  float localVar3;
+  // CHECK-NEXT: %[[LOCAL1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, 
["localVar1"]
+  // CHECK-NEXT: %[[LOCAL2:.*]] = cir.alloca !s16i, !cir.ptr<!s16i>, 
["localVar2"]
+  // CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, 
["localVar3"] 
+  // CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr<!s16i>, 
!cir.ptr<!cir.ptr<!s16i>>, ["localPointer"]
+  // CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array<!cir.float x 5>, 
!cir.ptr<!cir.array<!cir.float x 5>>, ["localArray"]
+  // CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr<!s32i>
+
+#pragma acc parallel copy(localVar1, parmVar) copy(localVar2) copy(localVar3, 
parmVar)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) -> !cir.ptr<!s16i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : 
!cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]], %[[COPYIN2]], 
%[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, 
!cir.ptr<!s16i>, !cir.ptr<!cir.float>, !cir.ptr<!s32i>) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr<!cir.float>) to 
varPtr(%[[LOCAL3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr<!s16i>) to 
varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc serial copy(localVar1, parmVar) copy(localVar2) copy(localVar3, 
parmVar)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) -> !cir.ptr<!s16i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : 
!cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]], %[[COPYIN2]], 
%[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, 
!cir.ptr<!s16i>, !cir.ptr<!cir.float>, !cir.ptr<!s32i>) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr<!cir.float>) to 
varPtr(%[[LOCAL3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr<!s16i>) to 
varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc kernels copy(localVar1, parmVar) copy(localVar2) copy(localVar3, 
parmVar)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) -> !cir.ptr<!s16i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : 
!cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]], 
%[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, 
!cir.ptr<!s16i>, !cir.ptr<!cir.float>, !cir.ptr<!s32i>) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr<!cir.float>) to 
varPtr(%[[LOCAL3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr<!s16i>) to 
varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) to 
varPtr(%[[PARM]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "parmVar"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+  // TODO: OpenACC: Represent alwaysin/alwaysout/always correctly. For now,
+  // these do nothing to the IR.
+#pragma acc parallel copy(alwaysin: localVar1) copy(alwaysout: localVar2) 
copy(always: localVar3)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) -> !cir.ptr<!s16i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL3]] : 
!cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]], %[[COPYIN2]], 
%[[COPYIN3]] : !cir.ptr<!s32i>, !cir.ptr<!s16i>, !cir.ptr<!cir.float>) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr<!cir.float>) to 
varPtr(%[[LOCAL3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause 
acc_copy>, name = "localVar3"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s16i>) to 
varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc serial copy(always, alwaysin, alwaysout: localVar1)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+  short *localPointer;
+  float localArray[5];
+
+#pragma acc kernels copy(localArray, localPointer, global)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : 
!cir.ptr<!cir.array<!cir.float x 5>>) -> !cir.ptr<!cir.array<!cir.float x 5>> 
{dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : 
!cir.ptr<!cir.ptr<!s16i>>) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = 
#acc<data_clause acc_copy>, name = "localPointer"} loc
+  // CHECK-NEXT: %[[GLOBAL_REF:.*]] = cir.get_global @global : !cir.ptr<!s32i>
+  // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[GLOBAL_REF]] : 
!cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, 
name = "global"} loc
+  // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]], 
%[[COPYIN3]] : !cir.ptr<!cir.array<!cir.float x 5>>, !cir.ptr<!cir.ptr<!s16i>>, 
!cir.ptr<!s32i>) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr<!s32i>) to 
varPtr(%[[GLOBAL_REF]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause 
acc_copy>, name = "global"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!cir.ptr<!s16i>>) 
to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = 
#acc<data_clause acc_copy>, name = "localPointer"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : 
!cir.ptr<!cir.array<!cir.float x 5>>) to varPtr(%[[LOCALARRAY]] : 
!cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, 
name = "localArray"} loc
+
+#pragma acc parallel copy(localVar1) async
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async -> !cir.ptr<!s32i> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc serial async copy(localVar1, localVar2)
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async -> !cir.ptr<!s32i> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) async -> !cir.ptr<!s16i> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar2"} loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]], %[[COPYIN2]] : 
!cir.ptr<!s32i>, !cir.ptr<!s16i>) async {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s16i>) async to 
varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc kernels copy(localVar1, localVar2) async(1)
+  ;
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async(%[[ONE_CAST]] : si32) -> !cir.ptr<!s32i> {dataClause = 
#acc<data_clause acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : 
!cir.ptr<!s16i>) async(%[[ONE_CAST]] : si32) -> !cir.ptr<!s16i> {dataClause = 
#acc<data_clause acc_copy>, name = "localVar2"} loc
+  // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]] : 
!cir.ptr<!s32i>, !cir.ptr<!s16i>) async(%[[ONE_CAST]] : si32) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr<!s16i>) 
async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL2]] : !cir.ptr<!s16i>) 
{dataClause = #acc<data_clause acc_copy>, name = "localVar2"} loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) 
{dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc parallel async(1) copy(localVar1)
+  ;
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async(%[[ONE_CAST]] : si32) -> !cir.ptr<!s32i> {dataClause = 
#acc<data_clause acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ONE_CAST]] : si32) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) 
{dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc serial copy(localVar1) device_type(nvidia, radeon) async
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async([#acc.device_type<nvidia>, #acc.device_type<radeon>]) -> 
!cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async([#acc.device_type<nvidia>, #acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async([#acc.device_type<nvidia>, #acc.device_type<radeon>]) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc kernels copy(localVar1) device_type(nvidia, radeon) async(1)
+  ;
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async(%[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], 
%[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) -> !cir.ptr<!s32i> {dataClause 
= #acc<data_clause acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 
[#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async(%[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 
[#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) 
{dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc parallel copy(localVar1) async device_type(nvidia, radeon) async
+  ;
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async([#acc.device_type<none>, #acc.device_type<nvidia>, 
#acc.device_type<radeon>]) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar1"} loc
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async([#acc.device_type<none>, #acc.device_type<nvidia>, 
#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async([#acc.device_type<none>, #acc.device_type<nvidia>, 
#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause 
= #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) 
async(1)
+  ;
+  // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+  // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ZERO]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 
[#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) -> 
!cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], 
%[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], 
%[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+
+#pragma acc kernels copy(localVar1) async device_type(nvidia, radeon) async(1)
+  ;
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async([#acc.device_type<none>], %[[ONE_CAST]] : si32 
[#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) -> 
!cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+  // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async([#acc.device_type<none>], %[[ONE_CAST]] : si32 
[#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async([#acc.device_type<none>], %[[ONE_CAST]] : si32 
[#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to 
varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, 
name = "localVar1"} loc
+
+#pragma acc parallel copy(localVar1) async(1) device_type(nvidia, radeon) async
+  ;
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async([#acc.device_type<nvidia>, #acc.device_type<radeon>], 
%[[ONE_CAST]] : si32) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause 
acc_copy>, name = "localVar1"} loc
+
+  // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[ONE_CAST]] : 
si32) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[ONE_CAST]] : 
si32) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause 
acc_copy>, name = "localVar1"} loc
+
+#pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) 
async(1)
+  ;
+  // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+  // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ZERO]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 
[#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) -> 
!cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+  // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!s32i>) 
async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], 
%[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+  // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>)  
async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], 
%[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : 
!cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} 
loc
+}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to