Author: Erich Keane
Date: 2025-04-25T13:15:03-07:00
New Revision: 881f6de812632bdbf835aaf8d6a792ff5d93764c

URL: 
https://github.com/llvm/llvm-project/commit/881f6de812632bdbf835aaf8d6a792ff5d93764c
DIFF: 
https://github.com/llvm/llvm-project/commit/881f6de812632bdbf835aaf8d6a792ff5d93764c.diff

LOG: [OpenACC][CIR] Lower 'wait' clause for compute/data constructs (#137359)

The 'wait' clause is a bit complicated, and is laid out awkwardly in the
IR addative functions, so this patch has to do a little bit of work to
do that (mostly the 'devnum' work).

Otherwise, this is very similar to how num_gangs works, with the
additional complexity of the 'empty' wait being represented differently
as well, but this is similar to how 'async' and a few others work as
well.

Added: 
    

Modified: 
    clang/include/clang/AST/OpenACCClause.h
    clang/lib/AST/OpenACCClause.cpp
    clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
    clang/lib/Sema/TreeTransform.h
    clang/test/CIR/CodeGenOpenACC/data.c
    clang/test/CIR/CodeGenOpenACC/kernels.c
    clang/test/CIR/CodeGenOpenACC/parallel.c
    clang/test/CIR/CodeGenOpenACC/serial.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/OpenACCClause.h 
b/clang/include/clang/AST/OpenACCClause.h
index f18a6cf62f2c5..449bcb71f9f32 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -565,6 +565,9 @@ class OpenACCWaitClause final
   llvm::ArrayRef<Expr *> getQueueIdExprs() const {
     return OpenACCClauseWithExprs::getExprs().drop_front();
   }
+  // If this is a plain `wait` (no parens) this returns 'false'. Else 
Sema/Parse
+  // ensures we have at least one QueueId expression.
+  bool hasExprs() const { return getLParenLoc().isValid(); }
 };
 
 class OpenACCNumGangsClause final

diff  --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 01e183051934d..9d3645e6da1ca 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -865,7 +865,7 @@ void OpenACCClausePrinter::VisitReductionClause(
 
 void OpenACCClausePrinter::VisitWaitClause(const OpenACCWaitClause &C) {
   OS << "wait";
-  if (!C.getLParenLoc().isInvalid()) {
+  if (C.hasExprs()) {
     OS << "(";
     if (C.hasDevNumExpr()) {
       OS << "devnum: ";

diff  --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp 
b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 5720fec556ff2..6e65f94c78bed 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -386,6 +386,53 @@ class OpenACCClauseCIREmitter final
     }
   }
 
+  void VisitWaitClause(const OpenACCWaitClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, DataOp>) 
{
+      if (!clause.hasExprs()) {
+        operation.setWaitOnlyAttr(
+            handleDeviceTypeAffectedClause(operation.getWaitOnlyAttr()));
+      } else {
+        llvm::SmallVector<mlir::Value> values;
+
+        if (clause.hasDevNumExpr())
+          values.push_back(createIntExpr(clause.getDevNumExpr()));
+        for (const Expr *E : clause.getQueueIdExprs())
+          values.push_back(createIntExpr(E));
+
+        llvm::SmallVector<int32_t> segments;
+        if (operation.getWaitOperandsSegments())
+          llvm::copy(*operation.getWaitOperandsSegments(),
+                     std::back_inserter(segments));
+
+        unsigned beforeSegmentSize = segments.size();
+
+        mlir::MutableOperandRange range = operation.getWaitOperandsMutable();
+        operation.setWaitOperandsDeviceTypeAttr(handleDeviceTypeAffectedClause(
+            operation.getWaitOperandsDeviceTypeAttr(), values, range,
+            segments));
+        operation.setWaitOperandsSegments(segments);
+
+        // In addition to having to set the 'segments', wait also has a list of
+        // bool attributes whether it is annotated with 'devnum'.  We can use
+        // our knowledge of how much the 'segments' array grew to determine how
+        // many we need to add.
+        llvm::SmallVector<bool> hasDevNums;
+        if (operation.getHasWaitDevnumAttr())
+          for (mlir::Attribute A : operation.getHasWaitDevnumAttr())
+            hasDevNums.push_back(cast<mlir::BoolAttr>(A).getValue());
+
+        hasDevNums.insert(hasDevNums.end(), segments.size() - 
beforeSegmentSize,
+                          clause.hasDevNumExpr());
+
+        operation.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasDevNums));
+      }
+    } else {
+      // TODO: When we've implemented this for everything, switch this to an
+      // unreachable. Enter data, exit data, update, Combined constructs 
remain.
+      return clauseNotImplemented(clause);
+    }
+  }
+
   void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) {
     if constexpr (isOneOfTypes<OpTy, SetOp>) {
       operation.getDefaultAsyncMutable().append(

diff  --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 071389543ab68..94d2b6b6cc808 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12228,7 +12228,7 @@ void OpenACCClauseTransform<Derived>::VisitVectorClause(
 template <typename Derived>
 void OpenACCClauseTransform<Derived>::VisitWaitClause(
     const OpenACCWaitClause &C) {
-  if (!C.getLParenLoc().isInvalid()) {
+  if (C.hasExprs()) {
     Expr *DevNumExpr = nullptr;
     llvm::SmallVector<Expr *> InstantiatedQueueIdExprs;
 

diff  --git a/clang/test/CIR/CodeGenOpenACC/data.c 
b/clang/test/CIR/CodeGenOpenACC/data.c
index 29fd465708054..7887df6503f08 100644
--- a/clang/test/CIR/CodeGenOpenACC/data.c
+++ b/clang/test/CIR/CodeGenOpenACC/data.c
@@ -109,5 +109,117 @@ void acc_data(int cond) {
   // CHECK-NEXT: acc.terminator
   // CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
 
+#pragma acc data default(none) wait
+  {}
+  // CHECK-NEXT: acc.data wait {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: acc.data wait([#acc.device_type<none>, 
#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(1) device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait([#acc.device_type<none>], {%[[ONE_CAST]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(1) device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL2]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} 
[#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1) device_type(nvidia) 
wait(devnum: cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} 
[#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32, %[[TWO_CAST]] : si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1, 2) device_type(nvidia, 
radeon) wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : 
si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: 
%[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} 
[#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(cond,  1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(queues: cond,  1) device_type(radeon)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
   // CHECK-NEXT: cir.return
 }

diff  --git a/clang/test/CIR/CodeGenOpenACC/kernels.c 
b/clang/test/CIR/CodeGenOpenACC/kernels.c
index a57a0ccb557dc..7175e342c39bd 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -302,5 +302,117 @@ void acc_kernels(int cond) {
   // CHECK-NEXT: acc.terminator
   // CHECK-NEXT: } loc
 
+#pragma acc kernels wait
+  {}
+  // CHECK-NEXT: acc.kernels wait {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: acc.kernels wait([#acc.device_type<none>, 
#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(1) device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait([#acc.device_type<none>], {%[[ONE_CAST]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(1) device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL2]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1) device_type(nvidia) wait(devnum: 
cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} 
[#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1, 2) device_type(nvidia, radeon) 
wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, 
%[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], 
{devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} 
[#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(cond,  1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(queues: cond,  1) device_type(radeon)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
   // CHECK-NEXT: cir.return
 }

diff  --git a/clang/test/CIR/CodeGenOpenACC/parallel.c 
b/clang/test/CIR/CodeGenOpenACC/parallel.c
index 89ef6069d320e..c9208566bf2b9 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -329,5 +329,117 @@ void acc_parallel(int cond) {
   // CHECK-NEXT: acc.yield
   // CHECK-NEXT: } loc
 
+#pragma acc parallel wait
+  {}
+  // CHECK-NEXT: acc.parallel wait {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: acc.parallel wait([#acc.device_type<none>, 
#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(1) device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] 
: si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait([#acc.device_type<none>], {%[[ONE_CAST]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(1) device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL2]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1) device_type(nvidia) wait(devnum: 
cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} 
[#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1, 2) device_type(nvidia, radeon) 
wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, 
%[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, 
%[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], 
{devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} 
[#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(cond,  1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(queues: cond,  1) device_type(radeon)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
   // CHECK-NEXT: cir.return
 }

diff  --git a/clang/test/CIR/CodeGenOpenACC/serial.c 
b/clang/test/CIR/CodeGenOpenACC/serial.c
index 094958f0e3b23..88a49a95f87d7 100644
--- a/clang/test/CIR/CodeGenOpenACC/serial.c
+++ b/clang/test/CIR/CodeGenOpenACC/serial.c
@@ -152,5 +152,117 @@ void acc_serial(int cond) {
   // CHECK-NEXT: acc.yield
   // CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, 
#acc.device_type<radeon>]}
 
+#pragma acc serial wait
+  {}
+  // CHECK-NEXT: acc.serial wait {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: acc.serial wait([#acc.device_type<none>, 
#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(1) device_type(nvidia) wait
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait([#acc.device_type<none>], {%[[ONE_CAST]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(1) device_type(nvidia) wait(1)
+  {}
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL2]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : 
si32} [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] 
: si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1) device_type(nvidia) wait(devnum: 
cond : 1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] 
: si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} 
[#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] 
: si32, %[[TWO_CAST]] : si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1, 2) device_type(nvidia, radeon) 
wait(devnum: cond : 1, 2)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] 
: si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] 
: si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: 
%[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} 
[#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(cond,  1)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc serial wait(queues: cond,  1) device_type(radeon)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, 
!s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : 
si32}) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
   // CHECK-NEXT: cir.return
 }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to