https://github.com/erichkeane created https://github.com/llvm/llvm-project/pull/140971
The array indexes(and sections) are represented by the acc.bounds operation, which this ensures we fill in properly. The lowerbound is required, so we always get that. The upperbound or extent is required. We typically do extent, since that is the 'length' as specified by ACC, but in cases where we have implicit length, we use the extent instead. It isn't clear when 'stride' should be anything besides 1, though by my reading, since we have full-types in the emitted code, we should never have it be anything but 1. This patch enables these for copy on compute and combined constructs, and makes sure to test everything I could think of for combinations/permutations. >From ec6359330368ed72d6e4354afdcf6d0a32f9a2f9 Mon Sep 17 00:00:00 2001 From: erichkeane <eke...@nvidia.com> Date: Tue, 20 May 2025 08:55:39 -0700 Subject: [PATCH] [OpenACC][CIR] Add lowering for 'copy' array indexes The array indexes(and sections) are represented by the acc.bounds operation, which this ensures we fill in properly. The lowerbound is required, so we always get that. The upperbound or extent is required. We typically do extent, since that is the 'length' as specified by ACC, but in cases where we have implicit length, we use the extent instead. It isn't clear when 'stride' should be anything besides 1, though by my reading, since we have full-types in the emitted code, we should never have it be anything but 1. This patch enables these for copy on compute and combined constructs, and makes sure to test everything I could think of for combinations/permutations. --- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp | 88 ++- clang/test/CIR/CodeGenOpenACC/combined-copy.c | 501 ++++++++++++++++++ clang/test/CIR/CodeGenOpenACC/compute-copy.c | 425 +++++++++++++++ 3 files changed, 1001 insertions(+), 13 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 91aa358638d5d..9e45ad8519e11 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -121,6 +121,11 @@ class OpenACCClauseCIREmitter final return constOp.getResult(); } + mlir::Value createConstantInt(SourceLocation loc, unsigned width, + int64_t value) { + return createConstantInt(cgf.cgm.getLoc(loc), width, value); + } + mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { // '*' case leaves no identifier-info, just a nullptr. if (!ii) @@ -184,37 +189,94 @@ class OpenACCClauseCIREmitter final mlir::Location beginLoc; mlir::Value varValue; llvm::StringRef name; + llvm::SmallVector<mlir::Value> bounds; }; + mlir::Value createBound(mlir::Location boundLoc, mlir::Value lowerBound, + mlir::Value upperBound, mlir::Value extent) { + // Arrays always have a start-idx of 0. + mlir::Value startIdx = createConstantInt(boundLoc, 64, 0); + // TODO: OpenACC: It isn't clear that stride would ever be anything other + // than '1'? We emit the type of the reference 'correctly' as far as I + // know, so it should just be 1 element each time. We could perhaps use + // the 'inBytes' variant here, but it isn't clear what value that gets us. + // We might need to revisit this once we try to opt this and see what is + // going to happen. + mlir::Value stride = createConstantInt(boundLoc, 64, 1); + + auto bound = builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound, + upperBound); + bound.getStartIdxMutable().assign(startIdx); + if (extent) + bound.getExtentMutable().assign(extent); + bound.getStrideMutable().assign(stride); + + return bound; + } + // A helper function that gets the information from an operand to a data // clause, so that it can be used to emit the data operations. - inline DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, - const Expr *e) { + DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, const Expr *e) { // TODO: OpenACC: Cache was different enough as to need a separate // `ActOnCacheVar`, so we are going to need to do some investigations here // when it comes to implement this for cache. if (dk == OpenACCDirectiveKind::Cache) { cgf.cgm.errorNYI(e->getSourceRange(), "OpenACC data operand for 'cache' directive"); - return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}}; + return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}, {}}; } const Expr *curVarExpr = e->IgnoreParenImpCasts(); mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); + llvm::SmallVector<mlir::Value> bounds; + + // Assemble the list of bounds. + while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) { + mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value extent; + + if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) { + if (const Expr *lb = section->getLowerBound()) + lowerBound = emitIntExpr(lb); + else + lowerBound = createConstantInt(boundLoc, 64, 0); + + if (const Expr *len = section->getLength()) { + extent = emitIntExpr(len); + } else { + QualType baseTy = ArraySectionExpr::getBaseOriginalType( + section->getBase()->IgnoreParenImpCasts()); + // We know this is the case as implicit lengths are only allowed for + // array types with a constant size, or a dependent size. AND since + // we are codegen we know we're not dependent. + auto *arrayTy = cgf.getContext().getAsConstantArrayType(baseTy); + // Rather than trying to calculate the extent based on the + // lower-bound, we can just emit this as an upper bound. + upperBound = + createConstantInt(boundLoc, 64, arrayTy->getLimitedSize() - 1); + } - // TODO: OpenACC: Assemble the list of bounds. - if (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) { - cgf.cgm.errorNYI(curVarExpr->getSourceRange(), - "OpenACC data clause array subscript/section"); - return {exprLoc, {}, {}}; + curVarExpr = section->getBase()->IgnoreParenImpCasts(); + } else { + const auto *subscript = dyn_cast<ArraySubscriptExpr>(curVarExpr); + + lowerBound = emitIntExpr(subscript->getIdx()); + // Length of an array index is always 1. + extent = createConstantInt(boundLoc, 64, 1); + curVarExpr = subscript->getBase()->IgnoreParenImpCasts(); + } + + bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent)); } // TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly. if (isa<MemberExpr>(curVarExpr)) { cgf.cgm.errorNYI(curVarExpr->getSourceRange(), "OpenACC Data clause member expr"); - return {exprLoc, {}, {}}; + return {exprLoc, {}, {}, std::move(bounds)}; } // Sema has made sure that only 4 types of things can get here, array @@ -223,14 +285,14 @@ class OpenACCClauseCIREmitter final // right. const auto *dre = cast<DeclRefExpr>(curVarExpr); const auto *vd = cast<VarDecl>(dre->getFoundDecl()->getCanonicalDecl()); - return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName()}; + return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName(), + std::move(bounds)}; } template <typename BeforeOpTy, typename AfterOpTy> void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause, bool structured, bool implicit) { DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand); - mlir::ValueRange bounds; // TODO: OpenACC: we should comprehend the 'modifier-list' here for the data // operand. At the moment, we don't have a uniform way to assign these @@ -239,7 +301,7 @@ class OpenACCClauseCIREmitter final auto beforeOp = builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured, - implicit, opInfo.name, bounds); + implicit, opInfo.name, opInfo.bounds); operation.getDataClauseOperandsMutable().append(beforeOp.getResult()); AfterOpTy afterOp; @@ -248,7 +310,7 @@ class OpenACCClauseCIREmitter final builder.setInsertionPointAfter(operation); afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, beforeOp.getResult(), opInfo.varValue, structured, implicit, - opInfo.name, bounds); + opInfo.name, opInfo.bounds); } // Set the 'rest' of the info for both operations. diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c index 8df6b9fc03454..3ebca6578c8a9 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c @@ -12,6 +12,8 @@ void acc_compute(int parmVar) { // CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["localVar3"] // CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>, ["localPointer"] // CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["localArray"] + // CHECK-NEXT: %[[LOCALARRAYOFPTRS:.*]] = cir.alloca !cir.array<!cir.ptr<!cir.float> x 5>, !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>, ["localArrayOfPtrs"] + // CHECK-NEXT: %[[THREEDARRAY:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>, ["threeDArray"] // CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr<!s32i> #pragma acc parallel loop copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar) @@ -261,4 +263,503 @@ void acc_compute(int parmVar) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc + +#pragma acc parallel loop copy(localArray[3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc serial loop copy(localArray[1:3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc kernels loop copy(localArray[:3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc parallel loop copy(localArray[1:]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc serial loop copy(localArray[localVar1:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc kernels loop copy(localArray[:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc parallel loop copy(localArray[localVar1:]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc serial loop copy(localPointer[3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc kernels loop copy(localPointer[1:3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc parallel loop copy(localPointer[:3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc serial loop copy(localPointer[localVar1:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc kernels loop copy(localPointer[:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + + float *localArrayOfPtrs[5]; +#pragma acc parallel loop copy(localArrayOfPtrs[3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial loop copy(localArrayOfPtrs[3][2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels loop copy(localArrayOfPtrs[localVar1:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel loop copy(localArrayOfPtrs[localVar1:]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial loop copy(localArrayOfPtrs[:localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels loop copy(localArrayOfPtrs[localVar1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel loop copy(localArrayOfPtrs[localVar1][localVar2]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial loop copy(localArrayOfPtrs[localVar1][localVar2:parmVar]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels loop copy(localArrayOfPtrs[localVar1][:parmVar]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel loop copy(localArrayOfPtrs[localVar1:localVar2][:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial loop copy(localArrayOfPtrs[localVar1:localVar2][1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + + double threeDArray[5][6][7]; +#pragma acc kernels loop copy(threeDArray[1][2][3]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + +#pragma acc parallel loop copy(threeDArray[1:1][2:1][3:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc } diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c b/clang/test/CIR/CodeGenOpenACC/compute-copy.c index a542409f07152..77e8cc8562c73 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c @@ -12,6 +12,9 @@ void acc_compute(int parmVar) { // CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["localVar3"] // CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>, ["localPointer"] // CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["localArray"] + // CHECK-NEXT: %[[LOCALARRAYOFPTRS:.*]] = cir.alloca !cir.array<!cir.ptr<!cir.float> x 5>, !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>, ["localArrayOfPtrs"] + // CHECK-NEXT: %[[THREEDARRAY:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>, ["threeDArray"] + // CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr<!s32i> #pragma acc parallel copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar) @@ -210,4 +213,426 @@ void acc_compute(int parmVar) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc + +#pragma acc parallel copy(localArray[3]) + ; + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc serial copy(localArray[1:3]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc kernels copy(localArray[:3]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc parallel copy(localArray[1:]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc +#pragma acc serial copy(localArray[localVar1:localVar2]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc kernels copy(localArray[:localVar2]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc parallel copy(localArray[localVar1:]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc + +#pragma acc serial copy(localPointer[3]) + ; + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc kernels copy(localPointer[1:3]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc parallel copy(localPointer[:3]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc serial copy(localPointer[localVar1:localVar2]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + +#pragma acc kernels copy(localPointer[:localVar2]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc + + float *localArrayOfPtrs[5]; +#pragma acc parallel copy(localArrayOfPtrs[3]) + ; + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc +#pragma acc serial copy(localArrayOfPtrs[3][2]) + ; + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels copy(localArrayOfPtrs[localVar1:localVar2]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel copy(localArrayOfPtrs[localVar1:]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial copy(localArrayOfPtrs[:localVar2]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels copy(localArrayOfPtrs[localVar1]) + ; + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel copy(localArrayOfPtrs[localVar1][localVar2]) + ; + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial copy(localArrayOfPtrs[localVar1][localVar2:parmVar]) + ; + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc kernels copy(localArrayOfPtrs[localVar1][:parmVar]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc parallel copy(localArrayOfPtrs[localVar1:localVar2][:1]) + ; + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + +#pragma acc serial copy(localArrayOfPtrs[localVar1:localVar2][1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc + // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i + // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc + + double threeDArray[5][6][7]; +#pragma acc kernels copy(threeDArray[1][2][3]) + ; + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + +#pragma acc parallel copy(threeDArray[1:1][2:1][3:1]) + ; + // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits