Author: erichkeane Date: 2025-06-06T16:36:11-07:00 New Revision: b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4
URL: https://github.com/llvm/llvm-project/commit/b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4 DIFF: https://github.com/llvm/llvm-project/commit/b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4.diff LOG: [OpenACC][CIR] Implement lowering for 'no_create' clause for comp/comb no_create has its own 'data-in', plus uses the 'delete' for the data-out operation. Additionally, like all data clauses it uses the 'async' functionality previous implemented. This patch implements no_create for combined/compute constructs completely, and ensures that the feature is tested. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp clang/test/CIR/CodeGenOpenACC/combined.cpp clang/test/CIR/CodeGenOpenACC/kernels.c clang/test/CIR/CodeGenOpenACC/parallel.c clang/test/CIR/CodeGenOpenACC/serial.c Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 9f283974d8c8f..392a44917691e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -305,9 +305,19 @@ class OpenACCClauseCIREmitter final { mlir::OpBuilder::InsertionGuard guardCase(builder); builder.setInsertionPointAfter(operation); - afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, beforeOp.getResult(), - opInfo.varValue, structured, implicit, - opInfo.name, opInfo.bounds); + + if constexpr (std::is_same_v<AfterOpTy, mlir::acc::DeleteOp> || + std::is_same_v<AfterOpTy, mlir::acc::DetachOp>) { + // Detach/Delete ops don't have the variable reference here, so they + // take 1 fewer argument to their build function. + afterOp = builder.create<AfterOpTy>( + opInfo.beginLoc, beforeOp.getResult(), structured, implicit, + opInfo.name, opInfo.bounds); + } else { + afterOp = builder.create<AfterOpTy>( + opInfo.beginLoc, beforeOp.getResult(), opInfo.varValue, structured, + implicit, opInfo.name, opInfo.bounds); + } } // Set the 'rest' of the info for both operations. @@ -845,6 +855,22 @@ class OpenACCClauseCIREmitter final return clauseNotImplemented(clause); } } + + void VisitNoCreateClause(const OpenACCNoCreateClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp, + mlir::acc::KernelsOp>) { + for (auto var : clause.getVarList()) + addDataOperand<mlir::acc::NoCreateOp, mlir::acc::DeleteOp>( + var, mlir::acc::DataClause::acc_no_create, /*structured=*/true, + /*implicit=*/false); + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. data remains. + return clauseNotImplemented(clause); + } + } }; template <typename OpTy> diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 5107ee56c568c..74073094cd29a 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -1011,8 +1011,8 @@ extern "C" void acc_combined(int N, int cond) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc } -extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) { - // CHECK: cir.func @acc_combined_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { +extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) { + // CHECK: cir.func @acc_combined_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> @@ -1079,4 +1079,39 @@ extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) { // CHECK-NEXT: } loc // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc + +#pragma acc parallel loop no_create(arg1) + for(unsigned I = 0; I < 5; ++I); + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} + +#pragma acc serial loop no_create(arg2) + for(unsigned I = 0; I < 5; ++I); + // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"} + // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"} + +#pragma acc kernels loop no_create(arg1, arg2) device_type(host) async + for(unsigned I = 0; I < 5; ++I); + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"} + // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"} + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} } diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c index d276c2c23fbed..67847d0b8ce03 100644 --- a/clang/test/CIR/CodeGenOpenACC/kernels.c +++ b/clang/test/CIR/CodeGenOpenACC/kernels.c @@ -417,8 +417,8 @@ void acc_kernels(int cond) { // CHECK-NEXT: cir.return } -void acc_kernels_deviceptr(int *arg1, int *arg2) { - // CHECK: cir.func @acc_kernels_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { +void acc_kernels_data_clauses(int *arg1, int *arg2) { + // CHECK: cir.func @acc_kernels_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> @@ -451,4 +451,22 @@ void acc_kernels_deviceptr(int *arg1, int *arg2) { // CHECK-NEXT: acc.kernels dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc + +#pragma acc kernels no_create(arg1) + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} + +#pragma acc kernels no_create(arg1, arg2) device_type(nvidia) async + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"} + // CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"} + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} } diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c index df05a7aa53bb8..665648aa57134 100644 --- a/clang/test/CIR/CodeGenOpenACC/parallel.c +++ b/clang/test/CIR/CodeGenOpenACC/parallel.c @@ -444,8 +444,8 @@ void acc_parallel(int cond) { // CHECK-NEXT: cir.return } -void acc_parallel_deviceptr(int *arg1, int *arg2) { - // CHECK: cir.func @acc_parallel_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { +void acc_parallel_data_clauses(int *arg1, int *arg2) { + // CHECK: cir.func @acc_parallel_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> @@ -478,4 +478,23 @@ void acc_parallel_deviceptr(int *arg1, int *arg2) { // CHECK-NEXT: acc.parallel dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc + +#pragma acc parallel no_create(arg1) + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} + +#pragma acc parallel no_create(arg1, arg2) device_type(radeon) async + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"} + // CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"} + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} } + diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c index 48bda5387dffb..eb09a9e7aea8e 100644 --- a/clang/test/CIR/CodeGenOpenACC/serial.c +++ b/clang/test/CIR/CodeGenOpenACC/serial.c @@ -267,8 +267,8 @@ void acc_serial(int cond) { // CHECK-NEXT: cir.return } -void acc_serial_deviceptr(int *arg1, int *arg2) { - // CHECK: cir.func @acc_serial_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { +void acc_serial_data_clauses(int *arg1, int *arg2) { + // CHECK: cir.func @acc_serial_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> @@ -302,4 +302,21 @@ void acc_serial_deviceptr(int *arg1, int *arg2) { // CHECK-NEXT: acc.serial dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc + +#pragma acc serial no_create(arg1) + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} +#pragma acc serial no_create(arg1, arg2) device_type(nvidia) async + ; + // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"} + // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"} + // CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"} + // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"} } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits