https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/94359
>From 88a2553168b4fd3ad1b65b855c2bdf5aba09d126 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Tue, 4 Jun 2024 15:26:38 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Add -fopenmp-force-usm option to flang This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation. This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit. --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 2 ++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/include/flang/Tools/CrossToolHelpers.h | 20 ++++++++++++------- flang/lib/Frontend/CompilerInvocation.cpp | 3 +++ flang/lib/Lower/OpenMP/OpenMP.cpp | 4 +++- flang/test/Driver/omp-driver-offload.f90 | 20 +++++++++++++++++++ flang/test/Lower/OpenMP/force-usm.f90 | 12 +++++++++++ .../test/Lower/OpenMP/requires-force-usm.f90 | 15 ++++++++++++++ flang/tools/bbc/bbc.cpp | 15 +++++++++----- 10 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 flang/test/Lower/OpenMP/force-usm.f90 create mode 100644 flang/test/Lower/OpenMP/requires-force-usm.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5ab2d49c7a497..f04c220d6e1db 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3592,7 +3592,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group< HelpText<"Do not create a host fallback if offloading to the device fails.">, MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>; def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, - Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>, + Flags<[NoArgumentUnused]>, 
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">, MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>; def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 42ca060186fd8..b7abe7b1c19bc 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); // FIXME: Clang supports a whole bunch more flags here. + if (Args.hasArg(options::OPT_fopenmp_force_usm)) + CmdArgs.push_back("-fopenmp-force-usm"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 2bf10826120a8..d3e1e972d1519 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0) LANGOPT(OpenMPIsTargetDevice, 1, false) /// Generate OpenMP target code only for GPUs LANGOPT(OpenMPIsGPU, 1, false) +/// Behave as if 'requires unified_shared_memory' was specified +LANGOPT(OpenMPForceUSM, 1, false) /// Enable debugging in the OpenMP offloading device RTL LANGOPT(OpenMPTargetDebug, 32, 0) /// Assume work-shared loops do not have more iterations than participating diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index 77b68fc6187fa..26fbe51d329c7 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -130,16 +130,16 @@ struct OffloadModuleOpts { OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription, bool OpenMPThreadSubscription, bool OpenMPNoThreadState, bool 
OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice, - bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {}, - bool NoGPULib = false) + bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion, + std::string OMPHostIRFile = {}, bool NoGPULib = false) : OpenMPTargetDebug(OpenMPTargetDebug), OpenMPTeamSubscription(OpenMPTeamSubscription), OpenMPThreadSubscription(OpenMPThreadSubscription), OpenMPNoThreadState(OpenMPNoThreadState), OpenMPNoNestedParallelism(OpenMPNoNestedParallelism), OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU), - OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile), - NoGPULib(NoGPULib) {} + OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion), + OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {} OffloadModuleOpts(Fortran::frontend::LangOptions &Opts) : OpenMPTargetDebug(Opts.OpenMPTargetDebug), @@ -148,8 +148,9 @@ struct OffloadModuleOpts { OpenMPNoThreadState(Opts.OpenMPNoThreadState), OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism), OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice), - OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion), - OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {} + OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM), + OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile), + NoGPULib(Opts.NoGPULib) {} uint32_t OpenMPTargetDebug = 0; bool OpenMPTeamSubscription = false; @@ -158,6 +159,7 @@ struct OffloadModuleOpts { bool OpenMPNoNestedParallelism = false; bool OpenMPIsTargetDevice = false; bool OpenMPIsGPU = false; + bool OpenMPForceUSM = false; uint32_t OpenMPVersion = 11; std::string OMPHostIRFile = {}; bool NoGPULib = false; @@ -172,13 +174,17 @@ struct OffloadModuleOpts { module.getOperation())) { offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice); offloadMod.setIsGPU(Opts.OpenMPIsGPU); + if (Opts.OpenMPForceUSM) { + 
offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory); + } if (Opts.OpenMPIsTargetDevice) { offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription, Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib); - if (!Opts.OMPHostIRFile.empty()) + if (!Opts.OMPHostIRFile.empty()) { offloadMod.setHostIRFilePath(Opts.OMPHostIRFile); + } } } } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 50c3e8b0113b5..f64a939b785ef 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, res.getLangOpts().OpenMPVersion, diags)) { res.getLangOpts().OpenMPVersion = Version; } + if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) { + res.getLangOpts().OpenMPForceUSM = 1; + } if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) { res.getLangOpts().OpenMPIsTargetDevice = 1; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 9598457d123cf..af9e2af24619b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod, symbol->details()); } - MlirRequires mlirFlags = MlirRequires::none; + // Use pre-populated omp.requires module attribute if it was set, so that + // the "-fopenmp-force-usm" compiler option is honored. 
+ MlirRequires mlirFlags = offloadMod.getRequires(); if (semaFlags.test(SemaRequires::ReverseOffload)) mlirFlags = mlirFlags | MlirRequires::reverse_offload; if (semaFlags.test(SemaRequires::UnifiedAddress)) diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90 index 8f48ca75114ce..6fb4f4eeeeca1 100644 --- a/flang/test/Driver/omp-driver-offload.f90 +++ b/flang/test/Driver/omp-driver-offload.f90 @@ -207,3 +207,23 @@ ! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \ ! RUN: | FileCheck --check-prefix=ROCM-PATH %s ! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd + +! Test -fopenmp-force-usm option without offload +! RUN: %flang -S -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-force-usm \ +! RUN: --target=aarch64-unknown-linux-gnu \ +! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD + +! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm" + +! Test -fopenmp-force-usm option with offload +! RUN: %flang -S -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \ +! RUN: --target=aarch64-unknown-linux-gnu \ +! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD + +! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm" +! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa" +! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm" diff --git a/flang/test/Lower/OpenMP/force-usm.f90 b/flang/test/Lower/OpenMP/force-usm.f90 new file mode 100644 index 0000000000000..90bbf3c4d842f --- /dev/null +++ b/flang/test/Lower/OpenMP/force-usm.f90 @@ -0,0 +1,12 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s +! 
RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s +! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s + +! This test checks the addition of requires unified_shared_memory when +! -fopenmp-force-usm is set + +!CHECK: module attributes { +!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory> +program requires +end program requires diff --git a/flang/test/Lower/OpenMP/requires-force-usm.f90 b/flang/test/Lower/OpenMP/requires-force-usm.f90 new file mode 100644 index 0000000000000..5f5cf9e64cd70 --- /dev/null +++ b/flang/test/Lower/OpenMP/requires-force-usm.f90 @@ -0,0 +1,15 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s +! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s +! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s + +! This test checks the addition of requires unified_shared_memory when +! 
-fopenmp-force-usm is set, even when other requires directives are present + +!CHECK: module attributes { +!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory> +program requires + !$omp requires reverse_offload + !$omp target + !$omp end target +end program requires diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index bab21338cef26..3485c1499d3b6 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -144,6 +144,11 @@ static llvm::cl::opt<bool> llvm::cl::desc("enable openmp GPU target codegen"), llvm::cl::init(false)); +static llvm::cl::opt<bool> enableOpenMPForceUSM( + "fopenmp-force-usm", + llvm::cl::desc("force openmp unified shared memory mode"), + llvm::cl::init(false)); + // A simplified subset of the OpenMP RTL Flags from Flang, only the primary // positive options are available, no negative options e.g. fopen_assume* vs // fno_open_assume* @@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR( "-fopenmp-is-target-device is also set"; return mlir::failure(); } - auto offloadModuleOpts = - OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription, - setOpenMPThreadSubscription, setOpenMPNoThreadState, - setOpenMPNoNestedParallelism, enableOpenMPDevice, - enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib); + auto offloadModuleOpts = OffloadModuleOpts( + setOpenMPTargetDebug, setOpenMPTeamSubscription, + setOpenMPThreadSubscription, setOpenMPNoThreadState, + setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU, + enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib); setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts); setOpenMPVersionAttribute(mlirModule, setOpenMPVersion); } >From a102a23edb62ac008baec7011f1e7f8b2e704fa1 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Wed, 5 Jun 2024 13:55:18 +0100 Subject: [PATCH 2/2] Address review comments --- clang/lib/Driver/ToolChains/Flang.cpp | 3 ++- 
flang/include/flang/Tools/CrossToolHelpers.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index b7abe7b1c19bc..9609a1dc65c09 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -765,9 +765,10 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fopenmp"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); - // FIXME: Clang supports a whole bunch more flags here. if (Args.hasArg(options::OPT_fopenmp_force_usm)) CmdArgs.push_back("-fopenmp-force-usm"); + + // FIXME: Clang supports a whole bunch more flags here. break; default: // By default, if Clang doesn't know how to generate useful OpenMP code diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index 26fbe51d329c7..1d890fd8e1f6f 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -182,9 +182,8 @@ struct OffloadModuleOpts { Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib); - if (!Opts.OMPHostIRFile.empty()) { + if (!Opts.OMPHostIRFile.empty()) offloadMod.setHostIRFilePath(Opts.OMPHostIRFile); - } } } } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits