gtbercea created this revision. The new flag "--fopenmp-ptx=" allows overriding the default PTX version ("+ptx42") used for GPU-offloaded OpenMP target regions.
Repository: rL LLVM https://reviews.llvm.org/D29660 Files: include/clang/Driver/Options.td lib/Driver/ToolChains.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -627,6 +627,16 @@ /// ########################################################################### +/// Check PTXAS is passed the compute capability passed to the driver. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --fopenmp-ptx=+ptx52 -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTX %s + +// CHK-PTX: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTX-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTX-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" + +/// ########################################################################### + /// Check that CLANG forwards the -v flag to PTXAS. // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes -v %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-VERBOSE %s Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -4932,8 +4932,18 @@ // than LLVM defaults to. Use PTX4.2 which is the PTX version that // came with CUDA-7.0. CC1Args.push_back("-target-feature"); + + StringRef PtxVersion = DriverArgs.getLastArgValue( + options::OPT_fopenmp_ptx_EQ); + CC1Args.push_back("+ptx42"); + if (DeviceOffloadingKind == Action::OFK_OpenMP && + !PtxVersion.empty()) { + // Use PTX version passed to the driver. 
+ CC1Args.back() = PtxVersion.data(); + } + if (DeviceOffloadingKind == Action::OFK_OpenMP) { SmallVector<std::string, 8> LibraryPaths; if (char *Env = ::getenv("LIBRARY_PATH")) { Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -490,6 +490,8 @@ HelpText<"CUDA installation path">; def fopenmp_cuda_gpu_arch_EQ : Joined<["--"], "fopenmp-cuda-gpu-arch=">, Flags<[DriverOption]>, HelpText<"Pass a single CUDA GPU architecture (default sm_20) to be used by OpenMP device offloading.">; +def fopenmp_ptx_EQ : Joined<["--"], "fopenmp-ptx=">, Flags<[DriverOption]>, + HelpText<"Pass a PTX version +ptxXX, default +ptx42 (for PTX version 4.2) used by OpenMP device offloading.">; def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>, HelpText<"Path to ptxas (used for compiling CUDA code)">; def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -627,6 +627,16 @@ /// ########################################################################### +/// Check PTXAS is passed the compute capability passed to the driver. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --fopenmp-ptx=+ptx52 -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTX %s + +// CHK-PTX: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTX-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTX-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" + +/// ########################################################################### + /// Check that CLANG forwards the -v flag to PTXAS. // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes -v %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-VERBOSE %s Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -4932,8 +4932,18 @@ // than LLVM defaults to. Use PTX4.2 which is the PTX version that // came with CUDA-7.0. CC1Args.push_back("-target-feature"); + + StringRef PtxVersion = DriverArgs.getLastArgValue( + options::OPT_fopenmp_ptx_EQ); + CC1Args.push_back("+ptx42"); + if (DeviceOffloadingKind == Action::OFK_OpenMP && + !PtxVersion.empty()) { + // Use PTX version passed to the driver. 
+ CC1Args.back() = PtxVersion.data(); + } + if (DeviceOffloadingKind == Action::OFK_OpenMP) { SmallVector<std::string, 8> LibraryPaths; if (char *Env = ::getenv("LIBRARY_PATH")) { Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -490,6 +490,8 @@ HelpText<"CUDA installation path">; def fopenmp_cuda_gpu_arch_EQ : Joined<["--"], "fopenmp-cuda-gpu-arch=">, Flags<[DriverOption]>, HelpText<"Pass a single CUDA GPU architecture (default sm_20) to be used by OpenMP device offloading.">; +def fopenmp_ptx_EQ : Joined<["--"], "fopenmp-ptx=">, Flags<[DriverOption]>, + HelpText<"Pass a PTX version +ptxXX, default +ptx42 (for PTX version 4.2) used by OpenMP device offloading.">; def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>, HelpText<"Path to ptxas (used for compiling CUDA code)">; def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits