https://github.com/jdenny-ornl updated https://github.com/llvm/llvm-project/pull/102521
>From 6546428805b52f1b6f350193ab08ff027892710f Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" <jdenny.o...@gmail.com> Date: Thu, 8 Aug 2024 15:02:04 -0400 Subject: [PATCH 1/5] [Clang] Add env var for nvptx-arch/amdgpu-arch timeout When working on very busy systems, check-offload frequently fails many tests with this diagnostic: ``` clang: error: cannot determine amdgcn architecture: /tmp/llvm/build/bin/amdgpu-arch: Child timed out: ; consider passing it via '-march' ``` The timeout is 10 seconds. This patch accepts the environment variable `CLANG_TOOL_CHAIN_PROGRAM_WAIT` to increase it. It should be documented somewhere. Any suggestions on where? --- clang/lib/Driver/ToolChain.cpp | 10 +++++++++- clang/lib/Driver/ToolChains/AMDGPU.cpp | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 3 ++- llvm/utils/lit/lit/TestingConfig.py | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 2d50c2cbbc881c..04b281e1bb10cd 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/AArch64TargetParser.h" @@ -105,7 +106,7 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> ToolChain::executeToolChainProgram(StringRef Executable, - unsigned SecondsToWait) const { + unsigned DefaultSecondsToWait) const { llvm::SmallString<64> OutputFile; llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); llvm::FileRemover OutputRemover(OutputFile.c_str()); @@ -116,6 +117,13 @@ ToolChain::executeToolChainProgram(StringRef Executable, }; std::string ErrorMessage; + int SecondsToWait = DefaultSecondsToWait; + if (std::optional<std::string> Str = + 
llvm::sys::Process::GetEnv("CLANG_TOOL_CHAIN_PROGRAM_WAIT")) { + int Val = std::atoi(Str->c_str()); + if (Val > 0) + SecondsToWait = Val; + } if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, /*MemoryLimit=*/0, &ErrorMessage)) return llvm::createStringError(std::error_code(), diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index aa8f9197cfabc3..4ed366d21f5c43 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -899,7 +899,8 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); + auto StdoutOrErr = executeToolChainProgram(Program, + /*DefaultSecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 17c952c808f725..104217eaf5d849 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -804,7 +804,8 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); + auto StdoutOrErr = executeToolChainProgram(Program, + /*DefaultSecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index eb9f8de2a7f960..06713429d06b4b 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -26,6 +26,7 @@ def fromdefaults(litConfig): "SYSTEMROOT", "TERM", "CLANG", + "CLANG_TOOL_CHAIN_PROGRAM_WAIT", "LLDB", "LD_PRELOAD", "LLVM_SYMBOLIZER_PATH", >From 711cf93741ba618c7ee8051190b18bba938121fa Mon Sep 17 00:00:00 2001 From: "Joel E. 
Denny" <jdenny.o...@gmail.com> Date: Thu, 8 Aug 2024 16:21:28 -0400 Subject: [PATCH 2/5] Apply reviewer suggestion --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 ++- clang/lib/Driver/ToolChain.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3d8240f8357b40..05642f803d07de 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -99,7 +99,8 @@ def warn_drv_amdgpu_cov6: Warning< " use at your own risk">; def err_drv_undetermined_gpu_arch : Error< "cannot determine %0 architecture: %1; consider passing it via " - "'%2'">; + "'%2' or increasing the tool timeout using the environment variable " + "'CLANG_TOOL_CHAIN_PROGRAM_WAIT' (in secs, <=0 is inifinite)">; def warn_drv_multi_gpu_arch : Warning< "multiple %0 architectures are detected: %1; only the first one is used for " "'%2'">, InGroup<MultiGPU>; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 04b281e1bb10cd..22de2bdeaa29f5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -123,6 +123,8 @@ ToolChain::executeToolChainProgram(StringRef Executable, int Val = std::atoi(Str->c_str()); if (Val > 0) SecondsToWait = Val; + else if (Val <= 0) + SecondsToWait = 0; // infinite } if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, /*MemoryLimit=*/0, &ErrorMessage)) >From 6ec00ff8e8ca2d4a3f7267f3189ee190e211ca05 Mon Sep 17 00:00:00 2001 From: "Joel E. 
Denny" <jdenny.o...@gmail.com> Date: Thu, 8 Aug 2024 16:26:24 -0400 Subject: [PATCH 3/5] Update clang/lib/Driver/ToolChain.cpp --- clang/lib/Driver/ToolChain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 22de2bdeaa29f5..c6436fb2ee33b4 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -123,7 +123,7 @@ ToolChain::executeToolChainProgram(StringRef Executable, int Val = std::atoi(Str->c_str()); if (Val > 0) SecondsToWait = Val; - else if (Val <= 0) + else SecondsToWait = 0; // infinite } if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, >From 37cd64bc733f06d3caf3c660f57b5d717a59faf7 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" <jdenny.o...@gmail.com> Date: Thu, 8 Aug 2024 18:59:15 -0400 Subject: [PATCH 4/5] Apply reviewer suggestions --- .../include/clang/Basic/DiagnosticDriverKinds.td | 7 ++++--- clang/include/clang/Driver/ToolChain.h | 2 +- clang/lib/Driver/ToolChain.cpp | 14 ++++++++------ clang/test/Driver/amdgpu-hip-system-arch.c | 8 ++++++++ clang/test/Driver/nvptx-cuda-system-arch.c | 8 ++++++++ clang/test/Driver/openmp-system-arch.c | 16 ++++++++++++++++ llvm/utils/lit/lit/TestingConfig.py | 2 +- 7 files changed, 46 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 05642f803d07de..92a602829933ce 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -98,9 +98,10 @@ def warn_drv_amdgpu_cov6: Warning< "code object v6 is still in development and not ready for production use yet;" " use at your own risk">; def err_drv_undetermined_gpu_arch : Error< - "cannot determine %0 architecture: %1; consider passing it via " - "'%2' or increasing the tool timeout using the environment variable " - "'CLANG_TOOL_CHAIN_PROGRAM_WAIT' (in secs, <=0 is 
inifinite)">; + "cannot determine %0 architecture: %1; consider passing it via '%2'; " + "environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool " + "timeout (integer secs, <=0 is infinite)">; + def warn_drv_multi_gpu_arch : Warning< "multiple %0 architectures are detected: %1; only the first one is used for " "'%2'">, InGroup<MultiGPU>; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index ece1384d5d3c02..fbac660548346e 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -206,7 +206,7 @@ class ToolChain { /// Executes the given \p Executable and returns the stdout. llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> executeToolChainProgram(StringRef Executable, - unsigned SecondsToWait = 0) const; + unsigned DefaultSecondsToWait = 0) const; void setTripleEnvironment(llvm::Triple::EnvironmentType Env); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index c6436fb2ee33b4..5fd8af373e1db5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -119,12 +119,14 @@ ToolChain::executeToolChainProgram(StringRef Executable, std::string ErrorMessage; int SecondsToWait = DefaultSecondsToWait; if (std::optional<std::string> Str = - llvm::sys::Process::GetEnv("CLANG_TOOL_CHAIN_PROGRAM_WAIT")) { - int Val = std::atoi(Str->c_str()); - if (Val > 0) - SecondsToWait = Val; - else - SecondsToWait = 0; // infinite + llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { + int SecondsToWait; + if (!llvm::to_integer(*Str, SecondsToWait)) + return llvm::createStringError(std::error_code(), + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " + "an integer, got '" + + *Str + "'"); + SecondsToWait = std::max(SecondsToWait, 0); // infinite } if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, /*MemoryLimit=*/0, &ErrorMessage)) diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c 
b/clang/test/Driver/amdgpu-hip-system-arch.c index a46077b38fac08..f25a4087080f6d 100644 --- a/clang/test/Driver/amdgpu-hip-system-arch.c +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -29,3 +29,11 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=ARCH-GFX906 // ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" + +// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed. +// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \ +// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \ +// RUN: -x hip %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT +// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index e6a2fa40f0a038..6a8a218406d139 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -42,3 +42,11 @@ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89 // MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu] // MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89" + +// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed. 
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \ +// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib \ +// RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ +// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT +// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c index ea6ec6428592b3..cd49f460099666 100644 --- a/clang/test/Driver/openmp-system-arch.c +++ b/clang/test/Driver/openmp-system-arch.c @@ -75,3 +75,19 @@ // RUN: -fopenmp-targets=amdgcn-amd-amdhsa --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=AMDGPU // AMDGPU: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march' + +// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for nvptx-arch. +// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \ +// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-NVPTX +// BAD-TIMEOUT-NVPTX: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) + +// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for amdgpu-arch. 
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT= \ +// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib \ +// RUN: --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-AMDGPU +// BAD-TIMEOUT-AMDGPU: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got ''; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index 06713429d06b4b..9d8e93460933d8 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -26,7 +26,7 @@ def fromdefaults(litConfig): "SYSTEMROOT", "TERM", "CLANG", - "CLANG_TOOL_CHAIN_PROGRAM_WAIT", + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT", "LLDB", "LD_PRELOAD", "LLVM_SYMBOLIZER_PATH", >From 42ac67eae5c3952083de1fcb68b9344374ba56f6 Mon Sep 17 00:00:00 2001 From: "Joel E. 
Denny" <jdenny.o...@gmail.com> Date: Thu, 8 Aug 2024 19:33:44 -0400 Subject: [PATCH 5/5] I wish I knew how to write timeout tests --- clang/lib/Driver/ToolChain.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5fd8af373e1db5..dbac6c370e85e6 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -120,7 +120,6 @@ ToolChain::executeToolChainProgram(StringRef Executable, int SecondsToWait = DefaultSecondsToWait; if (std::optional<std::string> Str = llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { - int SecondsToWait; if (!llvm::to_integer(*Str, SecondsToWait)) return llvm::createStringError(std::error_code(), "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits