https://github.com/macurtis-amd updated https://github.com/llvm/llvm-project/pull/101008
>From 5590228721e4ce719001311137d8eb674b0e2a3d Mon Sep 17 00:00:00 2001 From: Matthew Curtis <macur...@amd.com> Date: Thu, 25 Jul 2024 15:37:25 -0500 Subject: [PATCH 1/3] [clang-linker-wrapper] Add '--lto-in-process' to force in-process LTO --- clang/test/Driver/linker-wrapper.c | 12 +++++++++--- .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 6 +++++- .../tools/clang-linker-wrapper/LinkerWrapperOpts.td | 4 ++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index f165a410c1929..8238f80c67299 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -45,11 +45,17 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ +// RUN: --lto-in-process --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-IN-PROC +// AMDGPU-LTO-TEMPS-NOT: Linking bitcode files // AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps +// AMDGPU-LTO-IN-PROC: Linking bitcode files +// AMDGPU-LTO-IN-PROC: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps + // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu @@ -93,7 +99,7 @@ __attribute__((visibility("protected"), used)) int x; // CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70 // CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52 -// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]] +// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]] // CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o // RUN: clang-offload-packager -o %t.out \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 2db7b6ac823ec..b676651b2a3d8 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -781,6 +781,8 @@ bool isValidCIdentifier(StringRef S) { Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles, SmallVectorImpl<StringRef> &OutputFiles, const ArgList &Args) { + if (Verbose) + llvm::errs() << "Linking bitcode files\n"; llvm::TimeTraceScope TimeScope("Link bitcode files"); const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); @@ -1017,6 +1019,8 @@ Expected<StringRef> writeOffloadFile(const OffloadFile &File) { // Compile the module to an object file using the appropriate target machine for // the host triple. Expected<StringRef> compileModule(Module &M, OffloadKind Kind) { + if (Verbose) + llvm::errs() << "Compiling module\n"; llvm::TimeTraceScope TimeScope("Compile module"); std::string Msg; const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg); @@ -1309,7 +1313,7 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( // First link and remove all the input files containing bitcode if // the target linker does not support it natively. SmallVector<StringRef> InputFiles; - if (!linkerSupportsLTO(LinkerArgs)) + if (!linkerSupportsLTO(LinkerArgs) || Args.hasArg(OPT_lto_in_process)) if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) return Err; diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index a87d5ec8aa9b3..75344c03c6493 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -102,6 +102,10 @@ def offload_opt_eq_minus : Joined<["--", "-"], "offload-opt=">, Flags<[HelpHidde HelpText<"Options passed to LLVM, not including the Clang invocation. Use " "'--offload-opt=--help' for a list of options.">; +// Arguments for LTO +def lto_in_process : Flag<["--"], "lto-in-process">, + Flags<[WrapperOnlyOption]>, HelpText<"Use in-process LTO even if linker supports LTO">; + // Standard linker flags also used by the linker wrapper. def sysroot_EQ : Joined<["--"], "sysroot=">, HelpText<"Set the system root">; >From cf86d2fe66be2ed148a297439fa36be5f411b0f5 Mon Sep 17 00:00:00 2001 From: Matthew Curtis <macur...@amd.com> Date: Sun, 21 Jul 2024 11:02:04 -0500 Subject: [PATCH 2/3] [clang-linker-wrapper] Add '--lto-debug-pass-manager' flag --- clang/test/Driver/linker-wrapper.c | 4 ++++ .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 13 ++++++++++--- .../tools/clang-linker-wrapper/LinkerWrapperOpts.td | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 8238f80c67299..0f589b90decd7 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -49,6 +49,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ // RUN: --lto-in-process --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-IN-PROC +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ +// RUN: --lto-debug-pass-manager --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-OPTS // AMDGPU-LTO-TEMPS-NOT: Linking bitcode files // AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps @@ -56,6 +58,8 @@ __attribute__((visibility("protected"), used)) int x; // AMDGPU-LTO-IN-PROC: Linking bitcode files // AMDGPU-LTO-IN-PROC: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps +// AMDGPU-LTO-OPTS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps -Wl,--save-temps -Wl,--lto-debug-pass-manager + // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index b676651b2a3d8..53b60428df95f 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -581,8 +581,12 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { if (SaveTemps) CmdArgs.push_back("-save-temps"); - if (SaveTemps && linkerSupportsLTO(Args)) - CmdArgs.push_back("-Wl,--save-temps"); + if (linkerSupportsLTO(Args)) { + if (SaveTemps) + CmdArgs.push_back("-Wl,--save-temps"); + if (Args.hasArg(OPT_lto_debug_pass_manager)) + CmdArgs.push_back("-Wl,--lto-debug-pass-manager"); + } if (Args.hasArg(OPT_embed_bitcode)) CmdArgs.push_back("-Wl,--lto-emit-llvm"); @@ -767,6 +771,8 @@ std::unique_ptr<lto::LTO> createLTO( // TODO: Handle remark files Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); + Conf.DebugPassManager = Args.hasArg(OPT_lto_debug_pass_manager); + return std::make_unique<lto::LTO>(std::move(Conf), Backend); } @@ -1201,7 +1207,8 @@ bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args, } } -/// Returns a new ArgList containg arguments used for the device linking phase. +/// Returns a new ArgList containing arguments used for the device linking +/// phase. DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input, const InputArgList &Args) { DerivedArgList DAL = DerivedArgList(DerivedArgList(Args)); diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 75344c03c6493..26cbe2ab9e7ae 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -105,6 +105,8 @@ def offload_opt_eq_minus : Joined<["--", "-"], "offload-opt=">, Flags<[HelpHidde // Arguments for LTO def lto_in_process : Flag<["--"], "lto-in-process">, Flags<[WrapperOnlyOption]>, HelpText<"Use in-process LTO even if linker supports LTO">; +def lto_debug_pass_manager : Flag<["--"], "lto-debug-pass-manager">, + Flags<[WrapperOnlyOption]>, HelpText<"Prints debug information for the new pass manager during LTO">; // Standard linker flags also used by the linker wrapper. def sysroot_EQ : Joined<["--"], "sysroot=">, HelpText<"Set the system root">; >From 06fc379cfc7ad19e3c66533560b0fc9533b766dd Mon Sep 17 00:00:00 2001 From: Matthew Curtis <macur...@amd.com> Date: Fri, 26 Jul 2024 15:45:07 -0500 Subject: [PATCH 3/3] [clang-linker-wrapper] Forward offload options to device linker --- clang/test/Driver/linker-wrapper.c | 12 ++++++++++-- .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 0f589b90decd7..7f7693a3433c7 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -50,7 +50,11 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ // RUN: --lto-in-process --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-IN-PROC // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --save-temps -O2 \ -// RUN: --lto-debug-pass-manager --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-OPTS +// RUN: --lto-debug-pass-manager \ +// RUN: -offload-opt=-print-pipeline-passes \ +// RUN: -offload-opt=-print-before=openmp-opt \ +// RUN: -offload-opt=-print-after=openmp-opt \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-OPTS // AMDGPU-LTO-TEMPS-NOT: Linking bitcode files // AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps @@ -58,7 +62,11 @@ __attribute__((visibility("protected"), used)) int x; // AMDGPU-LTO-IN-PROC: Linking bitcode files // AMDGPU-LTO-IN-PROC: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps -// AMDGPU-LTO-OPTS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps -Wl,--save-temps -Wl,--lto-debug-pass-manager +// AMDGPU-LTO-OPTS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 {{.*}}.o -save-temps -Wl,--save-temps +// AMDGPU-LTO-OPTS-SAME: -Wl,--lto-debug-pass-manager +// AMDGPU-LTO-OPTS-SAME: -Wl,-mllvm=-print-pipeline-passes +// AMDGPU-LTO-OPTS-SAME: -Wl,-mllvm=-print-before=openmp-opt +// AMDGPU-LTO-OPTS-SAME: -Wl,-mllvm=-print-after=openmp-opt // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 53b60428df95f..39b9876ecbd87 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -586,6 +586,10 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { CmdArgs.push_back("-Wl,--save-temps"); if (Args.hasArg(OPT_lto_debug_pass_manager)) CmdArgs.push_back("-Wl,--lto-debug-pass-manager"); + for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus)) { + CmdArgs.push_back( + Args.MakeArgString("-Wl,--mllvm=" + StringRef(Arg->getValue()))); + } } if (Args.hasArg(OPT_embed_bitcode)) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits