saiislam updated this revision to Diff 361633.
saiislam marked 2 inline comments as done.
saiislam added a comment.

Added instructions to generate a fat archive and reduced the size of attached 
libFatArchive.a used for testing.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105191/new/

https://reviews.llvm.org/D105191

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libFatArchive.a
  clang/test/Driver/fat_archive.cpp

Index: clang/test/Driver/fat_archive.cpp
===================================================================
--- /dev/null
+++ clang/test/Driver/fat_archive.cpp
@@ -0,0 +1,91 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// See the steps to create a fat archive near the end of the file.
+
+// Test succeeds if no linked error i.e. all external symbols in the archive
+// could be resolved correctly.
+// RUN: env LIBRARY_PATH=%T/../../../../../runtimes/runtimes-bins/openmp/libomptarget %clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/hip_dev_lib -lFatArchive -o - | FileCheck %s -check-prefix=LINKERROR
+// LINKERROR-NOT: error: linker command failed with exit code 1
+
+// Given a FatArchive, clang-offload-bundler should be called to create a
+// device specific archive, which should be passed to llvm-link.
+// RUN: env LIBRARY_PATH=%T/../../../../../runtimes/runtimes-bins/openmp/libomptarget %clang -O2 -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/hip_dev_lib -lFatArchive 2>&1 | FileCheck %s
+// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-pc-linux-gnu"{{.*}}"-o" "[[HOSTOBJ:.*.o]]" "-x" "ir"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
+// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/hip_dev_lib/libFatArchive.a" "-targets=openmp-amdgcn-amd-amdhsa--gfx906" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
+// CHECK: llvm-link{{.*}}"[[HOSTBC]]" "[[DEVICESPECIFICARCHIVE]]" "-o" "{{.*}}-gfx906-linked-{{.*}}.bc"
+// CHECK: ld.lld"{{.*}}" "-L{{.*}}/Inputs/hip_dev_lib" "{{.*}}"[[HOSTOBJ]]" "-lFatArchive" "{{.*}}" "-lomp{{.*}}-lomptarget"
+// expected-no-diagnostics
+
+// Tests for linker and loader errors in case external symbols are not found in
+// the FatArchive.
+// RUN: env LIBRARY_PATH=%T/../../../../../runtimes/runtimes-bins/openmp/libomptarget not %clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/hip_dev_lib -lFatArchive -DMISSING=1 2>&1 | FileCheck %s -check-prefix=MISSINGSYM
+// MISSINGSYM: ld.lld: error: undefined symbol: func_missing
+// MISSINGSYM: error: linker command failed with exit code 1
+
+#ifndef HEADER
+#define HEADER
+
+#define N 10
+
+#pragma omp declare target
+// Functions defined in Fat Archive.
+extern "C" void func_present(float *, float *, unsigned);
+
+#ifdef MISSING
+// Function not defined in the fat archive.
+extern "C" void func_missing(float *, float *, unsigned);
+#endif
+
+#pragma omp end declare target
+
+int main() {
+  float in[N], out[N], sum = 0;
+  unsigned i;
+
+#pragma omp parallel for
+  for (i = 0; i < N; ++i) {
+    in[i] = i;
+  }
+
+  func_present(in, out, N); // Returns out[i] = a[i] * 0
+
+#ifdef MISSING
+  func_missing(in, out, N); // Should throw an error here
+#endif
+
+#pragma omp parallel for reduction(+ \
+                                   : sum)
+  for (i = 0; i < N; ++i)
+    sum += out[i];
+
+  if (!sum)
+    return 0;
+  return sum;
+}
+
+#endif
+
+/************************************************
+   Steps to create Fat Archive (libFatArchive.a) 
+*************************************************
+***************** File: func_1.c ****************
+void func_present(float* in, float* out, unsigned n){
+  unsigned i;
+  #pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
+  for(i=0; i<n; ++i){
+    out[i] = in[i] * 0;
+  }
+}
+*************************************************
+1. Compile source file(s) to generate object file(s)
+    clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
+    clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
+    clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
+    clang -O2 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
+
+2. Create a fat archive by combining all the object file(s)
+    llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o
+************************************************/
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -632,6 +632,9 @@
     CmdArgs.push_back(CubinF);
   }
 
+  AddStaticDeviceLibs(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
+                      false, false);
+
   const char *Exec =
       Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
   C.addCommand(std::make_unique<Command>(
@@ -754,6 +757,8 @@
     std::string BitcodeSuffix = "nvptx-" + GpuArch.str();
     addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                        getTriple());
+    AddStaticDeviceLibs(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
+                        /* bitcode SDL?*/ true, /* PostClang Link? */ true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/CommonArgs.h
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.h
+++ clang/lib/Driver/ToolChains/CommonArgs.h
@@ -49,6 +49,37 @@
                     llvm::opt::ArgStringList &CmdArgs,
                     const llvm::opt::ArgList &Args);
 
+void AddStaticDeviceLibs(Compilation &C, const Tool &T, const JobAction &JA,
+                         const InputInfoList &Inputs,
+                         const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
+                         StringRef Target, bool isBitCodeSDL,
+                         bool postClangLink);
+void AddStaticDeviceLibs(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
+                         StringRef Target, bool isBitCodeSDL,
+                         bool postClangLink);
+void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
+                         const InputInfoList *Inputs, const Driver &D,
+                         const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
+                         StringRef Target, bool isBitCodeSDL,
+                         bool postClangLink);
+
+bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+               llvm::opt::ArgStringList &CmdArgs,
+               SmallVector<std::string, 8> LibraryPaths, std::string Lib,
+               StringRef Arch, StringRef Target, bool isBitCodeSDL,
+               bool postClangLink);
+
+bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
+                              const JobAction &JA, const InputInfoList &Inputs,
+                              const llvm::opt::ArgList &DriverArgs,
+                              llvm::opt::ArgStringList &CC1Args,
+                              SmallVector<std::string, 8> LibraryPaths,
+                              StringRef Lib, StringRef Arch, StringRef Target,
+                              bool isBitCodeSDL, bool postClangLink);
+
 const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
                            const InputInfo &Input, const InputInfo &Output);
 
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -34,6 +34,7 @@
 #include "clang/Driver/Util.h"
 #include "clang/Driver/XRayArgs.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -1587,6 +1588,219 @@
   }
 }
 
+/// SDLSearch: Search for Static Device Library
+bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+                      llvm::opt::ArgStringList &CC1Args,
+                      SmallVector<std::string, 8> LibraryPaths, std::string Lib,
+                      StringRef Arch, StringRef Target, bool isBitCodeSDL,
+                      bool postClangLink) {
+  SmallVector<std::string, 12> SDLs;
+  if (isBitCodeSDL) {
+    // For bitcode SDL, search for these 12 relative SDL filenames
+    SDLs.push_back(
+        Twine("/libdevice/libbc-" + Lib + "-" + Arch + "-" + Target + ".a")
+            .str());
+    SDLs.push_back(
+        Twine("/libbc-" + Lib + "-" + Arch + "-" + Target + ".a").str());
+    SDLs.push_back(Twine("/libdevice/libbc-" + Lib + "-" + Arch + ".a").str());
+    SDLs.push_back(Twine("/libbc-" + Lib + "-" + Arch + ".a").str());
+    SDLs.push_back(Twine("/libdevice/libbc-" + Lib + ".a").str());
+    SDLs.push_back(Twine("/libbc-" + Lib + ".a").str());
+
+    SDLs.push_back(
+        Twine("/libdevice/lib" + Lib + "-" + Arch + "-" + Target + ".bc")
+            .str());
+    SDLs.push_back(
+        Twine("/lib" + Lib + "-" + Arch + "-" + Target + ".bc").str());
+    SDLs.push_back(Twine("/libdevice/lib" + Lib + "-" + Arch + ".bc").str());
+    SDLs.push_back(Twine("/lib" + Lib + "-" + Arch + ".bc").str());
+    SDLs.push_back(Twine("/libdevice/lib" + Lib + ".bc").str());
+    SDLs.push_back(Twine("/lib" + Lib + ".bc").str());
+  } else {
+    // Otherwise only 4 names to search for machine-code SDL
+    SDLs.push_back(
+        Twine("/libdevice/lib" + Lib + "-" + Arch + "-" + Target + ".a").str());
+    SDLs.push_back(
+        Twine("/lib" + Lib + "-" + Arch + "-" + Target + ".a").str());
+    SDLs.push_back(Twine("/libdevice/lib" + Lib + "-" + Arch + ".a").str());
+    SDLs.push_back(Twine("/lib" + Lib + "-" + Arch + ".a").str());
+  }
+
+  // Add file for archive of bundles, this is the final fallback
+  bool FoundSDL = false;
+  for (auto LPath : LibraryPaths) {
+    for (auto SDL : SDLs) {
+      auto FullName = Twine(LPath + SDL).str();
+      if (llvm::sys::fs::exists(FullName)) {
+        if (postClangLink)
+          CC1Args.push_back("-mlink-builtin-bitcode");
+        CC1Args.push_back(DriverArgs.MakeArgString(FullName));
+        FoundSDL = true;
+        break;
+      }
+    }
+    if (FoundSDL)
+      break;
+  }
+  return FoundSDL;
+}
+
+bool tools::GetSDLFromOffloadArchive(
+    Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
+    const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
+    llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
+    StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
+    bool postClangLink) {
+
+  // We don't support bitcode archive bundles for nvptx
+  if (isBitCodeSDL && Arch.contains("nvptx"))
+    return false;
+
+  bool FoundAOB = false;
+  SmallVector<std::string, 2> AOBFileNames;
+  std::string ArchiveOfBundles;
+  for (auto LPath : LibraryPaths) {
+    ArchiveOfBundles.clear();
+
+    AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
+    AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());
+
+    for (auto AOB : AOBFileNames) {
+      if (llvm::sys::fs::exists(AOB)) {
+        ArchiveOfBundles = AOB;
+        FoundAOB = true;
+        break;
+      }
+    }
+
+    if (!FoundAOB)
+      continue;
+
+    StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
+    std::string OutputLib = D.GetTemporaryPath(
+        Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
+
+    C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str()));
+
+    ArgStringList CmdArgs;
+    SmallString<128> DeviceTriple;
+    DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
+    DeviceTriple += "-";
+    std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
+    DeviceTriple += NormalizedTriple;
+    if (!Target.empty()) {
+      // If Target is present it can only appear as the 6th hypen
+      // sepearated field of Bundle Entry ID. So, pad required number of
+      // hyphens in Triple.
+      for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
+        DeviceTriple += "-";
+      DeviceTriple += Target;
+    }
+
+    std::string UnbundleArg("-unbundle");
+    std::string TypeArg("-type=a");
+    std::string InputArg("-inputs=" + ArchiveOfBundles);
+    std::string OffloadArg("-targets=" + std::string(DeviceTriple));
+    std::string OutputArg("-outputs=" + OutputLib);
+
+    const char *UBProgram = DriverArgs.MakeArgString(
+        T.getToolChain().GetProgramPath("clang-offload-bundler"));
+
+    ArgStringList UBArgs;
+    UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str()));
+    UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str()));
+    UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str()));
+    UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str()));
+    UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str()));
+
+    // Add this flag to not exit from clang-offload-bundler if no compatible
+    // code object is found in heterogenous archive library.
+    std::string AdditionalArgs("-allow-missing-bundles");
+    UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str()));
+
+    C.addCommand(std::make_unique<Command>(
+        JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
+        InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str()))));
+    if (postClangLink)
+      CC1Args.push_back("-mlink-builtin-bitcode");
+
+    CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
+    break;
+  }
+
+  return FoundAOB;
+}
+
+void tools::AddStaticDeviceLibs(Compilation &C, const Tool &T,
+                                const JobAction &JA,
+                                const InputInfoList &Inputs,
+                                const llvm::opt::ArgList &DriverArgs,
+                                llvm::opt::ArgStringList &CC1Args,
+                                StringRef Arch, StringRef Target,
+                                bool isBitCodeSDL, bool postClangLink) {
+  AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
+                      Arch, Target, isBitCodeSDL, postClangLink);
+}
+
+void tools::AddStaticDeviceLibs(const Driver &D,
+                                const llvm::opt::ArgList &DriverArgs,
+                                llvm::opt::ArgStringList &CC1Args,
+                                StringRef Arch, StringRef Target,
+                                bool isBitCodeSDL, bool postClangLink) {
+  AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
+                      CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
+}
+
+void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
+                                const JobAction *JA,
+                                const InputInfoList *Inputs, const Driver &D,
+                                const llvm::opt::ArgList &DriverArgs,
+                                llvm::opt::ArgStringList &CC1Args,
+                                StringRef Arch, StringRef Target,
+                                bool isBitCodeSDL, bool postClangLink) {
+
+  SmallVector<std::string, 8> LibraryPaths;
+  // Add search directories from LIBRARY_PATH env variable
+  llvm::Optional<std::string> LibPath =
+      llvm::sys::Process::GetEnv("LIBRARY_PATH");
+  if (LibPath) {
+    SmallVector<StringRef, 8> Frags;
+    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+    llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+    for (StringRef Path : Frags)
+      LibraryPaths.emplace_back(Path.trim());
+  }
+
+  // Add directories from user-specified -L options
+  for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
+    LibraryPaths.emplace_back(Search_Dir);
+
+  // Add path to lib-debug folders
+  SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
+  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+  LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+  // Build list of Static Device Libraries SDLs specified by -l option
+  llvm::SmallSet<std::string, 16> SDLNames;
+  static const StringRef HostOnlyArchives[] = {
+      "omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
+  for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
+    if (!HostOnlyArchives->contains(SDLName)) {
+      SDLNames.insert(SDLName);
+    }
+  }
+
+  for (auto SDLName : SDLNames) {
+    // This is the only call to SDLSearch
+    if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
+                   isBitCodeSDL, postClangLink)) {
+      GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
+                               LibraryPaths, SDLName, Arch, Target,
+                               isBitCodeSDL, postClangLink);
+    }
+  }
+}
+
 static llvm::opt::Arg *
 getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
   // The last of -mcode-object-v3, -mno-code-object-v3 and
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -7679,6 +7679,23 @@
         Triples += "-";
       Triples += CurDep->getOffloadingArch();
     }
+
+    // TODO: Replace parsing of -march flag. Can be done by storing GPUArch
+    //       with each toolchain.
+    StringRef GPUArchName;
+    if ((CurKind == Action::OFK_OpenMP)) {
+      // Extract GPUArch from -march argument in TC argument list.
+      for (uint ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
+        StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
+        auto Arch = ArchStr.startswith_insensitive("-march=");
+        if (Arch) {
+          GPUArchName = ArchStr.substr(7);
+          Triples += "--";
+          break;
+        }
+      }
+      Triples += GPUArchName.str();
+    }
   }
   CmdArgs.push_back(TCArgs.MakeArgString(Triples));
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -92,6 +92,10 @@
   for (const auto &II : Inputs)
     if (II.isFilename())
       CmdArgs.push_back(II.getFilename());
+  AddStaticDeviceLibs(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
+                      SubArchName,
+                      /* bitcode SDL?*/ true,
+                      /* PostClang Link? */ false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to