steven_wu created this revision.
steven_wu added a subscriber: cfe-commits.
steven_wu added a dependency: D17390: Introduce -fembed-bitcode driver option.

Teach clang to embed bitcode inside bitcode. When -fembed-bitcode cc1
option is used, clang will embed both the input bitcode and cc1
commandline into the bitcode in special sections before compiling to
the object file.  Using -fembed-bitcode-marker will only introduce a
marker in both sections.

Depends on D17390

http://reviews.llvm.org/D17392

Files:
  include/clang/CodeGen/BackendUtil.h
  include/clang/Frontend/CodeGenOptions.def
  include/clang/Frontend/CodeGenOptions.h
  lib/CodeGen/BackendUtil.cpp
  lib/CodeGen/CodeGenAction.cpp
  lib/Driver/Tools.cpp
  lib/Frontend/CompilerInvocation.cpp
  test/Frontend/embed-bitcode.ll

Index: test/Frontend/embed-bitcode.ll
===================================================================
--- /dev/null
+++ test/Frontend/embed-bitcode.ll
@@ -0,0 +1,48 @@
+; check .ll input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode-marker -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+; RUN: %clang_cc1 -triple aarch64-unknown-linux-gnueabi -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-ELF
+
+; check .bc input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -x ir %s -o %t.bc
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %t.bc -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode-marker -x ir %t.bc -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+
+; run through -fembed-bitcode twice and make sure it doesn't crash
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN: | %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir - -o /dev/null
+
+; check the magic number of bitcode at the beginning of the string
+; CHECK: @llvm.embedded.module
+; CHECK: c"\DE\C0\17\0B
+; CHECK: section "__LLVM,__bitcode"
+; CHECK: @llvm.cmdline
+; CHECK: section "__LLVM,__cmdline"
+
+; CHECK-ELF: @llvm.embedded.module
+; CHECK-ELF: section ".llvmbc"
+; CHECK-ELF: @llvm.cmdline
+; CHECK-ELF: section ".llvmcmd"
+
+; CHECK-MARKER: @llvm.embedded.module
+; CHECK-MARKER: constant [0 x i8] zeroinitializer
+; CHECK-MARKER: section "__LLVM,__bitcode"
+; CHECK-MARKER: @llvm.cmdline
+; CHECK-MARKER: section "__LLVM,__cmdline"
+
+define i32 @f0() {
+  ret i32 0
+}
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -616,6 +616,34 @@
       }
     }
   }
+	// Handle -fembed-bitcode option.
+  Opts.EmbedBitcode = Args.hasArg(OPT_fembed_bitcode);
+  Opts.EmbedMarkerOnly = Args.hasArg(OPT_fembed_bitcode_marker);
+  // FIXME: For backend options that are not yet recorded as function
+  // attributes in the IR, keep track of them so we can embed them in a
+  // separate data section and use them when building the bitcode.
+  if (Opts.EmbedBitcode) {
+    for (ArgList::const_iterator A = Args.begin(), AE = Args.end();
+         A != AE; ++ A) {
+      // Do not encode output and input.
+      if ((*A)->getOption().getID() == options::OPT_o ||
+          (*A)->getOption().getID() == options::OPT_INPUT ||
+          (*A)->getOption().getID() == options::OPT_x ||
+          (*A)->getOption().getID() == options::OPT_fembed_bitcode ||
+          ((*A)->getOption().getGroup().isValid() &&
+           (*A)->getOption().getGroup().getID() == options::OPT_W_Group))
+        continue;
+      ArgStringList ASL;
+      (*A)->render(Args, ASL);
+      for (ArgStringList::iterator it = ASL.begin(), ie = ASL.end();
+          it != ie; ++ it) {
+        StringRef ArgStr(*it);
+        Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end());
+        // using \00 to seperate each commandline options.
+        Opts.CmdArgs.push_back('\0');
+      }
+    }
+  }
 
   Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
   Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -5553,7 +5553,10 @@
   // With -save-temps, we want to save the unoptimized bitcode output from the
   // CompileJobAction, use -disable-llvm-passes to get pristine IR generated
   // by the frontend.
-  if (C.getDriver().isSaveTempsEnabled() && isa<CompileJobAction>(JA))
+  // When -fembed-bitcode is enabled, optimized bitcode is emitted because it
+  // has slightly different breakdown between stages.
+  if (C.getDriver().isSaveTempsEnabled() &&
+      !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA))
     CmdArgs.push_back("-disable-llvm-passes");
 
   if (Output.getType() == types::TY_Dependencies) {
Index: lib/CodeGen/CodeGenAction.cpp
===================================================================
--- lib/CodeGen/CodeGenAction.cpp
+++ lib/CodeGen/CodeGenAction.cpp
@@ -173,6 +173,8 @@
           return;
       }
 
+      EmbedBitcode(TheModule.get(), CodeGenOpts, llvm::MemoryBufferRef());
+
       EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
                         C.getTargetInfo().getDataLayoutString(),
                         getModule(), Action, AsmOutStream);
@@ -823,6 +825,9 @@
       TheModule->setTargetTriple(TargetOpts.Triple);
     }
 
+    EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(),
+                 MainFile->getMemBufferRef());
+
     LLVMContext &Ctx = TheModule->getContext();
     Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler);
     EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
Index: lib/CodeGen/BackendUtil.cpp
===================================================================
--- lib/CodeGen/BackendUtil.cpp
+++ lib/CodeGen/BackendUtil.cpp
@@ -16,9 +16,11 @@
 #include "clang/Frontend/Utils.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/DataLayout.h"
@@ -721,3 +723,86 @@
     }
   }
 }
+
+static const char* getSectionNameForBitcode(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__bitcode";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmbc";
+  }
+}
+
+static const char* getSectionNameForCommandline(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__cmdline";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmcmd";
+  }
+}
+
+// With -fembed-bitcode, save a copy of the llvm IR as data in the
+// __LLVM,__bitcode section.
+void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                         llvm::MemoryBufferRef Buf)
+{
+  if (!CGOpts.EmbedBitcode && !CGOpts.EmbedMarkerOnly)
+    return;
+
+  // Embed the bitcode for the llvm module.
+  std::string Data;
+  ArrayRef<uint8_t> ModuleData;
+  Triple T(M->getTargetTriple());
+  if (!CGOpts.EmbedMarkerOnly) {
+    if (!isBitcode((const unsigned char*)Buf.getBufferStart(),
+                   (const unsigned char*)Buf.getBufferEnd())) {
+      // If the input is LLVM Assembly, bitcode is produced by serializing
+      // the module. Use-lists order need to be perserved in this case.
+      llvm::raw_string_ostream OS(Data);
+      llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+      ModuleData = ArrayRef<uint8_t>((uint8_t*)OS.str().data(),
+                                     OS.str().size());
+    } else {
+      // If the input is LLVM bitcode, write the input byte stream directly.
+      ModuleData = ArrayRef<uint8_t>((uint8_t*)Buf.getBufferStart(),
+                                     Buf.getBufferSize());
+    }
+  }
+  llvm::Constant *ModuleConstant =
+      llvm::ConstantDataArray::get(M->getContext(), ModuleData);
+  // Use Appending linkage so it doesn't get optimized out.
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      *M, ModuleConstant->getType(), true, llvm::GlobalValue::AppendingLinkage,
+      ModuleConstant);
+  GV->setSection(getSectionNameForBitcode(T));
+  if (llvm::GlobalVariable *Old =
+          M->getGlobalVariable("llvm.embedded.module")) {
+    assert(Old->use_empty() && "llvm.embedded.module must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.embedded.module");
+  }
+
+  // Embed command-line options.
+  ArrayRef<uint8_t> CmdData((uint8_t*)CGOpts.CmdArgs.data(),
+                            CGOpts.CmdArgs.size());
+  llvm::Constant *CmdConstant =
+    llvm::ConstantDataArray::get(M->getContext(), CmdData);
+  GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
+                                llvm::GlobalValue::AppendingLinkage,
+                                CmdConstant);
+  GV->setSection(getSectionNameForCommandline(T));
+  if (llvm::GlobalVariable *Old = M->getGlobalVariable("llvm.cmdline")) {
+    assert(Old->use_empty() && "llvm.cmdline must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.cmdline");
+  }
+}
Index: include/clang/Frontend/CodeGenOptions.h
===================================================================
--- include/clang/Frontend/CodeGenOptions.h
+++ include/clang/Frontend/CodeGenOptions.h
@@ -198,6 +198,9 @@
   /// Set of sanitizer checks that trap rather than diagnose.
   SanitizerSet SanitizeTrap;
 
+  /// List of backend command-line options for -fembed-bitcode.
+  std::vector<uint8_t> CmdArgs;
+
   /// \brief A list of all -fno-builtin-* function names (e.g., memset).
   std::vector<std::string> NoBuiltinFuncs;
 
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -53,6 +53,8 @@
                                      ///< frontend.
 CODEGENOPT(DisableRedZone    , 1, 0) ///< Set when -mno-red-zone is enabled.
 CODEGENOPT(DisableTailCalls  , 1, 0) ///< Do not emit tail calls.
+CODEGENOPT(EmbedBitcode      , 1, 0) ///< Embed LLVM IR bitcode as data.
+CODEGENOPT(EmbedMarkerOnly   , 1, 0) ///< Only create bitcode section as marker
 CODEGENOPT(EmitDeclMetadata  , 1, 0) ///< Emit special metadata indicating what
                                      ///< Decl* various IR entities came from. 
                                      ///< Only useful when running CodeGen as a
Index: include/clang/CodeGen/BackendUtil.h
===================================================================
--- include/clang/CodeGen/BackendUtil.h
+++ include/clang/CodeGen/BackendUtil.h
@@ -16,6 +16,7 @@
 
 namespace llvm {
   class Module;
+  class MemoryBufferRef;
 }
 
 namespace clang {
@@ -37,6 +38,9 @@
                          const TargetOptions &TOpts, const LangOptions &LOpts,
                          StringRef TDesc, llvm::Module *M, BackendAction Action,
                          raw_pwrite_stream *OS);
+
+  void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                    llvm::MemoryBufferRef Buf);
 }
 
 #endif
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to