hfinkel created this revision.
Herald added subscribers: mcrosier, emaste.

This patch introduces the Clang components of a TBAA sanitizer: a sanitizer 
for type-based aliasing violations.

C/C++ have type-based aliasing rules, and LLVM's optimizer can exploit these 
given TBAA metadata added by Clang. Roughly, a pointer of a given type cannot be 
used to access an object of a different type (with, of course, certain 
exceptions). Unfortunately, there's a lot of code in the wild that violates 
these rules (e.g. for type punning), and such code often must be built with 
-fno-strict-aliasing. Performance is often sacrificed as a result. Part of the 
problem is the difficulty of finding TBAA violations. Hopefully, this sanitizer 
will help.

https://reviews.llvm.org/D32197 (Runtime)
https://reviews.llvm.org/D32198 (LLVM)

The Clang changes seem mostly formulaic; the one specific change is that 
when the TBAA sanitizer is enabled, TBAA is always generated, even at -O0.

Clang's TBAA representation currently has a problem representing unions, as 
demonstrated by the one XFAIL'd test in the runtime patch. We'll update the 
TBAA representation to fix this, and at the same time, update the sanitizer.


https://reviews.llvm.org/D32199

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Sanitizers.def
  include/clang/Driver/SanitizerArgs.h
  lib/CodeGen/BackendUtil.cpp
  lib/CodeGen/CGDeclCXX.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenTBAA.cpp
  lib/Driver/SanitizerArgs.cpp
  lib/Driver/ToolChains/CommonArgs.cpp
  lib/Driver/ToolChains/FreeBSD.cpp
  lib/Driver/ToolChains/Linux.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Lex/PPMacroExpansion.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGen/sanitize-tbaa-attr.cpp
  test/Driver/sanitizer-ld.c

Index: test/Driver/sanitizer-ld.c
===================================================================
--- test/Driver/sanitizer-ld.c
+++ test/Driver/sanitizer-ld.c
@@ -181,6 +181,18 @@
 
 // RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     -target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
+// RUN:     -fsanitize=tbaa \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-TBAASAN-LINUX-CXX %s
+//
+// CHECK-TBAASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-TBAASAN-LINUX-CXX-NOT: stdc++
+// CHECK-TBAASAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.tbaasan-x86_64.a" "-no-whole-archive"
+// CHECK-TBAASAN-LINUX-CXX: stdc++
+
+// RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
 // RUN:     -fsanitize=memory \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
Index: test/CodeGen/sanitize-tbaa-attr.cpp
===================================================================
--- /dev/null
+++ test/CodeGen/sanitize-tbaa-attr.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefix=WITHOUT %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=tbaa | FileCheck -check-prefix=TBAASAN %s
+// RUN: echo "src:%s" | sed -e 's/\\/\\\\/g' > %t
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=tbaa -fsanitize-blacklist=%t | FileCheck -check-prefix=BL %s
+
+// The sanitize_tbaa attribute should be attached to functions
+// when TBAASanitizer is enabled, unless no_sanitize_tbaa attribute
+// is present.
+
+// WITHOUT:  NoTBAASAN1{{.*}}) [[NOATTR:#[0-9]+]]
+// BL:  NoTBAASAN1{{.*}}) [[NOATTR:#[0-9]+]]
+// TBAASAN:  NoTBAASAN1{{.*}}) [[NOATTR:#[0-9]+]]
+__attribute__((no_sanitize_tbaa))
+int NoTBAASAN1(int *a) { return *a; }
+
+// WITHOUT:  NoTBAASAN2{{.*}}) [[NOATTR]]
+// BL:  NoTBAASAN2{{.*}}) [[NOATTR]]
+// TBAASAN:  NoTBAASAN2{{.*}}) [[NOATTR]]
+__attribute__((no_sanitize_tbaa))
+int NoTBAASAN2(int *a);
+int NoTBAASAN2(int *a) { return *a; }
+
+// WITHOUT:  NoTBAASAN3{{.*}}) [[NOATTR:#[0-9]+]]
+// BL:  NoTBAASAN3{{.*}}) [[NOATTR:#[0-9]+]]
+// TBAASAN:  NoTBAASAN3{{.*}}) [[NOATTR:#[0-9]+]]
+__attribute__((no_sanitize("tbaa")))
+int NoTBAASAN3(int *a) { return *a; }
+
+// WITHOUT:  TBAASANOk{{.*}}) [[NOATTR]]
+// BL:  TBAASANOk{{.*}}) [[NOATTR]]
+// TBAASAN: TBAASANOk{{.*}}) [[WITH:#[0-9]+]]
+int TBAASANOk(int *a) { return *a; }
+
+// WITHOUT:  TemplateTBAASANOk{{.*}}) [[NOATTR]]
+// BL:  TemplateTBAASANOk{{.*}}) [[NOATTR]]
+// TBAASAN: TemplateTBAASANOk{{.*}}) [[WITH]]
+template<int i>
+int TemplateTBAASANOk() { return i; }
+
+// WITHOUT:  TemplateNoTBAASAN{{.*}}) [[NOATTR]]
+// BL:  TemplateNoTBAASAN{{.*}}) [[NOATTR]]
+// TBAASAN: TemplateNoTBAASAN{{.*}}) [[NOATTR]]
+template<int i>
+__attribute__((no_sanitize_tbaa))
+int TemplateNoTBAASAN() { return i; }
+
+int force_instance = TemplateTBAASANOk<42>()
+                   + TemplateNoTBAASAN<42>();
+
+// Check that __cxx_global_var_init* get the sanitize_tbaa attribute.
+int global1 = 0;
+int global2 = *(int*)((char*)&global1+1);
+// WITHOUT: @__cxx_global_var_init{{.*}}[[NOATTR:#[0-9]+]]
+// BL: @__cxx_global_var_init{{.*}}[[NOATTR:#[0-9]+]]
+// TBAASAN: @__cxx_global_var_init{{.*}}[[WITH:#[0-9]+]]
+
+// WITHOUT: attributes [[NOATTR]] = { noinline nounwind{{.*}} }
+
+// BL: attributes [[NOATTR]] = { noinline nounwind{{.*}} }
+
+// TBAASAN: attributes [[NOATTR]] = { noinline nounwind{{.*}} }
+// TBAASAN: attributes [[WITH]] = { noinline nounwind sanitize_tbaa{{.*}} }
+
+// TBAASAN: Simple C++ TBAA
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -5733,7 +5733,8 @@
                                 .Case("no_address_safety_analysis", "address")
                                 .Case("no_sanitize_address", "address")
                                 .Case("no_sanitize_thread", "thread")
-                                .Case("no_sanitize_memory", "memory");
+                                .Case("no_sanitize_memory", "memory")
+                                .Case("no_sanitize_tbaa", "tbaa");
   if (isGlobalVar(D) && SanitizerName != "address")
     S.Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << Attr.getName() << ExpectedFunction;
Index: lib/Lex/PPMacroExpansion.cpp
===================================================================
--- lib/Lex/PPMacroExpansion.cpp
+++ lib/Lex/PPMacroExpansion.cpp
@@ -1137,6 +1137,7 @@
       .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow))
       .Case("efficiency_sanitizer",
             LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency))
+      .Case("tbaa_sanitizer", LangOpts.Sanitize.has(SanitizerKind::TBAA))
       // Objective-C features
       .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
       .Case("objc_arc", LangOpts.ObjCAutoRefCount)
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2563,12 +2563,13 @@
       Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
   }
 
-  // FIXME: Override value name discarding when asan or msan is used because the
-  // backend passes depend on the name of the alloca in order to print out
-  // names.
+  // FIXME: Override value name discarding when asan, msan, or tbaa is used
+  // because the backend passes depend on the name of the alloca in order to
+  // print out names.
   Res.getCodeGenOpts().DiscardValueNames &=
       !LangOpts.Sanitize.has(SanitizerKind::Address) &&
-      !LangOpts.Sanitize.has(SanitizerKind::Memory);
+      !LangOpts.Sanitize.has(SanitizerKind::Memory) &&
+      !LangOpts.Sanitize.has(SanitizerKind::TBAA);
 
   // FIXME: ParsePreprocessorArgs uses the FileManager to read the contents of
   // PCH file and find the original header name. Remove the need to do that in
Index: lib/Driver/ToolChains/Linux.cpp
===================================================================
--- lib/Driver/ToolChains/Linux.cpp
+++ lib/Driver/ToolChains/Linux.cpp
@@ -874,6 +874,8 @@
   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86)
     Res |= SanitizerKind::Leak;
   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64)
+    Res |= SanitizerKind::TBAA;
+  if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64)
     Res |= SanitizerKind::Thread;
   if (IsX86_64 || IsMIPS64 || IsPowerPC64 || IsAArch64)
     Res |= SanitizerKind::Memory;
Index: lib/Driver/ToolChains/FreeBSD.cpp
===================================================================
--- lib/Driver/ToolChains/FreeBSD.cpp
+++ lib/Driver/ToolChains/FreeBSD.cpp
@@ -387,6 +387,7 @@
   if (IsX86_64 || IsMIPS64) {
     Res |= SanitizerKind::Leak;
     Res |= SanitizerKind::Thread;
+    Res |= SanitizerKind::TBAA;
   }
   if (IsX86 || IsX86_64) {
     Res |= SanitizerKind::SafeStack;
Index: lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- lib/Driver/ToolChains/CommonArgs.cpp
+++ lib/Driver/ToolChains/CommonArgs.cpp
@@ -560,6 +560,8 @@
   }
   if (SanArgs.needsEsanRt())
     StaticRuntimes.push_back("esan");
+  if (SanArgs.needsTBAAsanRt())
+    StaticRuntimes.push_back("tbaasan");
 }
 
 // Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
Index: lib/Driver/SanitizerArgs.cpp
===================================================================
--- lib/Driver/SanitizerArgs.cpp
+++ lib/Driver/SanitizerArgs.cpp
@@ -30,9 +30,10 @@
   NeedsUbsanCxxRt = Vptr | CFI,
   NotAllowedWithTrap = Vptr,
   RequiresPIE = DataFlow,
-  NeedsUnwindTables = Address | Thread | Memory | DataFlow,
+  NeedsUnwindTables = Address | Thread | Memory | DataFlow | TBAA,
   SupportsCoverage =
-      Address | Memory | Leak | Undefined | Integer | Nullability | DataFlow,
+      Address | Memory | Leak | Undefined | Integer | Nullability | DataFlow |
+      TBAA,
   RecoverableByDefault = Undefined | Integer | Nullability,
   Unrecoverable = Unreachable | Return,
   LegacyFsanitizeRecoverMask = Undefined | Integer,
@@ -98,6 +99,8 @@
     BlacklistFile = "dfsan_abilist.txt";
   else if (Kinds & CFI)
     BlacklistFile = "cfi_blacklist.txt";
+  else if (Kinds & TBAA)
+    BlacklistFile = "tbaasan_blacklist.txt";
 
   if (BlacklistFile) {
     clang::SmallString<64> Path(D.ResourceDir);
@@ -321,7 +324,10 @@
       std::make_pair(Efficiency, Leak),
       std::make_pair(Efficiency, Thread),
       std::make_pair(Efficiency, Memory),
-      std::make_pair(Efficiency, KernelAddress)};
+      std::make_pair(Efficiency, KernelAddress),
+      std::make_pair(TBAA, Address), std::make_pair(TBAA, KernelAddress),
+      std::make_pair(TBAA, Memory), std::make_pair(TBAA, Leak),
+      std::make_pair(TBAA, Thread), std::make_pair(TBAA, Efficiency)};
   for (auto G : IncompatibleGroups) {
     SanitizerMask Group = G.first;
     if (Kinds & Group) {
Index: lib/CodeGen/CodeGenTBAA.cpp
===================================================================
--- lib/CodeGen/CodeGenTBAA.cpp
+++ lib/CodeGen/CodeGenTBAA.cpp
@@ -90,8 +90,10 @@
 
 llvm::MDNode *
 CodeGenTBAA::getTBAAInfo(QualType QTy) {
-  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
-  if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
+  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless
+  // we're running the TBAA sanitizer).
+  if (!Features.Sanitize.has(SanitizerKind::TBAA) &&
+      (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing))
     return nullptr;
 
   // If the type has the may_alias attribute (even on a typedef), it is
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -124,8 +124,10 @@
   if (LangOpts.CUDA)
     createCUDARuntime();
 
-  // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
+  // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0
+  // (as does the TBAA sanitizer).
   if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
+      LangOpts.Sanitize.has(SanitizerKind::TBAA) ||
       (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
     TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
                                getCXXABI().getMangleContext()));
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -752,6 +752,8 @@
     Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
   if (SanOpts.has(SanitizerKind::SafeStack))
     Fn->addFnAttr(llvm::Attribute::SafeStack);
+  if (SanOpts.has(SanitizerKind::TBAA))
+    Fn->addFnAttr(llvm::Attribute::SanitizeTBAA);
 
   // Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize,
   // .cxx_destruct, __destroy_helper_block_ and all of their calees at run time.
Index: lib/CodeGen/CGDeclCXX.cpp
===================================================================
--- lib/CodeGen/CGDeclCXX.cpp
+++ lib/CodeGen/CGDeclCXX.cpp
@@ -288,6 +288,8 @@
       Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
     if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack))
       Fn->addFnAttr(llvm::Attribute::SafeStack);
+    if (getLangOpts().Sanitize.has(SanitizerKind::TBAA))
+      Fn->addFnAttr(llvm::Attribute::SanitizeTBAA);
   }
 
   return Fn;
Index: lib/CodeGen/BackendUtil.cpp
===================================================================
--- lib/CodeGen/BackendUtil.cpp
+++ lib/CodeGen/BackendUtil.cpp
@@ -257,6 +257,11 @@
   PM.add(createEfficiencySanitizerPass(Opts));
 }
 
+static void addTBAASanitizerPass(const PassManagerBuilder &Builder,
+                                 legacy::PassManagerBase &PM) {
+  PM.add(createTBAASanitizerPass());
+}
+
 static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
                                          const CodeGenOptions &CodeGenOpts) {
   TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
@@ -554,6 +559,13 @@
                            addEfficiencySanitizerPass);
   }
 
+  if (LangOpts.Sanitize.has(SanitizerKind::TBAA)) {
+    PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+                           addTBAASanitizerPass);
+    PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+                           addTBAASanitizerPass);
+  }
+
   // Set up the per-function pass manager.
   FPM.add(new TargetLibraryInfoWrapperPass(*TLII));
   if (CodeGenOpts.VerifyModule)
Index: include/clang/Driver/SanitizerArgs.h
===================================================================
--- include/clang/Driver/SanitizerArgs.h
+++ include/clang/Driver/SanitizerArgs.h
@@ -65,6 +65,7 @@
   bool needsEsanRt() const {
     return Sanitizers.hasOneOf(SanitizerKind::Efficiency);
   }
+  bool needsTBAAsanRt() const { return Sanitizers.has(SanitizerKind::TBAA); }
 
   bool requiresPIE() const;
   bool needsUnwindTables() const;
Index: include/clang/Basic/Sanitizers.def
===================================================================
--- include/clang/Basic/Sanitizers.def
+++ include/clang/Basic/Sanitizers.def
@@ -126,6 +126,9 @@
 SANITIZER_GROUP("efficiency-all", Efficiency,
                 EfficiencyCacheFrag | EfficiencyWorkingSet)
 
+// TBAASanitizer
+SANITIZER("tbaa", TBAA)
+
 // Magic group, containing all sanitizers. For example, "-fno-sanitize=all"
 // can be used to disable all the sanitizers.
 SANITIZER_GROUP("all", All, ~0ULL)
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -1676,6 +1676,19 @@
   }];
 }
 
+def NoSanitizeTBAADocs : Documentation {
+  let Category = DocCatFunction;
+  let Heading = "no_sanitize_tbaa";
+  let Content = [{
+.. _langext-tbaa_sanitizer:
+
+Use ``__attribute__((no_sanitize_tbaa))`` on a function declaration to
+specify that checks for type-based aliasing violations should not be inserted
+(e.g. by TBAASanitizer). The function may still be instrumented by the tool
+to avoid false positives in other places.
+  }];
+}
+
 def DocCatTypeSafety : DocumentationCategory<"Type Safety Checking"> {
   let Content = [{
 Clang supports additional attributes to enable checking type safety properties
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1845,11 +1845,12 @@
   let Spellings = [GCC<"no_address_safety_analysis">,
                    GCC<"no_sanitize_address">,
                    GCC<"no_sanitize_thread">,
-                   GNU<"no_sanitize_memory">];
+                   GNU<"no_sanitize_memory">,
+                   GNU<"no_sanitize_tbaa">];
   let Subjects = SubjectList<[Function, GlobalVar], ErrorDiag,
         "ExpectedFunctionOrGlobalVar">;
   let Documentation = [NoSanitizeAddressDocs, NoSanitizeThreadDocs,
-                       NoSanitizeMemoryDocs];
+                       NoSanitizeMemoryDocs, NoSanitizeTBAADocs];
   let ASTNode = 0;
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to