[PATCH] D154014: [SpecialCaseList] Use Globs instead of Regex

Ellis Hoag via Phabricator via cfe-commits Thu, 20 Jul 2023 18:08:23 -0700

ellis updated this revision to Diff 542732.
ellis added a comment.

If `#!special-case-list-v2` is the first line in the special case list, then we 
will use globs to match patterns. Otherwise, we fall back to the original 
behavior of using regexes to match patterns. Once this feature is stable, and 
after a version cut, we can remove the regex case.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154014/new/

https://reviews.llvm.org/D154014

Files:
  clang/docs/SanitizerSpecialCaseList.rst
  clang/lib/Basic/ProfileList.cpp
  clang/lib/Basic/SanitizerSpecialCaseList.cpp
  llvm/include/llvm/Support/SpecialCaseList.h
  llvm/lib/Support/SpecialCaseList.cpp
  llvm/unittests/Support/SpecialCaseListTest.cpp

Index: llvm/unittests/Support/SpecialCaseListTest.cpp
===================================================================
--- llvm/unittests/Support/SpecialCaseListTest.cpp
+++ llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -10,8 +10,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::HasSubstr;
+using testing::StartsWith;
 using namespace llvm;
 
 namespace {
@@ -19,24 +22,32 @@
 class SpecialCaseListTest : public ::testing::Test {
 protected:
   std::unique_ptr<SpecialCaseList> makeSpecialCaseList(StringRef List,
-                                                       std::string &Error) {
-    std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(List);
+                                                       std::string &Error,
+                                                       bool UseGlobs = true) {
+    auto S = List.str();
+    if (UseGlobs)
+      S = (Twine("#!special-case-list-v2\n") + S).str();
+    std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(S);
     return SpecialCaseList::create(MB.get(), Error);
   }
 
-  std::unique_ptr<SpecialCaseList> makeSpecialCaseList(StringRef List) {
+  std::unique_ptr<SpecialCaseList> makeSpecialCaseList(StringRef List,
+                                                       bool UseGlobs = true) {
     std::string Error;
-    auto SCL = makeSpecialCaseList(List, Error);
+    auto SCL = makeSpecialCaseList(List, Error, UseGlobs);
     assert(SCL);
     assert(Error == "");
     return SCL;
   }
 
-  std::string makeSpecialCaseListFile(StringRef Contents) {
+  std::string makeSpecialCaseListFile(StringRef Contents,
+                                      bool UseGlobs = true) {
     int FD;
     SmallString<64> Path;
     sys::fs::createTemporaryFile("SpecialCaseListTest", "temp", FD, Path);
     raw_fd_ostream OF(FD, true, true);
+    if (UseGlobs)
+      OF << "#!special-case-list-v2\n";
     OF << Contents;
     OF.close();
     return std::string(Path.str());
@@ -59,10 +70,10 @@
   EXPECT_FALSE(SCL->inSection("", "fun", "hello"));
   EXPECT_FALSE(SCL->inSection("", "src", "hello", "category"));
 
-  EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello"));
-  EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye"));
-  EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category"));
-  EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category"));
+  EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "hello"));
+  EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "bye"));
+  EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "hi", "category"));
+  EXPECT_EQ(7u, SCL->inSectionBlame("", "src", "zzzz", "category"));
   EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi"));
   EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello"));
   EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category"));
@@ -74,31 +85,29 @@
                                          "\n"
                                          "[not valid\n",
                                          Error));
-  EXPECT_TRUE(
-      ((StringRef)Error).startswith("malformed section header on line 3:"));
+  EXPECT_THAT(Error, StartsWith("malformed section header on line 4:"));
 
   EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n"
                                          "[not valid\n",
                                          Error));
-  EXPECT_TRUE(
-      ((StringRef)Error).startswith("malformed section header on line 4:"));
+  EXPECT_THAT(Error, StartsWith("malformed section header on line 5:"));
 }
 
-TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) {
+TEST_F(SpecialCaseListTest, SectionGlobErrorHandling) {
   std::string Error;
   EXPECT_EQ(makeSpecialCaseList("[address", Error), nullptr);
-  EXPECT_TRUE(((StringRef)Error).startswith("malformed section header "));
+  EXPECT_THAT(Error, StartsWith("malformed section header "));
 
   EXPECT_EQ(makeSpecialCaseList("[[]", Error), nullptr);
-  EXPECT_TRUE(((StringRef)Error).startswith("malformed regex for section [: "));
+  EXPECT_EQ(Error, "malformed section at line 2: '[': invalid glob pattern: [");
 
   EXPECT_EQ(makeSpecialCaseList("src:=", Error), nullptr);
-  EXPECT_TRUE(((StringRef)Error).endswith("Supplied regexp was blank"));
+  EXPECT_THAT(Error, HasSubstr("Supplied glob was blank"));
 }
 
 TEST_F(SpecialCaseListTest, Section) {
   std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("src:global\n"
-                                                             "[sect1|sect2]\n"
+                                                             "[{sect1,sect2}]\n"
                                                              "src:test1\n"
                                                              "[sect3*]\n"
                                                              "src:test2\n");
@@ -152,19 +161,14 @@
 TEST_F(SpecialCaseListTest, InvalidSpecialCaseList) {
   std::string Error;
   EXPECT_EQ(nullptr, makeSpecialCaseList("badline", Error));
-  EXPECT_EQ("malformed line 1: 'badline'", Error);
+  EXPECT_EQ("malformed line 2: 'badline'", Error);
   EXPECT_EQ(nullptr, makeSpecialCaseList("src:bad[a-", Error));
-  EXPECT_EQ("malformed regex in line 1: 'bad[a-': invalid character range",
-            Error);
-  EXPECT_EQ(nullptr, makeSpecialCaseList("src:a.c\n"
-                                   "fun:fun(a\n",
-                                   Error));
-  EXPECT_EQ("malformed regex in line 2: 'fun(a': parentheses not balanced",
+  EXPECT_EQ("malformed glob in line 2: 'bad[a-': invalid glob pattern: bad[a-",
             Error);
   std::vector<std::string> Files(1, "unexisting");
   EXPECT_EQ(nullptr,
             SpecialCaseList::create(Files, *vfs::getRealFileSystem(), Error));
-  EXPECT_EQ(0U, Error.find("can't open file 'unexisting':"));
+  EXPECT_THAT(Error, StartsWith("can't open file 'unexisting':"));
 }
 
 TEST_F(SpecialCaseListTest, EmptySpecialCaseList) {
@@ -191,7 +195,7 @@
 }
 
 TEST_F(SpecialCaseListTest, NoTrigramsInRules) {
-  std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:b.r\n"
+  std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:b?r\n"
                                                              "fun:za*az\n");
   EXPECT_TRUE(SCL->inSection("", "fun", "bar"));
   EXPECT_FALSE(SCL->inSection("", "fun", "baz"));
@@ -245,4 +249,58 @@
   EXPECT_FALSE(SCL->inSection("", "src", "hello\\\\world"));
 }
 
+TEST_F(SpecialCaseListTest, Version1) {
+  std::unique_ptr<SpecialCaseList> SCL =
+      makeSpecialCaseList("[sect1|sect2]\n"
+                          // Does not match foo!
+                          "fun:foo.*\n"
+                          "fun:abc|def\n"
+                          "fun:b.r\n",
+                          /*UseGlobs=*/false);
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "fooz"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "fooz"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "fooz"));
+
+  // `foo.*` does not match `foo` because the pattern is translated to `foo..*`
+  EXPECT_FALSE(SCL->inSection("sect1", "fun", "foo"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "abc"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "abc"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "abc"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "def"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "def"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "def"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "bar"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "bar"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "bar"));
+}
+
+TEST_F(SpecialCaseListTest, Version2) {
+  std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("[{sect1,sect2}]\n"
+                                                             "fun:foo*\n"
+                                                             "fun:{abc,def}\n"
+                                                             "fun:b?r\n");
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "fooz"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "fooz"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "fooz"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "foo"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "foo"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "foo"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "abc"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "abc"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "abc"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "def"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "def"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "def"));
+
+  EXPECT_TRUE(SCL->inSection("sect1", "fun", "bar"));
+  EXPECT_TRUE(SCL->inSection("sect2", "fun", "bar"));
+  EXPECT_FALSE(SCL->inSection("sect3", "fun", "bar"));
+}
 }
Index: llvm/lib/Support/SpecialCaseList.cpp
===================================================================
--- llvm/lib/Support/SpecialCaseList.cpp
+++ llvm/lib/Support/SpecialCaseList.cpp
@@ -14,58 +14,69 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SpecialCaseList.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include <stdio.h>
 #include <string>
 #include <system_error>
 #include <utility>
 
-#include <stdio.h>
 namespace llvm {
 
-bool SpecialCaseList::Matcher::insert(std::string Regexp,
-                                      unsigned LineNumber,
-                                      std::string &REError) {
-  if (Regexp.empty()) {
-    REError = "Supplied regexp was blank";
-    return false;
-  }
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
+                                       bool UseGlobs) {
+  if (Pattern.empty())
+    return createStringError(errc::invalid_argument,
+                             Twine("Supplied ") +
+                                 (UseGlobs ? "glob" : "regex") + " was blank");
+
+  if (!UseGlobs) {
+    // Replace * with .*
+    auto Regexp = Pattern.str();
+    for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+         pos += strlen(".*")) {
+      Regexp.replace(pos, strlen("*"), ".*");
+    }
 
-  if (Regex::isLiteralERE(Regexp)) {
-    Strings[Regexp] = LineNumber;
-    return true;
-  }
+    Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
 
-  // Replace * with .*
-  for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
-       pos += strlen(".*")) {
-    Regexp.replace(pos, strlen("*"), ".*");
-  }
+    // Check that the regexp is valid.
+    Regex CheckRE(Regexp);
+    std::string REError;
+    if (!CheckRE.isValid(REError))
+      return createStringError(errc::invalid_argument, REError);
 
-  Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+    RegExes.emplace_back(std::make_pair(
+        std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
 
-  // Check that the regexp is valid.
-  Regex CheckRE(Regexp);
-  if (!CheckRE.isValid(REError))
-    return false;
+    return Error::success();
+  }
 
-  RegExes.emplace_back(
-      std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
-  return true;
+  auto [It, DidEmplace] = Globs.try_emplace(Pattern);
+  if (DidEmplace) {
+    // We must be sure to use the string in the map rather than the provided
+    // reference, which could be destroyed before match() is called.
+    Pattern = It->getKey();
+    auto &Pair = It->getValue();
+    if (auto Err = GlobPattern::create(Pattern).moveInto(Pair.first))
+      return Err;
+    Pair.second = LineNumber;
+  }
+  return Error::success();
 }
 
 unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
-  auto It = Strings.find(Query);
-  if (It != Strings.end())
-    return It->second;
-  for (const auto &RegExKV : RegExes)
-    if (RegExKV.first->match(Query))
-      return RegExKV.second;
+  for (const auto &[Pattern, Pair] : Globs)
+    if (Pair.first.match(Query))
+      return Pair.second;
+  for (const auto &[Regex, LineNumber] : RegExes)
+    if (Regex->match(Query))
+      return LineNumber;
   return 0;
 }
 
+// TODO: Refactor this to return Expected<...>
 std::unique_ptr<SpecialCaseList>
 SpecialCaseList::create(const std::vector<std::string> &Paths,
                         llvm::vfs::FileSystem &FS, std::string &Error) {
@@ -94,7 +105,6 @@
 
 bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
                                      vfs::FileSystem &VFS, std::string &Error) {
-  StringMap<size_t> Sections;
   for (const auto &Path : Paths) {
     ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
         VFS.getBufferForFile(Path);
@@ -103,7 +113,7 @@
       return false;
     }
     std::string ParseError;
-    if (!parse(FileOrErr.get().get(), Sections, ParseError)) {
+    if (!parse(FileOrErr.get().get(), ParseError)) {
       Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
       return false;
     }
@@ -113,82 +123,79 @@
 
 bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
                                      std::string &Error) {
-  StringMap<size_t> Sections;
-  if (!parse(MB, Sections, Error))
+  if (!parse(MB, Error))
     return false;
   return true;
 }
 
-bool SpecialCaseList::parse(const MemoryBuffer *MB,
-                            StringMap<size_t> &SectionsMap,
-                            std::string &Error) {
-  // Iterate through each line in the exclusion list file.
-  SmallVector<StringRef, 16> Lines;
-  MB->getBuffer().split(Lines, '\n');
+Expected<SpecialCaseList::Section *>
+SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo,
+                            bool UseGlobs) {
+  auto [It, DidEmplace] = Sections.try_emplace(SectionStr);
+  auto &Section = It->getValue();
+  if (DidEmplace)
+    if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs))
+      return createStringError(errc::invalid_argument,
+                               "malformed section at line " + Twine(LineNo) +
+                                   ": '" + SectionStr +
+                                   "': " + toString(std::move(Err)));
+  return &Section;
+}
 
-  unsigned LineNo = 1;
-  StringRef Section = "*";
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
+  Section *CurrentSection;
+  if (auto Err = addSection("*", 1).moveInto(CurrentSection)) {
+    Error = toString(std::move(Err));
+    return false;
+  }
 
-  for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
-    *I = I->trim();
-    // Ignore empty lines and lines starting with "#"
-    if (I->empty() || I->startswith("#"))
+  // In https://reviews.llvm.org/D154014 we transitioned to using globs instead
+  // of regexes to match patterns in special case lists. Since this was a
+  // breaking change, we will temporarily support the original behavior using
+  // regexes. If "#!special-case-list-v2" is the first line of the file, then
+  // we will use the new behavior using globs. For more details, see
+  // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
+  bool UseGlobs = MB->getBuffer().startswith("#!special-case-list-v2\n");
+
+  for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+       !LineIt.is_at_eof(); LineIt++) {
+    unsigned LineNo = LineIt.line_number();
+    StringRef Line = LineIt->trim();
+    if (Line.empty())
       continue;
 
     // Save section names
-    if (I->startswith("[")) {
-      if (!I->endswith("]")) {
-        Error = (Twine("malformed section header on line ") + Twine(LineNo) +
-                 ": " + *I).str();
-        return false;
-      }
-
-      Section = I->slice(1, I->size() - 1);
-
-      std::string REError;
-      Regex CheckRE(Section);
-      if (!CheckRE.isValid(REError)) {
+    if (Line.startswith("[")) {
+      if (!Line.endswith("]")) {
         Error =
-            (Twine("malformed regex for section ") + Section + ": '" + REError)
+            ("malformed section header on line " + Twine(LineNo) + ": " + Line)
                 .str();
         return false;
       }
 
+      if (auto Err = addSection(Line.drop_front().drop_back(), LineNo, UseGlobs)
+                         .moveInto(CurrentSection)) {
+        Error = toString(std::move(Err));
+        return false;
+      }
       continue;
     }
 
-    // Get our prefix and unparsed regexp.
-    std::pair<StringRef, StringRef> SplitLine = I->split(":");
-    StringRef Prefix = SplitLine.first;
-    if (SplitLine.second.empty()) {
+    // Get our prefix and unparsed pattern.
+    auto [Prefix, Postfix] = Line.split(":");
+    if (Postfix.empty()) {
       // Missing ':' in the line.
-      Error = (Twine("malformed line ") + Twine(LineNo) + ": '" +
-               SplitLine.first + "'").str();
+      Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
       return false;
     }
 
-    std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
-    std::string Regexp = std::string(SplitRegexp.first);
-    StringRef Category = SplitRegexp.second;
-
-    // Create this section if it has not been seen before.
-    if (!SectionsMap.contains(Section)) {
-      std::unique_ptr<Matcher> M = std::make_unique<Matcher>();
-      std::string REError;
-      if (!M->insert(std::string(Section), LineNo, REError)) {
-        Error = (Twine("malformed section ") + Section + ": '" + REError).str();
-        return false;
-      }
-
-      SectionsMap[Section] = Sections.size();
-      Sections.emplace_back(std::move(M));
-    }
-
-    auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category];
-    std::string REError;
-    if (!Entry.insert(std::move(Regexp), LineNo, REError)) {
-      Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
-               SplitLine.second + "': " + REError).str();
+    auto [Pattern, Category] = Postfix.split("=");
+    auto &Entry = CurrentSection->Entries[Prefix][Category];
+    if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+      Error =
+          (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
+           Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
+              .str();
       return false;
     }
   }
@@ -205,13 +212,14 @@
 unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
                                          StringRef Query,
                                          StringRef Category) const {
-  for (const auto &SectionIter : Sections)
-    if (SectionIter.SectionMatcher->match(Section)) {
-      unsigned Blame =
-          inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
+  for (const auto &It : Sections) {
+    const auto &S = It.getValue();
+    if (S.SectionMatcher->match(Section)) {
+      unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
       if (Blame)
         return Blame;
     }
+  }
   return 0;
 }
 
@@ -226,4 +234,4 @@
   return II->getValue().match(Query);
 }
 
-}  // namespace llvm
+} // namespace llvm
Index: llvm/include/llvm/Support/SpecialCaseList.h
===================================================================
--- llvm/include/llvm/Support/SpecialCaseList.h
+++ llvm/include/llvm/Support/SpecialCaseList.h
@@ -5,47 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 //
-// This is a utility class used to parse user-provided text files with
-// "special case lists" for code sanitizers. Such files are used to
-// define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for
-// sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer.
-//
-// Empty lines and lines starting with "#" are ignored. Sections are defined
-// using a '[section_name]' header and can be used to specify sanitizers the
-// entries below it apply to. Section names are regular expressions, and
-// entries without a section header match all sections (e.g. an '[*]' header
-// is assumed.)
-// The remaining lines should have the form:
-//   prefix:wildcard_expression[=category]
-// If category is not specified, it is assumed to be empty string.
-// Definitions of "prefix" and "category" are sanitizer-specific. For example,
-// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global".
-// Wildcard expressions define, respectively, source files, main files,
-// functions or globals which shouldn't be instrumented.
-// Examples of categories:
-//   "functional": used in DFSan to list functions with pure functional
-//                 semantics.
-//   "init": used in ASan exclusion list to disable initialization-order bugs
-//           detection for certain globals or source files.
-// Full special case list file example:
-// ---
-// [address]
-// # Excluded items:
-// fun:*_ZN4base6subtle*
-// global:*global_with_bad_access_or_initialization*
-// global:*global_with_initialization_issues*=init
-// type:*Namespace::ClassName*=init
-// src:file_with_tricky_code.cc
-// src:ignore-global-initializers-issues.cc=init
-// mainfile:main_file.cc
-//
-// [dataflow]
-// # Functions with pure functional semantics:
-// fun:cos=functional
-// fun:sin=functional
-// ---
-// Note that the wild card is in fact an llvm::Regex, but * is automatically
-// replaced with .*
+// This file implements a Special Case List for code sanitizers.
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,6 +13,7 @@
 #define LLVM_SUPPORT_SPECIALCASELIST_H
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/Regex.h"
 #include <memory>
 #include <string>
@@ -66,6 +27,45 @@
 class FileSystem;
 }
 
+/// This is a utility class used to parse user-provided text files with
+/// "special case lists" for code sanitizers. Such files are used to
+/// define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for
+/// sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer.
+///
+/// Empty lines and lines starting with "#" are ignored. Sections are defined
+/// using a '[section_name]' header and can be used to specify sanitizers the
+/// entries below it apply to. Section names are globs, and
+/// entries without a section header match all sections (e.g. an '[*]' header
+/// is assumed.)
+/// The remaining lines should have the form:
+///   prefix:glob_pattern[=category]
+/// If category is not specified, it is assumed to be empty string.
+/// Definitions of "prefix" and "category" are sanitizer-specific. For example,
+/// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global".
+/// "glob_pattern" defines source files, main files, functions or globals which
+/// shouldn't be instrumented.
+/// Examples of categories:
+///   "functional": used in DFSan to list functions with pure functional
+///                 semantics.
+///   "init": used in ASan exclusion list to disable initialization-order bugs
+///           detection for certain globals or source files.
+/// Full special case list file example:
+/// ---
+/// [address]
+/// # Excluded items:
+/// fun:*_ZN4base6subtle*
+/// global:*global_with_bad_access_or_initialization*
+/// global:*global_with_initialization_issues*=init
+/// type:*Namespace::ClassName*=init
+/// src:file_with_tricky_code.cc
+/// src:ignore-global-initializers-issues.cc=init
+/// mainfile:main_file.cc
+///
+/// [dataflow]
+/// # Functions with pure functional semantics:
+/// fun:cos=functional
+/// fun:sin=functional
+/// ---
 class SpecialCaseList {
 public:
   /// Parses the special case list entries from files. On failure, returns
@@ -88,7 +88,7 @@
   /// \code
   ///   @Prefix:<E>=@Category
   /// \endcode
-  /// where @Query satisfies wildcard expression <E> in a given @Section.
+  /// where @Query satisfies the glob <E> in a given @Section.
   bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
                  StringRef Category = StringRef()) const;
 
@@ -97,7 +97,7 @@
   /// \code
   ///   @Prefix:<E>=@Category
   /// \endcode
-  /// where @Query satisfies wildcard expression <E> in a given @Section.
+  /// where @Query satisfies the glob <E> in a given @Section.
   /// Returns zero if there is no exclusion entry corresponding to this
   /// expression.
   unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
@@ -114,19 +114,16 @@
   SpecialCaseList(SpecialCaseList const &) = delete;
   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
 
-  /// Represents a set of regular expressions.  Regular expressions which are
-  /// "literal" (i.e. no regex metacharacters) are stored in Strings.  The
-  /// reason for doing so is efficiency; StringMap is much faster at matching
-  /// literal strings than Regex.
+  /// Represents a set of globs (or legacy regexes) and their line numbers.
   class Matcher {
   public:
-    bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
+    Error insert(StringRef Pattern, unsigned LineNumber, bool UseGlobs);
     // Returns the line number in the source file that this query matches to.
     // Returns zero if no match is found.
     unsigned match(StringRef Query) const;
 
   private:
-    StringMap<unsigned> Strings;
+    StringMap<std::pair<GlobPattern, unsigned>> Globs;
     std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
   };
 
@@ -134,16 +131,19 @@
 
   struct Section {
     Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
+    Section() : Section(std::make_unique<Matcher>()) {}
 
     std::unique_ptr<Matcher> SectionMatcher;
     SectionEntries Entries;
   };
 
-  std::vector<Section> Sections;
+  StringMap<Section> Sections;
+
+  Expected<Section *> addSection(StringRef SectionStr, unsigned LineNo,
+                                 bool UseGlobs = true);
 
   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
-  bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
-             std::string &Error);
+  bool parse(const MemoryBuffer *MB, std::string &Error);
 
   // Helper method for derived classes to search by Prefix, Query, and Category
   // once they have already resolved a section entry.
Index: clang/lib/Basic/SanitizerSpecialCaseList.cpp
===================================================================
--- clang/lib/Basic/SanitizerSpecialCaseList.cpp
+++ clang/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -37,7 +37,8 @@
 }
 
 void SanitizerSpecialCaseList::createSanitizerSections() {
-  for (auto &S : Sections) {
+  for (auto &It : Sections) {
+    auto &S = It.second;
     SanitizerMask Mask;
 
 #define SANITIZER(NAME, ID)                                                    \
Index: clang/lib/Basic/ProfileList.cpp
===================================================================
--- clang/lib/Basic/ProfileList.cpp
+++ clang/lib/Basic/ProfileList.cpp
@@ -36,8 +36,8 @@
   bool isEmpty() const { return Sections.empty(); }
 
   bool hasPrefix(StringRef Prefix) const {
-    for (auto &SectionIter : Sections)
-      if (SectionIter.Entries.count(Prefix) > 0)
+    for (const auto &It : Sections)
+      if (It.second.Entries.count(Prefix) > 0)
         return true;
     return false;
   }
Index: clang/docs/SanitizerSpecialCaseList.rst
===================================================================
--- clang/docs/SanitizerSpecialCaseList.rst
+++ clang/docs/SanitizerSpecialCaseList.rst
@@ -15,7 +15,7 @@
 Goal and usage
 ==============
 
-User of sanitizer tools, such as :doc:`AddressSanitizer`, :doc:`ThreadSanitizer`
+Users of sanitizer tools, such as :doc:`AddressSanitizer`, :doc:`ThreadSanitizer`
 or :doc:`MemorySanitizer` may want to disable or alter some checks for
 certain source-level entities to:
 
@@ -54,37 +54,48 @@
 Ignorelists consist of entries, optionally grouped into sections. Empty lines
 and lines starting with "#" are ignored.
 
-Section names are regular expressions written in square brackets that denote
+.. note::
+
+  In `D154014 <https://reviews.llvm.org/D154014>`_ we transitioned to using globs instead
+  of regexes to match patterns in special case lists. Since this was a
+  breaking change, we will temporarily support the original behavior using
+  regexes. If ``#!special-case-list-v2`` is the first line of the file, then
+  we will use the new behavior using globs. For more details, see
+  `this discourse post <https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666>`_.
+
+
+Section names are globs written in square brackets that denote
 which sanitizer the following entries apply to. For example, ``[address]``
-specifies AddressSanitizer while ``[cfi-vcall|cfi-icall]`` specifies Control
+specifies AddressSanitizer while ``[{cfi-vcall,cfi-icall}]`` specifies Control
 Flow Integrity virtual and indirect call checking. Entries without a section
 will be placed under the ``[*]`` section applying to all enabled sanitizers.
 
-Entries contain an entity type, followed by a colon and a regular expression,
+Entries contain an entity type, followed by a colon and a glob,
 specifying the names of the entities, optionally followed by an equals sign and
-a tool-specific category, e.g. ``fun:*ExampleFunc=example_category``.  The
-meaning of ``*`` in regular expression for entity names is different - it is
-treated as in shell wildcarding. Two generic entity types are ``src`` and
+a tool-specific category, e.g. ``fun:*ExampleFunc=example_category``.
+Two generic entity types are ``src`` and
 ``fun``, which allow users to specify source files and functions, respectively.
 Some sanitizer tools may introduce custom entity types and categories - refer to
 tool-specific docs.
 
 .. code-block:: bash
 
+    #!special-case-list-v2
+    # The version line above enables glob matching; see the note earlier on this page.
     # Lines starting with # are ignored.
-    # Turn off checks for the source file (use absolute path or path relative
-    # to the current working directory):
-    src:/path/to/source/file.c
+    # Turn off checks for the source file
+    # Entries without sections are placed into [*] and apply to all sanitizers
+    src:path/to/source/file.c
+    src:*/source/file.c
     # Turn off checks for this main file, including files included by it.
     # Useful when the main file instead of an included file should be ignored.
     mainfile:file.c
     # Turn off checks for a particular functions (use mangled names):
-    fun:MyFooBar
     fun:_Z8MyFooBarv
-    # Extended regular expressions are supported:
-    fun:bad_(foo|bar)
+    # Glob brace expansions and character ranges are supported
+    fun:bad_{foo,bar}
     src:bad_source[1-9].c
-    # Shell like usage of * is supported (* is treated as .*):
+    # "*" matches zero or more characters
     src:bad/sources/*
     fun:*BadFunction*
     # Specific sanitizer tools may introduce categories.
@@ -92,10 +103,9 @@
     # Sections can be used to limit ignorelist entries to specific sanitizers
     [address]
     fun:*BadASanFunc*
-    # Section names are regular expressions
-    [cfi-vcall|cfi-icall]
+    # Section names are globs
+    [{cfi-vcall,cfi-icall}]
     fun:*BadCfiCall
-    # Entries without sections are placed into [*] and apply to all sanitizers
 
 ``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
 does not need plumbing into the build system. This works well for internal

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D154014: [SpecialCaseList] Use Globs instead of Regex

Reply via email to