Author: Fangrui Song Date: 2023-12-11T15:30:28-08:00 New Revision: 81d1df2a39f0616be4b530cbf86b3f575442a347
URL: https://github.com/llvm/llvm-project/commit/81d1df2a39f0616be4b530cbf86b3f575442a347 DIFF: https://github.com/llvm/llvm-project/commit/81d1df2a39f0616be4b530cbf86b3f575442a347.diff LOG: [SpecialCaseList] Use glob by default (#74809) https://reviews.llvm.org/D154014 addes glob support and enables it when `#!special-case-list-v2` is the first line. This patch makes the glob support the default (faster than regex after https://reviews.llvm.org/D156046) and switches to the deprecated regex support if `#!special-case-list-v1` is the first line. I have surveyed many ignore lists. All ignore lists I find only use basic `*` `.` and don't use regex metacharacters such as `(` and `)`. (As neither `src:` nor `fun:` benefits from using regex.) They are unaffected by the transition (with a caution that regex `src:x/a.pb.*` matches `x/axpbx` but glob `src:x/a.pb.*` doesn't). There is no deprecating warning. If a user finds `#!special-case-list-v1`, they shall read that the old syntax is deprecated. Link: https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666 Added: Modified: clang/docs/SanitizerSpecialCaseList.rst llvm/lib/Support/SpecialCaseList.cpp llvm/unittests/Support/SpecialCaseListTest.cpp Removed: ################################################################################ diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index ab39276b04395..c7fb0fa3f8a82 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -56,13 +56,18 @@ and lines starting with "#" are ignored. .. note:: - In `D154014 <https://reviews.llvm.org/D154014>`_ we transitioned to using globs instead - of regexes to match patterns in special case lists. Since this was a - breaking change, we will temporarily support the original behavior using - regexes. If ``#!special-case-list-v2`` is the first line of the file, then - we will use the new behavior using globs. For more details, see - `this discourse post <https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666>`_. + Prior to Clang 18, section names and entries described below use a variant of + regex where ``*`` is translated to ``.*``. Clang 18 (`D154014 + <https://reviews.llvm.org/D154014>`) switches to glob and plans to remove + regex support in Clang 19. + For Clang 18, regex is supported if ``#!special-case-list-v1`` is the first + line of the file. + + Many special case lists use ``.`` to indicate the literal character and do + not use regex metacharacters such as ``(``, ``)``. They are unaffected by the + regex to glob transition. For more details, see `this discourse post + <https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666>`_. Section names are globs written in square brackets that denote which sanitizer the following entries apply to. For example, ``[address]`` @@ -80,7 +85,6 @@ tool-specific docs. .. code-block:: bash - #!special-case-list-v2 # The line above is explained in the note above # Lines starting with # are ignored. # Turn off checks for the source file diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index ac8877cca8bc6..7a23421eaeb89 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -150,13 +150,12 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { return false; } - // In https://reviews.llvm.org/D154014 we transitioned to using globs instead - // of regexes to match patterns in special case lists. Since this was a - // breaking change, we will temporarily support the original behavior using - // regexes. If "#!special-case-list-v2" is the first line of the file, then - // we will use the new behavior using globs. For more details, see + // In https://reviews.llvm.org/D154014 we added glob support and planned to + // remove regex support in patterns. We temporarily support the original + // behavior using regexes if "#!special-case-list-v1" is the first line of the + // file. For more details, see // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666 - bool UseGlobs = MB->getBuffer().starts_with("#!special-case-list-v2\n"); + bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n"); for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); !LineIt.is_at_eof(); LineIt++) { diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp index 81faeca5d6357..4289a5e702155 100644 --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -25,8 +25,8 @@ class SpecialCaseListTest : public ::testing::Test { std::string &Error, bool UseGlobs = true) { auto S = List.str(); - if (UseGlobs) - S = (Twine("#!special-case-list-v2\n") + S).str(); + if (!UseGlobs) + S = (Twine("#!special-case-list-v1\n") + S).str(); std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(S); return SpecialCaseList::create(MB.get(), Error); } @@ -46,8 +46,8 @@ class SpecialCaseListTest : public ::testing::Test { SmallString<64> Path; sys::fs::createTemporaryFile("SpecialCaseListTest", "temp", FD, Path); raw_fd_ostream OF(FD, true, true); - if (UseGlobs) - OF << "#!special-case-list-v2\n"; + if (!UseGlobs) + OF << "#!special-case-list-v1\n"; OF << Contents; OF.close(); return std::string(Path.str()); @@ -70,10 +70,10 @@ TEST_F(SpecialCaseListTest, Basic) { EXPECT_FALSE(SCL->inSection("", "fun", "hello")); EXPECT_FALSE(SCL->inSection("", "src", "hello", "category")); - EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "hello")); - EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "bye")); - EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "hi", "category")); - EXPECT_EQ(7u, SCL->inSectionBlame("", "src", "zzzz", "category")); + EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello")); + EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye")); + EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category")); + EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category")); EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi")); EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello")); EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category")); @@ -85,12 +85,12 @@ TEST_F(SpecialCaseListTest, CorrectErrorLineNumberWithBlankLine) { "\n" "[not valid\n", Error)); - EXPECT_THAT(Error, StartsWith("malformed section header on line 4:")); + EXPECT_THAT(Error, StartsWith("malformed section header on line 3:")); EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n" "[not valid\n", Error)); - EXPECT_THAT(Error, StartsWith("malformed section header on line 5:")); + EXPECT_THAT(Error, StartsWith("malformed section header on line 4:")); } TEST_F(SpecialCaseListTest, SectionGlobErrorHandling) { @@ -101,7 +101,7 @@ TEST_F(SpecialCaseListTest, SectionGlobErrorHandling) { EXPECT_EQ(makeSpecialCaseList("[[]", Error), nullptr); EXPECT_EQ( Error, - "malformed section at line 2: '[': invalid glob pattern, unmatched '['"); + "malformed section at line 1: '[': invalid glob pattern, unmatched '['"); EXPECT_EQ(makeSpecialCaseList("src:=", Error), nullptr); EXPECT_THAT(Error, HasSubstr("Supplied glob was blank")); @@ -163,10 +163,10 @@ TEST_F(SpecialCaseListTest, Substring) { TEST_F(SpecialCaseListTest, InvalidSpecialCaseList) { std::string Error; EXPECT_EQ(nullptr, makeSpecialCaseList("badline", Error)); - EXPECT_EQ("malformed line 2: 'badline'", Error); + EXPECT_EQ("malformed line 1: 'badline'", Error); EXPECT_EQ(nullptr, makeSpecialCaseList("src:bad[a-", Error)); EXPECT_EQ( - "malformed glob in line 2: 'bad[a-': invalid glob pattern, unmatched '['", + "malformed glob in line 1: 'bad[a-': invalid glob pattern, unmatched '['", Error); std::vector<std::string> Files(1, "unexisting"); EXPECT_EQ(nullptr, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits