Author: Owen Pan
Date: 2023-12-24T01:05:10-08:00
New Revision: 8f9803b5ab0b03c31c8cb182b44bd2eb70d9d8b0

URL: https://github.com/llvm/llvm-project/commit/8f9803b5ab0b03c31c8cb182b44bd2eb70d9d8b0
DIFF: https://github.com/llvm/llvm-project/commit/8f9803b5ab0b03c31c8cb182b44bd2eb70d9d8b0.diff

LOG: [clang-format] Add an fnmatch-like function for .clang-format-ignore (#76021)

This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.

Added:
    clang/lib/Format/MatchFilePath.cpp
    clang/lib/Format/MatchFilePath.h
    clang/unittests/Format/MatchFilePathTest.cpp

Modified:
    clang/lib/Format/CMakeLists.txt
    clang/unittests/Format/CMakeLists.txt

Removed:


################################################################################
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 015ec7c0cc84e3..84a3c136f650a8 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangFormat
   IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
+  MatchFilePath.cpp
   NamespaceEndCommentsFixer.cpp
   ObjCPropertyAttributeOrderFixer.cpp
   QualifierAlignmentFixer.cpp

diff --git a/clang/lib/Format/MatchFilePath.cpp b/clang/lib/Format/MatchFilePath.cpp
new file mode 100644
index 00000000000000..412ee4954587e0
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.cpp
@@ -0,0 +1,122 @@
+//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of matching a file path name to
+/// a pattern, similar to the POSIX fnmatch() function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MatchFilePath.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace format {
+
+// Check whether `FilePath` matches `Pattern` based on POSIX (1003.1-2008)
+// 2.13.1, 2.13.2, and Rule 1 of 2.13.3.
+bool matchFilePath(StringRef Pattern, StringRef FilePath) {
+  assert(!Pattern.empty());
+  assert(!FilePath.empty());
+
+  // No match if `Pattern` ends with a non-meta character not equal to the last
+  // character of `FilePath`.
+  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
+    return false;
+
+  constexpr auto Separator = '/';
+  const auto EOP = Pattern.size();  // End of `Pattern`.
+  const auto End = FilePath.size(); // End of `FilePath`.
+  unsigned I = 0;                   // Index to `Pattern`.
+
+  for (unsigned J = 0; J < End; ++J) {
+    if (I == EOP)
+      return false;
+
+    switch (const auto F = FilePath[J]; Pattern[I]) {
+    case '\\':
+      if (++I == EOP || F != Pattern[I])
+        return false;
+      break;
+    case '?':
+      if (F == Separator)
+        return false;
+      break;
+    case '*': {
+      while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
+      }
+      const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
+      const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
+      if (I == EOP) // `Pattern` ends with a star.
+        return NoMoreSeparatorsInFilePath;
+      // `Pattern` ends with a lone backslash.
+      if (Pattern[I] == '\\' && ++I == EOP)
+        return false;
+      // The star is followed by a (possibly escaped) `Separator`.
+      if (Pattern[I] == Separator) {
+        if (NoMoreSeparatorsInFilePath)
+          return false;
+        J = K; // Skip to next `Separator` in `FilePath`.
+        break;
+      }
+      // Recurse.
+      for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
+           ++J) {
+        if (matchFilePath(Pat, FilePath.substr(J)))
+          return true;
+      }
+      return false;
+    }
+    case '[':
+      // Skip e.g. `[!]`.
+      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
+        // Skip unpaired `[`, brackets containing slashes, and `[]`.
+        if (const auto K = Pattern.find_first_of("]/", I + 1);
+            K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
+          if (F == Separator)
+            return false;
+          ++I; // After the `[`.
+          bool Negated = false;
+          if (Pattern[I] == '!') {
+            Negated = true;
+            ++I; // After the `!`.
+          }
+          bool Match = false;
+          do {
+            if (I + 2 < K && Pattern[I + 1] == '-') {
+              Match = Pattern[I] <= F && F <= Pattern[I + 2];
+              I += 3; // After the range, e.g. `A-Z`.
+            } else {
+              Match = F == Pattern[I++];
+            }
+          } while (!Match && I < K);
+          if (Negated ? Match : !Match)
+            return false;
+          I = K + 1; // After the `]`.
+          continue;
+        }
+      }
+      [[fallthrough]]; // Match `[` literally.
+    default:
+      if (F != Pattern[I])
+        return false;
+    }
+
+    ++I;
+  }
+
+  // Match trailing stars with null strings.
+  while (I < EOP && Pattern[I] == '*')
+    ++I;
+
+  return I == EOP;
+}
+
+} // namespace format
+} // namespace clang

diff --git a/clang/lib/Format/MatchFilePath.h b/clang/lib/Format/MatchFilePath.h
new file mode 100644
index 00000000000000..482dab7c748e51
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.h
@@ -0,0 +1,22 @@
+//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);
+
+} // end namespace format
+} // end namespace clang
+
+#endif

diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 53136328928f5c..71f5886d946c80 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
   IntegerLiteralSeparatorTest.cpp
   MacroCallReconstructorTest.cpp
   MacroExpanderTest.cpp
+  MatchFilePathTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   ObjCPropertyAttributeOrderFixerTest.cpp
   QualifierFixerTest.cpp

diff --git a/clang/unittests/Format/MatchFilePathTest.cpp b/clang/unittests/Format/MatchFilePathTest.cpp
new file mode 100644
index 00000000000000..55723584ddc800
--- /dev/null
+++ b/clang/unittests/Format/MatchFilePathTest.cpp
@@ -0,0 +1,169 @@
+//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/MatchFilePath.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class MatchFilePathTest : public ::testing::Test {
+protected:
+  bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
+    return matchFilePath(Pattern, FilePath);
+  }
+};
+
+// Most of the test cases below are from:
+// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py
+
+TEST_F(MatchFilePathTest, Wildcard) {
+  EXPECT_TRUE(match("abc", "?*?"));
+  EXPECT_TRUE(match("abc", "???*"));
+  EXPECT_TRUE(match("abc", "*???"));
+  EXPECT_TRUE(match("abc", "???"));
+  EXPECT_TRUE(match("abc", "*"));
+  EXPECT_TRUE(match("abc", "ab[cd]"));
+  EXPECT_TRUE(match("abc", "ab[!de]"));
+  EXPECT_FALSE(match("abc", "ab[de]"));
+  EXPECT_FALSE(match("a", "??"));
+  EXPECT_FALSE(match("a", "b"));
+}
+
+TEST_F(MatchFilePathTest, Backslash) {
+  EXPECT_TRUE(match("a?", R"(a\?)"));
+  EXPECT_FALSE(match("a\\", R"(a\)"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("a", R"([!\])"));
+  EXPECT_FALSE(match("\\", R"([!\])"));
+}
+
+TEST_F(MatchFilePathTest, Newline) {
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+  EXPECT_TRUE(match("foo\nbar\n", "foo*"));
+  EXPECT_FALSE(match("\nfoo", "foo*"));
+  EXPECT_TRUE(match("\n", "*"));
+}
+
+TEST_F(MatchFilePathTest, Star) {
+  EXPECT_TRUE(match(std::string(50, 'a'), "*a*a*a*a*a*a*a*a*a*a"));
+  EXPECT_FALSE(match((std::string(50, 'a') + 'b'), "*a*a*a*a*a*a*a*a*a*a"));
+}
+
+TEST_F(MatchFilePathTest, CaseSensitive) {
+  EXPECT_TRUE(match("abc", "abc"));
+  EXPECT_FALSE(match("AbC", "abc"));
+  EXPECT_FALSE(match("abc", "AbC"));
+  EXPECT_TRUE(match("AbC", "AbC"));
+}
+
+TEST_F(MatchFilePathTest, PathSeparators) {
+  EXPECT_TRUE(match("usr/bin", "usr/bin"));
+  EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
+}
+
+TEST_F(MatchFilePathTest, NumericEscapeSequence) {
+  EXPECT_TRUE(match("test", "te*"));
+  EXPECT_TRUE(match("test\xff", "te*\xff"));
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+}
+
+TEST_F(MatchFilePathTest, ValidBrackets) {
+  EXPECT_TRUE(match("z", "[az]"));
+  EXPECT_FALSE(match("z", "[!az]"));
+  EXPECT_TRUE(match("a", "[aa]"));
+  EXPECT_TRUE(match("^", "[^az]"));
+  EXPECT_TRUE(match("[", "[[az]"));
+  EXPECT_FALSE(match("]", "[!]]"));
+}
+
+TEST_F(MatchFilePathTest, InvalidBrackets) {
+  EXPECT_TRUE(match("[", "["));
+  EXPECT_TRUE(match("[]", "[]"));
+  EXPECT_TRUE(match("[!", "[!"));
+  EXPECT_TRUE(match("[!]", "[!]"));
+}
+
+TEST_F(MatchFilePathTest, Range) {
+  EXPECT_TRUE(match("c", "[b-d]"));
+  EXPECT_FALSE(match("c", "[!b-d]"));
+  EXPECT_TRUE(match("y", "[b-dx-z]"));
+  EXPECT_FALSE(match("y", "[!b-dx-z]"));
+}
+
+TEST_F(MatchFilePathTest, Hyphen) {
+  EXPECT_FALSE(match("#", "[!-#]"));
+  EXPECT_FALSE(match("-", "[!--.]"));
+  EXPECT_TRUE(match("_", "[^-`]"));
+  EXPECT_TRUE(match("]", "[[-^]"));
+  EXPECT_TRUE(match("]", R"([\-^])"));
+  EXPECT_TRUE(match("-", "[b-]"));
+  EXPECT_FALSE(match("-", "[!b-]"));
+  EXPECT_TRUE(match("-", "[-b]"));
+  EXPECT_FALSE(match("-", "[!-b]"));
+  EXPECT_TRUE(match("-", "[-]"));
+  EXPECT_FALSE(match("-", "[!-]"));
+}
+
+TEST_F(MatchFilePathTest, UpperLELower) {
+  EXPECT_FALSE(match("c", "[d-b]"));
+  EXPECT_TRUE(match("c", "[!d-b]"));
+  EXPECT_TRUE(match("y", "[d-bx-z]"));
+  EXPECT_FALSE(match("y", "[!d-bx-z]"));
+  EXPECT_TRUE(match("_", "[d-b^-`]"));
+  EXPECT_TRUE(match("]", "[d-b[-^]"));
+  EXPECT_TRUE(match("b", "[b-b]"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
+  EXPECT_FALSE(match("/", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("[/]", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\t])"));
+  EXPECT_TRUE(match("t", R"([\t])"));
+  EXPECT_FALSE(match("\t", R"([\t])"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
+  EXPECT_FALSE(match("a/b", "a[.-0]b"));
+  EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
+  EXPECT_FALSE(match("a/b", "a[/-0]b"));
+  EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
+  EXPECT_FALSE(match("a/b", "a[.-/]b"));
+  EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));
+  EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
+  EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
+}
+
+TEST_F(MatchFilePathTest, Brackets) {
+  EXPECT_TRUE(match("[", "[[]"));
+  EXPECT_TRUE(match("&", "[a&&b]"));
+  EXPECT_TRUE(match("|", "[a||b]"));
+  EXPECT_TRUE(match("~", "[a~~b]"));
+  EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
+  EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
+}
+
+TEST_F(MatchFilePathTest, Path) {
+  EXPECT_TRUE(match(".clang-format", "*"));
+  EXPECT_TRUE(match(".git", "*git*"));
+  EXPECT_TRUE(match(".gitignore", "*git*"));
+  EXPECT_TRUE(match("foo/bar", "foo*/*bar"));
+  EXPECT_TRUE(match("foo/bar", "*/*"));
+  EXPECT_TRUE(match("foo/bar", R"(*foo*\/*bar*)"));
+  EXPECT_FALSE(match("foo/bar", "foo*"));
+  EXPECT_FALSE(match("foo/bar", "foo?bar"));
+  EXPECT_FALSE(match("foo/bar", "foo*bar"));
+  EXPECT_FALSE(match("foobar", "foo*/*"));
+  EXPECT_FALSE(match("foo\\", R"(foo*\)"));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits