njames93 updated this revision to Diff 444820.
njames93 added a comment.
More code cleanup.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D129706/new/
https://reviews.llvm.org/D129706
Files:
clang-tools-extra/clang-tidy/GlobList.cpp
clang-tools-extra/clang-tidy/GlobList.h
clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp
Index: clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp
===================================================================
--- clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp
@@ -16,6 +16,13 @@
EXPECT_FALSE(Filter.contains("aaa"));
}
+TYPED_TEST(GlobListTest, NotEmpty) {
+ TypeParam Filter("*,-");
+
+ EXPECT_FALSE(Filter.contains(""));
+ EXPECT_TRUE(Filter.contains("aaa"));
+}
+
TYPED_TEST(GlobListTest, Nothing) {
TypeParam Filter("-*");
@@ -57,6 +64,51 @@
EXPECT_FALSE(Filter.contains("bbbb"));
}
+TYPED_TEST(GlobListTest, PreAndPostFixPattern) {
+ TypeParam Filter("a*b");
+
+ EXPECT_TRUE(Filter.contains("ab"));
+ EXPECT_TRUE(Filter.contains("aMiddleb"));
+ EXPECT_FALSE(Filter.contains("aba"));
+ EXPECT_FALSE(Filter.contains("bab"));
+}
+
+TYPED_TEST(GlobListTest, DoubleWildcard) {
+ {
+ TypeParam Filter("a**b");
+
+ EXPECT_TRUE(Filter.isEquivalentTo("a*b"));
+ EXPECT_TRUE(Filter.isEquivalentTo("a***b"));
+
+ EXPECT_TRUE(Filter.contains("ab"));
+ EXPECT_TRUE(Filter.contains("aMiddleb"));
+ EXPECT_FALSE(Filter.contains("aba"));
+ EXPECT_FALSE(Filter.contains("bab"));
+ }
+ {
+ TypeParam Filter("a**");
+
+ EXPECT_TRUE(Filter.isEquivalentTo("a*"));
+ EXPECT_TRUE(Filter.isEquivalentTo("a***"));
+
+ EXPECT_TRUE(Filter.contains("ab"));
+ EXPECT_TRUE(Filter.contains("aMiddleb"));
+ EXPECT_TRUE(Filter.contains("aba"));
+ EXPECT_FALSE(Filter.contains("bab"));
+ }
+ {
+ TypeParam Filter("**b");
+
+ EXPECT_TRUE(Filter.isEquivalentTo("*b"));
+ EXPECT_TRUE(Filter.isEquivalentTo("***b"));
+
+ EXPECT_TRUE(Filter.contains("ab"));
+ EXPECT_TRUE(Filter.contains("aMiddleb"));
+ EXPECT_FALSE(Filter.contains("aba"));
+ EXPECT_TRUE(Filter.contains("bab"));
+ }
+}
+
TYPED_TEST(GlobListTest, PatternPriority) {
// The last glob that matches the string decides whether that string is
// included or excluded.
@@ -107,6 +159,9 @@
TYPED_TEST(GlobListTest, NewlineCharactersAsSeparators) {
TypeParam Filter("a* \n b,\n-c*,dd");
+ // FIXME: Define this behaviour, There are 2 seperator characters next to each
+ // other (,\n), Technically that should be represented as containing an empty
+ // item.
EXPECT_FALSE(Filter.contains(""));
EXPECT_TRUE(Filter.contains("aaa"));
EXPECT_TRUE(Filter.contains("b"));
@@ -117,5 +172,45 @@
EXPECT_FALSE(Filter.contains("ddd"));
}
+TYPED_TEST(GlobListTest, IgnoreNegativeMatches) {
+ TypeParam Filter("a,-a,b*,-b*,-*", false);
+
+ EXPECT_FALSE(Filter.contains(""));
+ EXPECT_TRUE(Filter.contains("a"));
+ EXPECT_TRUE(Filter.contains("b"));
+ EXPECT_TRUE(Filter.contains("bar"));
+}
+
+void checkCanonicalize(StringRef Glob, StringRef Expected) {
+ GlobList Orig(Glob);
+ std::string Canonicalized;
+ llvm::raw_string_ostream Str(Canonicalized);
+ Orig.print(Str);
+ Str.flush();
+ EXPECT_EQ(Canonicalized, Expected)
+ << "While canonicalizing glob: \"" << Glob << "\"";
+ EXPECT_TRUE(Orig.isEquivalentTo(Expected))
+ << "While canonicalizing glob: \"" << Glob << "\"";
+ ;
+ GlobList Canonical(Canonicalized);
+ EXPECT_TRUE(Orig.isEquivalentTo(Canonical))
+ << "While canonicalizing glob: \"" << Glob << "\"";
+ ;
+ EXPECT_TRUE(Canonical.isEquivalentTo(Glob))
+ << "While canonicalizing glob: \"" << Glob << "\"";
+ ;
+}
+
+TEST(GlobListTest, Printing) {
+ // A positive wildcard is always printed with any previous items removed
+ // checkCanonicalize(" *", "*");
+ // checkCanonicalize(" * , -AAA ", "*,-AAA");
+ checkCanonicalize("Unused , *, -A*", "*,-A*");
+ // A negative wildcard is removed if there is nothing else after it.
+ checkCanonicalize("-* ", "-*");
+ checkCanonicalize("-*, AAA ", "AAA");
+ checkCanonicalize("Unused\n-*\nRest\n*Others", "Rest,*Others");
+}
+
} // namespace tidy
} // namespace clang
Index: clang-tools-extra/clang-tidy/GlobList.h
===================================================================
--- clang-tools-extra/clang-tidy/GlobList.h
+++ clang-tools-extra/clang-tidy/GlobList.h
@@ -10,10 +10,10 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GLOBLIST_H
#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Regex.h"
+#include <climits>
+#include <vector>
namespace clang {
namespace tidy {
@@ -25,7 +25,9 @@
/// them in the order of appearance in the list.
class GlobList {
public:
- virtual ~GlobList() = default;
+ virtual ~GlobList();
+
+ GlobList(const GlobList &) = delete;
/// \p Globs is a comma-separated list of globs (only the '*' metacharacter is
/// supported) with an optional '-' prefix to denote exclusion.
@@ -41,12 +43,43 @@
/// matching glob's Positive flag.
virtual bool contains(StringRef S) const;
+ bool isEquivalentTo(const GlobList &Other) const;
+
+ bool isEquivalentTo(StringRef S, bool KeepNegativeGlobs = true) const;
+
+ void print(raw_ostream &OS) const;
+
private:
- struct GlobListItem {
- bool IsPositive;
- llvm::Regex Regex;
+ static constexpr size_t ItemBits = 15;
+ // Pretty arbitrary choice here.
+ static constexpr size_t NumInlineElements = 24;
+ struct GlobStartInfo {
+ uint16_t Items : ItemBits;
+ uint16_t IsPositive : 1;
+ };
+
+ bool isInlineStorage() const;
+
+ char *getStorage();
+
+ const char *getStorage() const;
+
+ union DataItem {
+ GlobStartInfo StartInfo;
+ uint16_t Size;
+ };
+
+ /// First item we encounter is a \c GlobStartInfo object. The next \c
+ /// StartInfo.Items items in here will be a raw uint16_t. After that, if there
+ /// are more items, the next will be another \c GlobStartInfo object.
+ std::vector<DataItem> Data;
+ union {
+ char Inline[NumInlineElements];
+ char *Ptr;
};
- SmallVector<GlobListItem, 0> Items;
+ size_t RawStringSize : sizeof(size_t) * CHAR_BIT - 1;
+ // The fallback value if we don't match anything.
+ size_t UnmatchedResult : 1;
};
/// A \p GlobList that caches search results, so that search is performed only
Index: clang-tools-extra/clang-tidy/GlobList.cpp
===================================================================
--- clang-tools-extra/clang-tidy/GlobList.cpp
+++ clang-tools-extra/clang-tidy/GlobList.cpp
@@ -7,63 +7,295 @@
//===----------------------------------------------------------------------===//
#include "GlobList.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+#include <limits>
+#include <numeric>
namespace clang {
namespace tidy {
-// Returns true if GlobList starts with the negative indicator ('-'), removes it
-// from the GlobList.
-static bool consumeNegativeIndicator(StringRef &GlobList) {
- GlobList = GlobList.trim();
- if (GlobList.startswith("-")) {
- GlobList = GlobList.substr(1);
- return true;
+static size_t getMaxAllocSize(StringRef Str) {
+ size_t Count = 0;
+ for (char C : Str) {
+ switch (C) {
+ case '\n': // Seperator character.
+ case ',': // Seperator character.
+ case '*': // Wildcards aren't stored.
+ continue;
+ default:
+ ++Count;
+ }
}
- return false;
+ return Count;
}
-// Converts first glob from the comma-separated list of globs to Regex and
-// removes it and the trailing comma from the GlobList.
-static llvm::Regex consumeGlob(StringRef &GlobList) {
- StringRef UntrimmedGlob = GlobList.substr(0, GlobList.find_first_of(",\n"));
- StringRef Glob = UntrimmedGlob.trim();
- GlobList = GlobList.substr(UntrimmedGlob.size() + 1);
- SmallString<128> RegexText("^");
- StringRef MetaChars("()^$|*+?.[]\\{}");
- for (char C : Glob) {
- if (C == '*')
- RegexText.push_back('.');
- else if (MetaChars.contains(C))
- RegexText.push_back('\\');
- RegexText.push_back(C);
+static std::pair<StringRef, bool> splitGlob(StringRef &Glob) {
+ size_t GlobStart = Glob.find_last_of(",\n") + 1;
+ StringRef Trimmed = Glob.substr(GlobStart).ltrim(" \t\v\f\r");
+ if (GlobStart != 0) {
+ // Bit of a hack this, Basically let ',\n' not be parsed as an empty string.
+ // This behaviour needs defining.
+ size_t NextOffset = Glob.find_last_not_of(
+ Glob[GlobStart - 1] == ',' ? " \t\r\n\f\v" : " \t\r\n\f\v,",
+ GlobStart - 1);
+
+ if (NextOffset != StringRef::npos) {
+ Glob = Glob.substr(0, NextOffset + 1);
+ return {Trimmed, true};
+ }
}
- RegexText.push_back('$');
- return llvm::Regex(RegexText);
+ return {Trimmed, false};
+}
+
+static void initialGlobTrim(StringRef &Globs) {
+ // FIXME: trimming the newline character here is suspicious as its recognised
+ // as a seperator.
+ Globs = Globs.trim(" \t\r\n\f\v");
}
-GlobList::GlobList(StringRef Globs, bool KeepNegativeGlobs /* =true */) {
- Items.reserve(Globs.count(',') + Globs.count('\n') + 1);
+GlobList::GlobList(StringRef Globs, bool KeepNegativeGlobs /* =true */)
+ : RawStringSize(0), UnmatchedResult(false) {
+ initialGlobTrim(Globs);
+ // Upperbound for the allocated size.
+ size_t MaxAllocSize = getMaxAllocSize(Globs);
+ char *Storage = Inline;
+ auto AddSizedItem = [this](size_t Size) {
+ assert(Size < std::numeric_limits<uint16_t>::max() && "Chunk too large");
+ Data.emplace_back();
+ Data.back().Size = Size;
+ };
+ bool HasMoreItems;
+ StringRef Trimmed;
do {
- GlobListItem Item;
- Item.IsPositive = !consumeNegativeIndicator(Globs);
- Item.Regex = consumeGlob(Globs);
- if (Item.IsPositive || KeepNegativeGlobs)
- Items.push_back(std::move(Item));
- } while (!Globs.empty());
+ GlobStartInfo S;
+ std::tie(Trimmed, HasMoreItems) = splitGlob(Globs);
+ S.IsPositive = !Trimmed.consume_front("-");
+ if (S.IsPositive || KeepNegativeGlobs) {
+ // The match-all glob means anything before it won't ever be read.
+ // Therefore we can stop building the glob if we reach this item.
+ // We also don't need to actually store this glob as we can just update
+ // the default result if no items match.
+ if (Trimmed == "*") {
+ UnmatchedResult = S.IsPositive;
+ return;
+ }
+ size_t StartIndex = Data.size();
+ Data.emplace_back();
+ if (Trimmed.consume_front("*"))
+ AddSizedItem(0);
+ const bool LastEmpty = Trimmed.consume_back("*");
+ // FIXME: This wouldn't support escaping the wildcard (\*), but that's
+ // not a known usecase for GlobList in clang-tidy.
+ for (StringRef Item : llvm::split(Trimmed, "*")) {
+ // Don't store empty strings here as they can only exist with sucessive
+ // wildcards(**)
+ if (Item.empty())
+ continue;
+ assert(RawStringSize + Item.size() <= MaxAllocSize);
+ // Lazily allocate out of line storage if needed.
+ if (isInlineStorage() &&
+ RawStringSize + Item.size() > NumInlineElements) {
+ // We cant store the Pointer directly in Ptr as it would clobber the
+ // Inline storage.
+ char *NewPtr = new char[MaxAllocSize];
+ ::memcpy(NewPtr, Inline, RawStringSize);
+ Ptr = NewPtr;
+ Storage = Ptr;
+ }
+ llvm::copy(Item, &Storage[RawStringSize]);
+ RawStringSize += Item.size();
+ AddSizedItem(Item.size());
+ }
+ if (LastEmpty)
+ AddSizedItem(0);
+ size_t ItemCount = Data.size() - StartIndex - 1;
+ assert(ItemCount < (1 << ItemBits) && "Too many items");
+ S.Items = ItemCount;
+ // The starting node goes at the end of the items, because we traverse
+ // the glob list in reverse order.
+ Data[StartIndex].StartInfo = S;
+ }
+ } while (HasMoreItems);
}
-bool GlobList::contains(StringRef S) const {
- // Iterating the container backwards as the last match determins if S is in
- // the list.
- for (const GlobListItem &Item : llvm::reverse(Items)) {
- if (Item.Regex.match(S))
- return Item.IsPositive;
+bool GlobList::isEquivalentTo(StringRef Globs, bool KeepNegativeGlobs) const {
+ initialGlobTrim(Globs);
+ bool HasMoreItems;
+ StringRef Trimmed;
+ auto Begin = Data.begin();
+ auto End = Data.end();
+ size_t CurSize = 0;
+ bool Error = false;
+ const char *Storage = getStorage();
+ do {
+ GlobStartInfo S;
+ std::tie(Trimmed, HasMoreItems) = splitGlob(Globs);
+ S.IsPositive = !Trimmed.consume_front("-");
+ if (S.IsPositive || KeepNegativeGlobs) {
+ // The match-all glob means anything before it won't ever be read.
+ if (Trimmed == "*") {
+ Error = UnmatchedResult != S.IsPositive;
+ break;
+ }
+ if (Begin == End || S.IsPositive != Begin->StartInfo.IsPositive)
+ return false;
+ size_t Items = 0;
+ size_t ExpectedItems = Begin->StartInfo.Items;
+ ++Begin;
+ auto EatItem = [&Items, ExpectedItems, &Begin](size_t Size = 0) {
+ return ++Items <= ExpectedItems && (Begin++)->Size == Size;
+ };
+
+ if (Trimmed.consume_front("*") && !EatItem())
+ return false;
+
+ const bool LastEmpty = Trimmed.consume_back("*");
+ for (llvm::StringRef Item : llvm::split(Trimmed, "*")) {
+ if (Item.empty())
+ continue;
+ assert(Item.size() + CurSize <= RawStringSize);
+ if (!EatItem(Item.size()) ||
+ memcmp(Item.data(), &Storage[CurSize], Item.size()) != 0)
+ return false;
+ CurSize += Item.size();
+ }
+ if ((LastEmpty && !EatItem()) || Items != ExpectedItems)
+ return false;
+ }
+ } while (HasMoreItems);
+ if (!Error && Begin == End) {
+ assert(CurSize == RawStringSize);
+ return true;
}
return false;
}
+GlobList::~GlobList() {
+ if (!isInlineStorage())
+ delete[] Ptr;
+}
+
+static bool matchGlobList(StringRef Source, llvm::ArrayRef<uint16_t> Offsets,
+ StringRef Match) {
+ assert(!Offsets.empty());
+ // If we only had one item originally, then we are working with an exact match
+ // with no wild cards.
+ if (Offsets.size() == 1) {
+ assert(Source.size() == Offsets.front());
+ return Match == Source;
+ }
+
+ if (!Match.consume_front(Source.take_front(Offsets.front())))
+ return false;
+ Source = Source.drop_front(Offsets.front());
+
+ for (uint16_t Offset : Offsets.drop_front().drop_back()) {
+ StringRef SearchItem = Source.take_front(Offset);
+ Source = Source.drop_front(Offset);
+ size_t Idx = Match.find(SearchItem);
+ if (Idx == StringRef::npos)
+ return false;
+ Match = Match.drop_front(Idx + Offset);
+ }
+
+ assert(Source.size() == Offsets.back());
+ // For the last item, we know previously there was a wildcard, so we only need
+ // to check for a suffix match.
+ return Match.endswith(Source);
+}
+
+bool GlobList::contains(StringRef S) const {
+ StringRef Source(getStorage(), RawStringSize);
+ ArrayRef<DataItem> Items(Data);
+ while (!Items.empty()) {
+ const GlobStartInfo &StartNode = Items.front().StartInfo;
+ Items = Items.drop_front();
+ assert(Items.size() >= StartNode.Items);
+ if (StartNode.Items == 0) {
+ if (S.empty())
+ return StartNode.IsPositive;
+ continue;
+ }
+ ArrayRef<DataItem> GlobItems = Items.take_front(StartNode.Items);
+ Items = Items.drop_front(StartNode.Items);
+ ArrayRef<uint16_t> Offsets =
+ llvm::makeArrayRef(&GlobItems.front().Size, StartNode.Items);
+ size_t Size = std::accumulate(Offsets.begin(), Offsets.end(), size_t(0U));
+ if (matchGlobList(Source.take_front(Size), Offsets, S))
+ return StartNode.IsPositive;
+ Source = Source.drop_front(Size);
+ }
+ return UnmatchedResult;
+}
+
+bool GlobList::isEquivalentTo(const GlobList &Other) const {
+ // Check the cheap to determine lengths first.
+ if (std::make_tuple(RawStringSize, UnmatchedResult, Data.size()) !=
+ std::make_tuple(Other.RawStringSize, Other.UnmatchedResult,
+ Other.Data.size()))
+ return false;
+ // Then check the elements, These are POD types so memcmp is safe for this.
+ if (memcmp(Data.data(), Other.Data.data(), Data.size() * sizeof(Data[0])) !=
+ 0)
+ return false;
+ return strncmp(getStorage(), Other.getStorage(), RawStringSize) == 0;
+}
+
+void GlobList::print(raw_ostream &OS) const {
+ StringRef Source(getStorage(), RawStringSize);
+ // For ease of matching, we store the globs in reverse order to what they are
+ // written in. Therefore when we print the glob, we need to reverse the order
+ // they are stored in.
+ llvm::SmallVector<std::pair<const DataItem *, size_t>> RevPrint;
+ ArrayRef<DataItem> Items(Data);
+ size_t Cur = 0;
+ while (!Items.empty()) {
+ const DataItem *Start = Items.begin();
+ RevPrint.emplace_back(Start, Cur);
+ ArrayRef<uint16_t> Offsets(&Start[1].Size, Start->StartInfo.Items);
+ Items = Items.drop_front(1 + Start->StartInfo.Items);
+ Cur += std::accumulate(Offsets.begin(), Offsets.end(), 0U);
+ }
+ if (UnmatchedResult)
+ OS << "*";
+ else if (RevPrint.empty())
+ OS << "-*";
+ bool Sep = UnmatchedResult;
+ for (const auto &Item : llvm::reverse(RevPrint)) {
+ if (Sep)
+ OS << ',';
+ Sep = true;
+ if (!Item.first->StartInfo.IsPositive)
+ OS << '-';
+ ArrayRef<uint16_t> Offsets(&Item.first[1].Size,
+ Item.first->StartInfo.Items);
+ if (Offsets.empty())
+ continue;
+ OS << Source.substr(Item.second, Offsets.front());
+ size_t NextOff = Item.second + Offsets.front();
+ for (uint16_t Item : Offsets.drop_front()) {
+ OS << '*' << Source.substr(NextOff, Item);
+ NextOff += Item;
+ }
+ }
+}
+
+bool GlobList::isInlineStorage() const {
+ return RawStringSize <= NumInlineElements;
+}
+
+char *GlobList::getStorage() { return isInlineStorage() ? Inline : Ptr; }
+
+const char *GlobList::getStorage() const {
+ return const_cast<GlobList *>(this)->getStorage();
+}
+
bool CachedGlobList::contains(StringRef S) const {
auto Entry = Cache.try_emplace(S);
bool &Value = Entry.first->getValue();
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits