sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
sammccall requested review of this revision.

A recent change increased the stack size of memoizedMatchesAncestorOfRecursively
leading to stack overflows on real code involving large fold expressions.
It's not totally unreasonable to choke on very deep ASTs, but as common
infrastructure it's be nice if ASTMatchFinder is more robust.
(It already uses data recursion for the regular "downward" traversal.)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86964

Files:
  clang/lib/ASTMatchers/ASTMatchFinder.cpp

Index: clang/lib/ASTMatchers/ASTMatchFinder.cpp
===================================================================
--- clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -544,8 +544,9 @@
     // don't invalidate any iterators.
     if (ResultCache.size() > MaxMemoizationEntries)
       ResultCache.clear();
-    return memoizedMatchesAncestorOfRecursively(Node, Ctx, Matcher, Builder,
-                                                MatchMode);
+    if (MatchMode == AncestorMatchMode::AMM_ParentOnly)
+      return matchesParentOf(Node, Matcher, Builder);
+    return matchesAnyAncestorOf(Node, Ctx, Matcher, Builder);
   }
 
   // Matches all registered matchers on the given node and calls the
@@ -699,9 +700,23 @@
   void matchDispatch(const void *) { /* Do nothing. */ }
   /// @}
 
+  // Returns whether a direct parent of \p Node matches \p Matcher.
+  // Unlike matchesAnyAncestorOf there's no memoization: it doesn't save much.
+  bool matchesParentOf(const DynTypedNode &Node, const DynTypedMatcher &Matcher,
+                       BoundNodesTreeBuilder *Builder) {
+    for (const auto &Parent : ActiveASTContext->getParents(Node)) {
+      BoundNodesTreeBuilder BuilderCopy = *Builder;
+      if (Matcher.matches(Parent, this, &BuilderCopy)) {
+        *Builder = std::move(BuilderCopy);
+        return true;
+      }
+    }
+    return false;
+  }
+
   // Returns whether an ancestor of \p Node matches \p Matcher.
   //
-  // The order of matching ((which can lead to different nodes being bound in
+  // The order of matching (which can lead to different nodes being bound in
   // case there are multiple matches) is breadth first search.
   //
   // To allow memoization in the very common case of having deeply nested
@@ -712,51 +727,62 @@
   // Once there are multiple parents, the breadth first search order does not
   // allow simple memoization on the ancestors. Thus, we only memoize as long
   // as there is a single parent.
-  bool memoizedMatchesAncestorOfRecursively(const DynTypedNode &Node,
-                                            ASTContext &Ctx,
-                                            const DynTypedMatcher &Matcher,
-                                            BoundNodesTreeBuilder *Builder,
-                                            AncestorMatchMode MatchMode) {
-    // For AST-nodes that don't have an identity, we can't memoize.
-    // When doing a single-level match, we don't need to memoize because
-    // ParentMap (in ASTContext) already memoizes the result.
-    if (!Builder->isComparable() ||
-        MatchMode == AncestorMatchMode::AMM_ParentOnly)
-      return matchesAncestorOfRecursively(Node, Ctx, Matcher, Builder,
-                                          MatchMode);
-
-    MatchKey Key;
-    Key.MatcherID = Matcher.getID();
-    Key.Node = Node;
-    Key.BoundNodes = *Builder;
-    Key.Traversal = Ctx.getParentMapContext().getTraversalKind();
-    Key.Type = MatchType::Ancestors;
-
-    // Note that we cannot use insert and reuse the iterator, as recursive
-    // calls to match might invalidate the result cache iterators.
-    MemoizationMap::iterator I = ResultCache.find(Key);
-    if (I != ResultCache.end()) {
-      *Builder = I->second.Nodes;
-      return I->second.ResultOfMatch;
-    }
+  //
+  // We avoid a recursive implementation to prevent excessive stack use on
+  // very deep ASTs (similarly to RecursiveASTVisitor's data recursion).
+  bool matchesAnyAncestorOf(DynTypedNode Node, ASTContext &Ctx,
+                            const DynTypedMatcher &Matcher,
+                            BoundNodesTreeBuilder *Builder) {
 
-    MemoizedMatchResult Result;
-    Result.Nodes = *Builder;
-    Result.ResultOfMatch = matchesAncestorOfRecursively(
-        Node, Ctx, Matcher, &Result.Nodes, MatchMode);
+    // Memoization keys that must be updated with the result.
+    std::vector<MatchKey> Keys;
+    // When returning, update the memoization cache.
+    auto Finish = [&](bool Result) {
+      for (const auto &Key : Keys) {
+        MemoizedMatchResult &CachedResult = ResultCache[Key];
+        CachedResult.ResultOfMatch = Result;
+        CachedResult.Nodes = *Builder;
+      }
+      return Result;
+    };
+
+    // Loop while there's a single parent and we want to attempt memoization.
+    DynTypedNodeList Parents{ArrayRef<DynTypedNode>()}; // after loop: size != 1
+    for (;;) {
+      // A cache key only makes sense if memoization is possible.
+      if (Builder->isComparable()) {
+        Keys.emplace_back();
+        Keys.back().MatcherID = Matcher.getID();
+        Keys.back().Node = Node;
+        Keys.back().BoundNodes = *Builder;
+        Keys.back().Traversal = Ctx.getParentMapContext().getTraversalKind();
+        Keys.back().Type = MatchType::Ancestors;
+
+        // Check the cache.
+        MemoizationMap::iterator I = ResultCache.find(Keys.back());
+        if (I != ResultCache.end()) {
+          Keys.pop_back(); // Don't populate the cache for the matching node!
+          *Builder = I->second.Nodes;
+          return Finish(I->second.ResultOfMatch);
+        }
+      }
 
-    MemoizedMatchResult &CachedResult = ResultCache[Key];
-    CachedResult = std::move(Result);
+      Parents = ActiveASTContext->getParents(Node);
+      // Either no parents or multiple parents: leave chain+memoize mode and
+      // enter bfs+forgetful mode.
+      if (Parents.size() != 1)
+        break;
 
-    *Builder = CachedResult.Nodes;
-    return CachedResult.ResultOfMatch;
-  }
+      // Check the next parent.
+      Node = *Parents.begin();
+      BoundNodesTreeBuilder BuilderCopy = *Builder;
+      if (Matcher.matches(Node, this, &BuilderCopy)) {
+        *Builder = std::move(BuilderCopy);
+        return Finish(true);
+      }
+    }
+    // We reached the end of the chain.
 
-  bool matchesAncestorOfRecursively(const DynTypedNode &Node, ASTContext &Ctx,
-                                    const DynTypedMatcher &Matcher,
-                                    BoundNodesTreeBuilder *Builder,
-                                    AncestorMatchMode MatchMode) {
-    const auto &Parents = ActiveASTContext->getParents(Node);
     if (Parents.empty()) {
       // Nodes may have no parents if:
       //  a) the node is the TranslationUnitDecl
@@ -775,46 +801,29 @@
         llvm_unreachable("Parent map should be complete!");
       }
 #endif
-      return false;
-    }
-    if (Parents.size() == 1) {
-      // Only one parent - do recursive memoization.
-      const DynTypedNode Parent = Parents[0];
-      BoundNodesTreeBuilder BuilderCopy = *Builder;
-      if (Matcher.matches(Parent, this, &BuilderCopy)) {
-        *Builder = std::move(BuilderCopy);
-        return true;
-      }
-      if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
-        return memoizedMatchesAncestorOfRecursively(Parent, Ctx, Matcher,
-                                                    Builder, MatchMode);
-        // Once we get back from the recursive call, the result will be the
-        // same as the parent's result.
-      }
     } else {
-      // Multiple parents - BFS over the rest of the nodes.
-      llvm::DenseSet<const void *> Visited;
+      // BFS starting from the parents not yet considered.
+      // Memoization of newly visited nodes is not possible (but we still update
+      // results for the elements in the chain we found above).
       std::deque<DynTypedNode> Queue(Parents.begin(), Parents.end());
+      llvm::DenseSet<const void *> Visited;
       while (!Queue.empty()) {
         BoundNodesTreeBuilder BuilderCopy = *Builder;
         if (Matcher.matches(Queue.front(), this, &BuilderCopy)) {
           *Builder = std::move(BuilderCopy);
-          return true;
+          return Finish(true);
         }
-        if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
-          for (const auto &Parent :
-               ActiveASTContext->getParents(Queue.front())) {
-            // Make sure we do not visit the same node twice.
-            // Otherwise, we'll visit the common ancestors as often as there
-            // are splits on the way down.
-            if (Visited.insert(Parent.getMemoizationData()).second)
-              Queue.push_back(Parent);
-          }
+        for (const auto &Parent : ActiveASTContext->getParents(Queue.front())) {
+          // Make sure we do not visit the same node twice.
+          // Otherwise, we'll visit the common ancestors as often as there
+          // are splits on the way down.
+          if (Visited.insert(Parent.getMemoizationData()).second)
+            Queue.push_back(Parent);
         }
         Queue.pop_front();
       }
     }
-    return false;
+    return Finish(false);
   }
 
   // Implements a BoundNodesTree::Visitor that calls a MatchCallback with
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to