https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934
>From bcdf8831a7727bb9f6be712e37fafdccf181f1c5 Mon Sep 17 00:00:00 2001 From: higher-performance <higher.performance.git...@gmail.com> Date: Wed, 5 Mar 2025 15:49:06 -0500 Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily checking if nodes have been seen --- clang/lib/AST/ParentMapContext.cpp | 52 ++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index e9387ec79c373..afbf961d94629 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -60,6 +60,30 @@ class ParentMapContext::ParentMap { template <typename, typename...> friend struct ::MatchParents; + template <class T> struct IndirectDenseMapInfo { + using Ptr = T *; + using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; + static inline Ptr getEmptyKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey()); + } + static inline Ptr getTombstoneKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey()); + } + static unsigned getHashValue(Ptr Val) { + return Val == getEmptyKey() || Val == getTombstoneKey() + ? 0 + : Base::getHashValue(*Val); + } + static bool isEqual(Ptr LHS, Ptr RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) { + return LHS == RHS; + } + return Base::isEqual(*LHS, *RHS); + } + }; + using MapInfo = IndirectDenseMapInfo<const DynTypedNode>; + /// Contains parents of a node. class ParentVector { public: @@ -70,16 +94,38 @@ class ParentMapContext::ParentMap { push_back(Value); } bool contains(const DynTypedNode &Value) { - return Seen.contains(Value); + assert(Value.getMemoizationData()); + bool found = FragileLazySeenCache.contains(&Value); + while (!found && ItemsProcessed < Items.size()) { + const auto it = Items.begin() + ItemsProcessed; + found |= MapInfo::isEqual(&*it, &Value); + FragileLazySeenCache.insert(&*it); + ++ItemsProcessed; + } + return found; } void push_back(const DynTypedNode &Value) { - if (!Value.getMemoizationData() || Seen.insert(Value).second) + if (!Value.getMemoizationData() || !contains(Value)) { + const size_t OldCapacity = Items.capacity(); Items.push_back(Value); + if (OldCapacity != Items.capacity()) { + // Pointers are invalidated; remove them. + ItemsProcessed = 0; + // Free memory to avoid doubling peak memory usage during rehashing + FragileLazySeenCache.clear(); + } + } } llvm::ArrayRef<DynTypedNode> view() const { return Items; } private: + // BE CAREFUL. Pointers into this container are stored in the + // `FragileLazySeenCache` set below. llvm::SmallVector<DynTypedNode, 2> Items; - llvm::SmallDenseSet<DynTypedNode, 2> Seen; + // This cache is fragile because it contains pointers that are invalidated + // when the vector capacity changes. + llvm::SmallDenseSet<const DynTypedNode *, 2, MapInfo> FragileLazySeenCache; + // Lazily tracks which items have been processed for the cache. + size_t ItemsProcessed = 0; }; /// Maps from a node to its parents. This is used for nodes that have _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits