https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934
>From 9f12c5856691268177e748d2a98de24ee30bdfd1 Mon Sep 17 00:00:00 2001 From: higher-performance <higher.performance.git...@gmail.com> Date: Wed, 5 Mar 2025 15:49:06 -0500 Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily checking if nodes have been seen --- clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index e9387ec79c373..b7156b892b01a 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -60,6 +60,29 @@ class ParentMapContext::ParentMap { template <typename, typename...> friend struct ::MatchParents; + template <class T> struct IndirectDenseMapInfo { + using Ptr = T *; + using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; + static inline Ptr getEmptyKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey()); + } + static inline Ptr getTombstoneKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey()); + } + static unsigned getHashValue(Ptr Val) { + return Val == getEmptyKey() || Val == getTombstoneKey() + ? 0 + : Base::getHashValue(*Val); + } + static bool isEqual(Ptr LHS, Ptr RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) { + return LHS == RHS; + } + return Base::isEqual(*LHS, *RHS); + } + }; + /// Contains parents of a node. class ParentVector { public: @@ -70,16 +93,37 @@ class ParentMapContext::ParentMap { push_back(Value); } bool contains(const DynTypedNode &Value) { - return Seen.contains(Value); + assert(Value.getMemoizationData()); + bool found = FragileLazySeenCache.contains(&Value); + while (!found && ItemsProcessed < Items.size()) { + found |= FragileLazySeenCache.insert(&Items[ItemsProcessed]).second; + ++ItemsProcessed; + } + return found; } void push_back(const DynTypedNode &Value) { - if (!Value.getMemoizationData() || Seen.insert(Value).second) + if (!Value.getMemoizationData() || !contains(Value)) { + const size_t OldCapacity = Items.capacity(); Items.push_back(Value); + if (OldCapacity != Items.capacity()) { + // Pointers are invalidated; remove them. + ItemsProcessed = 0; + // Free memory to avoid doubling peak memory usage during rehashing + FragileLazySeenCache.clear(); + } + } } llvm::ArrayRef<DynTypedNode> view() const { return Items; } private: + // BE CAREFUL. Pointers into this container are stored in the cache. llvm::SmallVector<DynTypedNode, 2> Items; - llvm::SmallDenseSet<DynTypedNode, 2> Seen; + // This cache is fragile because it contains pointers that are invalidated + // when the vector capacity changes. + llvm::SmallDenseSet<const DynTypedNode *, 2, + IndirectDenseMapInfo<const DynTypedNode>> + FragileLazySeenCache; + // Lazily tracks which items have been processed for the cache. + size_t ItemsProcessed = 0; }; /// Maps from a node to its parents. This is used for nodes that have _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits