Author: Duncan P. N. Exon Smith Date: 2021-01-21T12:11:41-08:00 New Revision: d7ff0036463fbf049a240fe3792fcfcd8081c41e
URL: https://github.com/llvm/llvm-project/commit/d7ff0036463fbf049a240fe3792fcfcd8081c41e DIFF: https://github.com/llvm/llvm-project/commit/d7ff0036463fbf049a240fe3792fcfcd8081c41e.diff LOG: ADT: Fix reference invalidation in SmallVector::emplace_back and assign(N,V) This fixes the final (I think?) reference invalidation in `SmallVector` that we need to fix to align with `std::vector`. (There is still some left in the range insert / append / assign, but the standard calls that UB for `std::vector` so I think we don't care?) For POD-like types, reimplement `emplace_back()` in terms of `push_back()`, taking a copy even for large `T` rather than lose the realloc optimization in `grow_pod()`. For other types, split the grow operation in three and construct the new element in the middle. - `mallocForGrow()` calculates the new capacity and returns the result of `safe_malloc()`. We only need a single definition per `SmallVectorBase` so this is defined in SmallVector.cpp to avoid code size bloat. Moving this part of non-POD grow to the source file also allows the logic to be easily shared with `grow_pod`, and `report_size_overflow()` and `report_at_maximum_capacity()` can move there too. - `moveElementsForGrow()` moves elements from the old to the new allocation. - `takeAllocationForGrow()` frees the old allocation and saves the new allocation and capacity . `SmallVector:assign(size_type, const T&)` also uses the split-grow operations for non-POD, but it also has a semantic change when not growing. Previously, assign would start with `clear()`, and so the old elements were destructed and all elements of the new vector were copy-constructed (potentially invalidating references). The new implementation skips destruction and uses copy-assignment for the prefix of the new vector that fits. The new semantics match what libc++ does for `std::vector::assign()`. Note that the following is another possible implementation: ``` void assign(size_type NumElts, ValueParamT Elt) { std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); this->resize(NumElts, Elt); } ``` The downside of this simpler implementation is that if the vector has to grow there will be `size()` redundant copy operations. (I had planned on splitting this patch up into three for committing (after getting performance numbers / initial review), but I've realized that if this does for some reason need to be reverted we'll probably want to revert the whole package...) Differential Revision: https://reviews.llvm.org/D94739 Added: Modified: llvm/include/llvm/ADT/SmallVector.h llvm/lib/Support/SmallVector.cpp llvm/unittests/ADT/SmallVectorTest.cpp Removed: ################################################################################ diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 2e47846ee7cf..dd72937c19e2 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -56,18 +56,16 @@ template <class Size_T> class SmallVectorBase { SmallVectorBase(void *FirstEl, size_t TotalCapacity) : BeginX(FirstEl), Capacity(TotalCapacity) {} + /// This is a helper for \a grow() that's out of line to reduce code + /// duplication. This function will report a fatal error if it can't grow at + /// least to \p MinSize. + void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity); + /// This is an implementation of the grow() method which only works /// on POD-like data types and is out of line to reduce code duplication. /// This function will report a fatal error if it cannot increase capacity. void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); - /// Report that MinSize doesn't fit into this vector's size type. Throws - /// std::length_error or calls report_fatal_error. - LLVM_ATTRIBUTE_NORETURN static void report_size_overflow(size_t MinSize); - /// Report that this vector is already at maximum capacity. Throws - /// std::length_error or calls report_fatal_error. - LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(); - public: size_t size() const { return Size; } size_t capacity() const { return Capacity; } @@ -211,15 +209,6 @@ class SmallVectorTemplateCommon bool> = false> void assertSafeToAddRange(ItTy, ItTy) {} - /// Check whether any argument will be invalidated by growing for - /// emplace_back. - template <class ArgType1, class... ArgTypes> - void assertSafeToEmplace(ArgType1 &Arg1, ArgTypes &... Args) { - this->assertSafeToAdd(&Arg1); - this->assertSafeToEmplace(Args...); - } - void assertSafeToEmplace() {} - /// Reserve enough space to add one element, and return the updated element /// pointer in case it was a reference to the storage. template <class U> @@ -359,6 +348,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { /// element, or MinSize more elements if specified. void grow(size_t MinSize = 0); + /// Create a new allocation big enough for \p MinSize and pass back its size + /// in \p NewCapacity. This is the first section of \a grow(). + T *mallocForGrow(size_t MinSize, size_t &NewCapacity) { + return static_cast<T *>( + SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow( + MinSize, sizeof(T), NewCapacity)); + } + + /// Move existing elements over to the new allocation \p NewElts, the middle + /// section of \a grow(). + void moveElementsForGrow(T *NewElts); + + /// Transfer ownership of the allocation, finishing up \a grow(). + void takeAllocationForGrow(T *NewElts, size_t NewCapacity); + /// Reserve enough space to add one element, and return the updated element /// pointer in case it was a reference to the storage. const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { @@ -375,6 +379,27 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { static T &&forward_value_param(T &&V) { return std::move(V); } static const T &forward_value_param(const T &V) { return V; } + void growAndAssign(size_t NumElts, const T &Elt) { + // Grow manually in case Elt is an internal reference. + size_t NewCapacity; + T *NewElts = mallocForGrow(NumElts, NewCapacity); + std::uninitialized_fill_n(NewElts, NumElts, Elt); + this->destroy_range(this->begin(), this->end()); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(NumElts); + } + + template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { + // Grow manually in case one of Args is an internal reference. + size_t NewCapacity; + T *NewElts = mallocForGrow(0, NewCapacity); + ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(this->size() + 1); + return this->back(); + } + public: void push_back(const T &Elt) { const T *EltPtr = reserveForParamAndGetAddress(Elt); @@ -397,29 +422,27 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { // Define this out-of-line to dissuade the C++ compiler from inlining it. template <typename T, bool TriviallyCopyable> void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { - // Ensure we can fit the new capacity. - // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > this->SizeTypeMax()) - this->report_size_overflow(MinSize); - - // Ensure we can meet the guarantee of space for at least one more element. - // The above check alone will not catch the case where grow is called with a - // default MinSize of 0, but the current capacity cannot be increased. - // This is only going to be applicable when the capacity is 32 bit. - if (this->capacity() == this->SizeTypeMax()) - this->report_at_maximum_capacity(); - - // Always grow, even from zero. - size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); - NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax()); - T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T))); + size_t NewCapacity; + T *NewElts = mallocForGrow(MinSize, NewCapacity); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); +} +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template <typename T, bool TriviallyCopyable> +void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow( + T *NewElts) { // Move the elements over. this->uninitialized_move(this->begin(), this->end(), NewElts); // Destroy the original elements. destroy_range(this->begin(), this->end()); +} +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template <typename T, bool TriviallyCopyable> +void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow( + T *NewElts, size_t NewCapacity) { // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) free(this->begin()); @@ -502,6 +525,23 @@ class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { /// Copy \p V or return a reference, depending on \a ValueParamT. static ValueParamT forward_value_param(ValueParamT V) { return V; } + void growAndAssign(size_t NumElts, T Elt) { + // Elt has been copied in case it's an internal reference, side-stepping + // reference invalidation problems without losing the realloc optimization. + this->set_size(0); + this->grow(NumElts); + std::uninitialized_fill_n(this->begin(), NumElts, Elt); + this->set_size(NumElts); + } + + template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { + // Use push_back with a copy in case Args has an internal reference, + // side-stepping reference invalidation problems without losing the realloc + // optimization. + push_back(T(std::forward<ArgTypes>(Args)...)); + return this->back(); + } + public: void push_back(ValueParamT Elt) { const T *EltPtr = reserveForParamAndGetAddress(Elt); @@ -624,17 +664,25 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { append(IL.begin(), IL.end()); } - // FIXME: Consider assigning over existing elements, rather than clearing & - // re-initializing them - for all assign(...) variants. + void assign(size_type NumElts, ValueParamT Elt) { + // Note that Elt could be an internal reference. + if (NumElts > this->capacity()) { + this->growAndAssign(NumElts, Elt); + return; + } - void assign(size_type NumElts, const T &Elt) { - this->assertSafeToReferenceAfterResize(&Elt, 0); - clear(); - this->reserve(NumElts); + // Assign over existing elements. + std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); + if (NumElts > this->size()) + std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt); + else if (NumElts < this->size()) + this->destroy_range(this->begin() + NumElts, this->end()); this->set_size(NumElts); - std::uninitialized_fill(this->begin(), this->end(), Elt); } + // FIXME: Consider assigning over existing elements, rather than clearing & + // re-initializing them - for all assign(...) variants. + template <typename in_iter, typename = std::enable_if_t<std::is_convertible< typename std::iterator_traits<in_iter>::iterator_category, @@ -854,9 +902,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { } template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) { - this->assertSafeToEmplace(Args...); if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); + return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...); + ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...); this->set_size(this->size() + 1); return this->back(); diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 7e3d01fe6864..0005f7840912 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -45,12 +45,15 @@ static_assert(sizeof(SmallVector<char, 0>) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); -template <class Size_T> -void SmallVectorBase<Size_T>::report_size_overflow(size_t MinSize) { +/// Report that MinSize doesn't fit into this vector's size type. Throws +/// std::length_error or calls report_fatal_error. +LLVM_ATTRIBUTE_NORETURN +static void report_size_overflow(size_t MinSize, size_t MaxSize); +static void report_size_overflow(size_t MinSize, size_t MaxSize) { std::string Reason = "SmallVector unable to grow. Requested capacity (" + std::to_string(MinSize) + ") is larger than maximum value for size type (" + - std::to_string(SizeTypeMax()) + ")"; + std::to_string(MaxSize) + ")"; #ifdef LLVM_ENABLE_EXCEPTIONS throw std::length_error(Reason); #else @@ -58,10 +61,13 @@ void SmallVectorBase<Size_T>::report_size_overflow(size_t MinSize) { #endif } -template <class Size_T> void SmallVectorBase<Size_T>::report_at_maximum_capacity() { +/// Report that this vector is already at maximum capacity. Throws +/// std::length_error or calls report_fatal_error. +LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize); +static void report_at_maximum_capacity(size_t MaxSize) { std::string Reason = "SmallVector capacity unable to grow. Already at maximum size " + - std::to_string(SizeTypeMax()); + std::to_string(MaxSize); #ifdef LLVM_ENABLE_EXCEPTIONS throw std::length_error(Reason); #else @@ -71,25 +77,40 @@ template <class Size_T> void SmallVectorBase<Size_T>::report_at_maximum_capacity // Note: Moving this function into the header may cause performance regression. template <class Size_T> -void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinSize, - size_t TSize) { +static size_t getNewCapacity(size_t MinSize, size_t TSize, size_t OldCapacity) { + constexpr size_t MaxSize = std::numeric_limits<Size_T>::max(); + // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > SizeTypeMax()) - report_size_overflow(MinSize); + if (MinSize > MaxSize) + report_size_overflow(MinSize, MaxSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. - if (capacity() == SizeTypeMax()) - report_at_maximum_capacity(); + if (OldCapacity == MaxSize) + report_at_maximum_capacity(MaxSize); // In theory 2*capacity can overflow if the capacity is 64 bit, but the // original capacity would never be large enough for this to be a problem. - size_t NewCapacity = 2 * capacity() + 1; // Always grow. - NewCapacity = std::min(std::max(NewCapacity, MinSize), SizeTypeMax()); + size_t NewCapacity = 2 * OldCapacity + 1; // Always grow. + return std::min(std::max(NewCapacity, MinSize), MaxSize); +} +// Note: Moving this function into the header may cause performance regression. +template <class Size_T> +void *SmallVectorBase<Size_T>::mallocForGrow(size_t MinSize, size_t TSize, + size_t &NewCapacity) { + NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity()); + return llvm::safe_malloc(NewCapacity * TSize); +} + +// Note: Moving this function into the header may cause performance regression. +template <class Size_T> +void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinSize, + size_t TSize) { + size_t NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity()); void *NewElts; if (BeginX == FirstEl) { NewElts = safe_malloc(NewCapacity * TSize); diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp index 1f6c7db99fa8..b2cccc1a1e56 100644 --- a/llvm/unittests/ADT/SmallVectorTest.cpp +++ b/llvm/unittests/ADT/SmallVectorTest.cpp @@ -1179,16 +1179,41 @@ TYPED_TEST(SmallVectorReferenceInvalidationTest, AppendRange) { } TYPED_TEST(SmallVectorReferenceInvalidationTest, Assign) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - // Regardless of capacity, assign should never reference an internal element. - EXPECT_DEATH(V.assign(1, V.back()), this->AssertionMessage); - EXPECT_DEATH(V.assign(this->NumBuiltinElts(V), V.back()), - this->AssertionMessage); - EXPECT_DEATH(V.assign(this->NumBuiltinElts(V) + 1, V.back()), - this->AssertionMessage); -#endif + int N = this->NumBuiltinElts(V); + ASSERT_EQ(unsigned(N), V.size()); + ASSERT_EQ(unsigned(N), V.capacity()); + + // Check assign that shrinks in small mode. + V.assign(1, V.back()); + EXPECT_EQ(1u, V.size()); + EXPECT_EQ(N, V[0]); + + // Check assign that grows within small mode. + ASSERT_LT(V.size(), V.capacity()); + V.assign(V.capacity(), V.back()); + for (int I = 0, E = V.size(); I != E; ++I) { + EXPECT_EQ(N, V[I]); + + // Reset to [1, 2, ...]. + V[I] = I + 1; + } + + // Check assign that grows to large mode. + ASSERT_EQ(2, V[1]); + V.assign(V.capacity() + 1, V[1]); + for (int I = 0, E = V.size(); I != E; ++I) { + EXPECT_EQ(2, V[I]); + + // Reset to [1, 2, ...]. + V[I] = I + 1; + } + + // Check assign that shrinks in large mode. + V.assign(1, V[1]); + EXPECT_EQ(2, V[0]); } TYPED_TEST(SmallVectorReferenceInvalidationTest, AssignRange) { @@ -1289,11 +1314,25 @@ TYPED_TEST(SmallVectorReferenceInvalidationTest, InsertRange) { } TYPED_TEST(SmallVectorReferenceInvalidationTest, EmplaceBack) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; - (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.emplace_back(V.back()), this->AssertionMessage); -#endif + int N = this->NumBuiltinElts(V); + + // Push back a reference to last element when growing from small storage. + V.emplace_back(V.back()); + EXPECT_EQ(N, V.back()); + + // Check that the old value is still there (not moved away). + EXPECT_EQ(N, V[V.size() - 2]); + + // Fill storage again. + V.back() = V.size(); + while (V.size() < V.capacity()) + V.push_back(V.size() + 1); + + // Push back a reference to last element when growing from large storage. + V.emplace_back(V.back()); + EXPECT_EQ(int(V.size()) - 1, V.back()); } template <class VectorT> @@ -1315,25 +1354,42 @@ class SmallVectorInternalReferenceInvalidationTest SmallVectorTestBase::SetUp(); // Fill up the small size so that insertions move the elements. - V.push_back(std::make_pair(0, 0)); + for (int I = 0, E = NumBuiltinElts(V); I != E; ++I) + V.emplace_back(I + 1, I + 1); } }; // Test pairs of the same types from SmallVectorReferenceInvalidationTestTypes. using SmallVectorInternalReferenceInvalidationTestTypes = - ::testing::Types<SmallVector<std::pair<int, int>, 1>, - SmallVector<std::pair<Constructable, Constructable>, 1>>; + ::testing::Types<SmallVector<std::pair<int, int>, 3>, + SmallVector<std::pair<Constructable, Constructable>, 3>>; TYPED_TEST_CASE(SmallVectorInternalReferenceInvalidationTest, SmallVectorInternalReferenceInvalidationTestTypes); TYPED_TEST(SmallVectorInternalReferenceInvalidationTest, EmplaceBack) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; - (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.emplace_back(V.back().first, 0), this->AssertionMessage); - EXPECT_DEATH(V.emplace_back(0, V.back().second), this->AssertionMessage); -#endif + int N = this->NumBuiltinElts(V); + + // Push back a reference to last element when growing from small storage. + V.emplace_back(V.back().first, V.back().second); + EXPECT_EQ(N, V.back().first); + EXPECT_EQ(N, V.back().second); + + // Check that the old value is still there (not moved away). + EXPECT_EQ(N, V[V.size() - 2].first); + EXPECT_EQ(N, V[V.size() - 2].second); + + // Fill storage again. + V.back().first = V.back().second = V.size(); + while (V.size() < V.capacity()) + V.emplace_back(V.size() + 1, V.size() + 1); + + // Push back a reference to last element when growing from large storage. + V.emplace_back(V.back().first, V.back().second); + EXPECT_EQ(int(V.size()) - 1, V.back().first); + EXPECT_EQ(int(V.size()) - 1, V.back().second); } } // end namespace _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits