================ @@ -0,0 +1,322 @@ +//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PagedVector class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_ADT_PAGEDVECTOR_H +#define LLVM_ADT_PAGEDVECTOR_H + +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/Support/Allocator.h" +#include <cassert> +#include <iostream> +#include <vector> + +namespace llvm { +// A vector that allocates memory in pages. +// Order is kept, but memory is allocated only when one element of the page is +// accessed. This introduces a level of indirection, but it is useful when you +// have a sparsely initialised vector where the full size is allocated upfront +// with the default constructor and elements are initialised later, on first +// access. +// +// Notice that this does not have iterators, because if you +// have iterators it probably means you are going to touch +// all the memory in any case, so better use a std::vector in +// the first place. +// +// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator. +template <typename T, std::size_t PAGE_SIZE = 1024 / sizeof(T)> +class PagedVector { + static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely " + "you want it to be greater than 16."); + // The actual number of element in the vector which can be accessed. + std::size_t Size = 0; + + // The position of the initial element of the page in the Data vector. + // Pages are allocated contiguously in the Data vector. + mutable std::vector<uintptr_t> PageToDataIdx; + // Actual page data. 
All the page elements are added to this vector on the + // first access of any of the elements of the page. Elements default + // constructed and elements of the page are stored contiguously. The order of + // the elements however depends on the order of access of the pages. + PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator; + + constexpr static uintptr_t InvalidPage = SIZE_MAX; + +public: + using value_type = T; + + // Default constructor. We build our own allocator. + PagedVector() : Allocator(new BumpPtrAllocator, true) {} + PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {} + + ~PagedVector() { + // If we own the allocator, delete it. + if (Allocator.getInt() == true) + delete Allocator.getPointer(); + } + + // Lookup an element at position i. + // If the associated page is not filled, it will be filled with default + // constructed elements. If the associated page is filled, return the element. + T &operator[](std::size_t Index) const { + assert(Index < Size); + assert(Index / PAGE_SIZE < PageToDataIdx.size()); + uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE]; + // If the page was not yet allocated, allocate it. + if (PagePtr == InvalidPage) { + T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PAGE_SIZE); + // We need to invoke the default constructor on all the elements of the + // page. + for (std::size_t I = 0; I < PAGE_SIZE; ++I) + new (NewPagePtr + I) T(); + + PagePtr = reinterpret_cast<uintptr_t>(NewPagePtr); + } + // Dereference the element in the page. + return *((Index % PAGE_SIZE) + reinterpret_cast<T *>(PagePtr)); + } + + // Return the capacity of the vector. I.e. the maximum size it can be expanded + // to with the resize method without allocating more pages. + [[nodiscard]] std::size_t capacity() const { + return PageToDataIdx.size() * PAGE_SIZE; + } + + // Return the size of the vector. I.e. the maximum index that can be + // accessed, i.e. the maximum value which was used as argument of the + // resize method. 
+ [[nodiscard]] std::size_t size() const { return Size; } + + // Expands the vector to the given NewSize number of elements. + // If the vector was smaller, allocates new pages as needed. + // It should be called only with NewSize >= Size. + void resize(std::size_t NewSize) { + // Handle shrink case: delete the pages and update the size. + if (NewSize < Size) { + std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE; + for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) { + uintptr_t PagePtr = PageToDataIdx[I]; + if (PagePtr == InvalidPage) + continue; + T *Page = reinterpret_cast<T *>(PagePtr); + // We need to invoke the destructor on all the elements of the page. + for (std::size_t J = 0; J < PAGE_SIZE; ++J) + Page[J].~T(); + Allocator.getPointer()->Deallocate(Page); + } + // Delete the extra ones in the new last page. + uintptr_t PagePtr = PageToDataIdx[NewLastPage]; + if (PagePtr != InvalidPage) { + T *Page = reinterpret_cast<T *>(PagePtr); + // If the new size and the old size are on the same page, we need to + // delete only the elements between the new size and the old size. + // Otherwise we need to delete all the remaining elements in the page. + std::size_t OldPage = (Size - 1) / PAGE_SIZE; + std::size_t NewPage = (NewSize - 1) / PAGE_SIZE; + std::size_t LastPageElements = + OldPage == NewPage ? Size % PAGE_SIZE : PAGE_SIZE; + for (std::size_t J = NewSize % PAGE_SIZE; J < LastPageElements; ++J) + Page[J].~T(); + } + PageToDataIdx.resize(NewLastPage + 1); + } + Size = NewSize; + // If the capacity is enough, just update the size and continue + // with the currently allocated pages. + if (Size <= capacity()) + return; + // The number of pages to allocate. The Remainder is calculated + // for the case in which the NewSize is not a multiple of PAGE_SIZE. + // In that case we need one more page. 
+ std::size_t Pages = Size / PAGE_SIZE; + std::size_t Remainder = Size % PAGE_SIZE; + if (Remainder != 0) + Pages += 1; + assert(Pages > PageToDataIdx.size()); + // We use InvalidPage to indicate that a page has not been allocated yet. + // This cannot be 0, because 0 is a valid page id. + // We use InvalidPage instead of a separate bool to avoid wasting space. + PageToDataIdx.resize(Pages, InvalidPage); + } + + // Return true if the vector is empty + [[nodiscard]] bool empty() const { return Size == 0; } + + /// Clear the vector, i.e. clear the allocated pages, the whole page + /// lookup index and reset the size. + void clear() { + Size = 0; + // If we own the allocator, simply reset it, otherwise we + // deallocate the pages one by one. + if (Allocator.getInt() == true) + Allocator.getPointer()->Reset(); + else + for (uintptr_t Page : PageToDataIdx) + Allocator.getPointer()->Deallocate(reinterpret_cast<T *>(Page)); + + PageToDataIdx.clear(); + } + + // Iterator on all the elements of the vector + // which have actually being constructed. + class MaterialisedIterator { + PagedVector const *PV; + std::size_t ElementIdx; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + + MaterialisedIterator(PagedVector const *PV, std::size_t ElementIdx) + : PV(PV), ElementIdx(ElementIdx) {} + + // When incrementing the iterator, we skip the elements which have not + // been materialised yet. + MaterialisedIterator &operator++() { + while (ElementIdx < PV->Size) + if (PV->PageToDataIdx[ElementIdx++ / PAGE_SIZE] != InvalidPage) + break; + + return *this; + } + // Post increment operator. 
+ MaterialisedIterator operator++(int) { + MaterialisedIterator Copy = *this; + ++*this; + return Copy; + } + + std::ptrdiff_t operator-(MaterialisedIterator const &Other) const { + assert(PV == Other.PV); + // If they are on the same table we can just subtract the indices. + // Otherwise we have to iterate over the pages to find the difference. + // If a page is invalid, we skip it. + if (PV == Other.PV) + return ElementIdx - Other.ElementIdx; + + std::size_t ElementMin = std::min(ElementIdx, Other.ElementIdx); + std::size_t ElementMax = std::max(ElementIdx, Other.ElementIdx); + std::size_t PageMin = ElementMin / PAGE_SIZE; + std::size_t PageMax = ElementMax / PAGE_SIZE; + + std::size_t Count = 0ULL; + for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) { + if (PV->PageToDataIdx[PageIdx] == InvalidPage) + continue; + + Count += PAGE_SIZE; + } + Count += ElementMax % PAGE_SIZE; + Count += PAGE_SIZE - ElementMin % PAGE_SIZE; + + return Count; + } + + // When dereferencing the iterator, we materialise the page if needed. + T const &operator*() const { + assert(ElementIdx < PV->Size); + assert(PV->PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage); + T *PagePtr = + reinterpret_cast<T *>(PV->PageToDataIdx[ElementIdx / PAGE_SIZE]); + return *((ElementIdx % PAGE_SIZE) + PagePtr); + } + + // Equality operator. + bool operator==(MaterialisedIterator const &Other) const { + // Iterators of two different vectors are never equal. + if (PV != Other.PV) + return false; + // Any iterator for an empty vector is equal to any other iterator. + if (PV->empty()) + return true; + // Get the pages of the two iterators. If between the two pages there + // are no valid pages, we can condider the iterators equal. + std::size_t PageMin = std::min(ElementIdx, Other.ElementIdx) / PAGE_SIZE; + std::size_t PageMax = std::max(ElementIdx, Other.ElementIdx) / PAGE_SIZE; + // If the two pages are past the end, the iterators are equal. 
+ if (PageMin >= PV->PageToDataIdx.size()) + return true; + // If only the last page is past the end, the iterators are equal if + // all the pages up to the end are invalid. + if (PageMax >= PV->PageToDataIdx.size()) { + for (std::size_t PageIdx = PageMin; PageIdx < PV->PageToDataIdx.size(); + ++PageIdx) + if (PV->PageToDataIdx[PageIdx] != InvalidPage) + return false; + return true; + } + + uintptr_t Page1 = PV->PageToDataIdx[PageMin]; + uintptr_t Page2 = PV->PageToDataIdx[PageMax]; + if (Page1 == InvalidPage && Page2 == InvalidPage) + return true; + + // If the two pages are the same, the iterators are equal if they point + // to the same element. + if (PageMin == PageMax) + return ElementIdx == Other.ElementIdx; + + // If the two pages are different, the iterators are equal if all the + // pages between them are invalid. + for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) + if (PV->PageToDataIdx[PageIdx] != InvalidPage) + return false; + + return true; ---------------- ktf wrote:
Sorry, I thought you had noticed that I changed the code as you suggested, adding asserts to make sure that iterators are indeed always valid when being compared (or == `end()`). The actual change was: https://github.com/llvm/llvm-project/pull/66430/commits/985444327c26d5caaec416f386c41f264c08b634 This now works as expected. I am not sure what the problem was before; maybe I was actually constructing an invalid iterator. https://github.com/llvm/llvm-project/pull/66430 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits