This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-1.2-unstable in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-unstable by this push: new dcc8d57f46 [Revert](mem) revert the mem config cause perfermace degradation (#13526) dcc8d57f46 is described below commit dcc8d57f460d052b85e0ce9514abdaf6f715fffa Author: HappenLee <happen...@hotmail.com> AuthorDate: Fri Oct 21 08:32:16 2022 +0800 [Revert](mem) revert the mem config cause perfermace degradation (#13526) * Revert "[fix](mem) failure of allocating memory (#13414)" This reverts commit 971eb9172f3e925c0b46ec1ffd1a9037a1b49801. * Revert "[improvement](memory) disable page cache and chunk allocator, optimize memory allocate size (#13285)" This reverts commit a5f3880649b094b58061f25c15dccdb50a4a2973. Conflicts: be/src/common/config.h --- be/src/common/config.h | 14 +++----- be/src/runtime/exec_env_init.cpp | 15 ++++++-- be/src/runtime/mem_pool.cpp | 15 ++++++-- be/src/runtime/mem_pool.h | 13 ++++--- be/src/runtime/memory/chunk_allocator.cpp | 57 +++++++++++++++---------------- be/src/util/bit_util.h | 10 ++---- be/src/vec/common/arena.h | 11 ++---- be/src/vec/common/pod_array.h | 20 +++-------- 8 files changed, 73 insertions(+), 82 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 02f76870b0..a242e03f00 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -239,7 +239,7 @@ CONF_Int32(storage_page_cache_shard_size, "16"); // all storage page cache will be divided into data_page_cache and index_page_cache CONF_Int32(index_page_cache_percentage, "10"); // whether to disable page cache feature in storage -CONF_Bool(disable_storage_page_cache, "true"); +CONF_Bool(disable_storage_page_cache, "false"); CONF_Bool(enable_storage_vectorization, "true"); @@ -439,20 +439,14 @@ CONF_Bool(disable_mem_pools, "false"); // increase this variable can improve performance, // but will acquire more free memory which can not be used by other modules. CONF_mString(chunk_reserved_bytes_limit, "10%"); - -// Whether using chunk allocator to cache memory chunk -CONF_Bool(disable_chunk_allocator, "true"); +// 1024, The minimum chunk allocator size (in bytes) +CONF_Int32(min_chunk_reserved_bytes, "1024"); // Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache. // For high concurrent queries, using Chunk Allocator with vectorized Allocator can reduce the impact // of gperftools tcmalloc central lock. // Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing // gperftools tcmalloc. -CONF_mBool(disable_chunk_allocator_in_vec, "true"); - -// Both MemPool and vectorized engine's podarray allocator, vectorized engine's arena will try to allocate memory as power of two. -// But if the memory is very large then power of two is also very large. This config means if the allocated memory's size is larger -// than this limit then all allocators will not use RoundUpToPowerOfTwo to allocate memory. -CONF_mInt64(memory_linear_growth_threshold, "134217728"); // 128Mb +CONF_mBool(disable_chunk_allocator_in_vec, "false"); // The probing algorithm of partitioned hash table. // Enable quadratic probing hash table diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index e18ed5c9c1..bf0f66d344 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -193,7 +193,9 @@ Status ExecEnv::_init_mem_tracker() { if (global_memory_limit_bytes > MemInfo::physical_mem()) { LOG(WARNING) << "Memory limit " << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << " exceeds physical memory, using physical memory instead"; + << " exceeds physical memory of " + << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) + << ". Using physical memory instead"; global_memory_limit_bytes = MemInfo::physical_mem(); } _process_mem_tracker = @@ -300,6 +302,13 @@ Status ExecEnv::_init_mem_tracker() { RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); RETURN_IF_ERROR(_tmp_file_mgr->init()); + // 5. init chunk allocator + if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) { + ss << "Config min_chunk_reserved_bytes must be a power-of-two: " + << config::min_chunk_reserved_bytes; + return Status::InternalError(ss.str()); + } + int64_t chunk_reserved_bytes_limit = ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, MemInfo::physical_mem(), &is_percent); @@ -309,8 +318,8 @@ Status ExecEnv::_init_mem_tracker() { << config::chunk_reserved_bytes_limit; return Status::InternalError(ss.str()); } - // Has to round to multiple of page size(4096 bytes), chunk allocator will also check this - chunk_reserved_bytes_limit = BitUtil::RoundDown(chunk_reserved_bytes_limit, 4096); + chunk_reserved_bytes_limit = + BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes); ChunkAllocator::init_instance(chunk_reserved_bytes_limit); LOG(INFO) << "Chunk allocator memory limit: " << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES) diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index 7e80e7e5b4..c2b709162c 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -119,9 +119,20 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) { } // Didn't find a big enough free chunk - need to allocate new chunk. + size_t chunk_size = 0; DCHECK_LE(next_chunk_size_, MAX_CHUNK_SIZE); - DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE); - size_t chunk_size = BitUtil::RoundUpToPowerOfTwo(std::max<size_t>(min_size, next_chunk_size_)); + + if (config::disable_mem_pools) { + // Disable pooling by sizing the chunk to fit only this allocation. + // Make sure the alignment guarantees are respected. + // This will generate too many small chunks. + chunk_size = std::max<size_t>(min_size, alignof(max_align_t)); + } else { + DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE); + chunk_size = std::max<size_t>(min_size, next_chunk_size_); + } + + chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); if (check_limits && !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( chunk_size)) { diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h index d11952dd50..41240ab375 100644 --- a/be/src/runtime/mem_pool.h +++ b/be/src/runtime/mem_pool.h @@ -232,16 +232,15 @@ private: ChunkInfo& info = chunks_[current_chunk_idx_]; int64_t aligned_allocated_bytes = - BitUtil::RoundUpToMultiplyOfFactor(info.allocated_bytes, alignment); - auto size_with_padding = size + DEFAULT_PADDING_SIZE; - if (aligned_allocated_bytes + size_with_padding <= info.chunk.size) { + BitUtil::RoundUpToPowerOf2(info.allocated_bytes + DEFAULT_PADDING_SIZE, alignment); + if (aligned_allocated_bytes + size <= info.chunk.size) { // Ensure the requested alignment is respected. int64_t padding = aligned_allocated_bytes - info.allocated_bytes; uint8_t* result = info.chunk.data + aligned_allocated_bytes; - ASAN_UNPOISON_MEMORY_REGION(result, size_with_padding); - DCHECK_LE(info.allocated_bytes + size_with_padding, info.chunk.size); - info.allocated_bytes += padding + size_with_padding; - total_allocated_bytes_ += padding + size_with_padding; + ASAN_UNPOISON_MEMORY_REGION(result, size); + DCHECK_LE(info.allocated_bytes + size, info.chunk.size); + info.allocated_bytes += padding + size; + total_allocated_bytes_ += padding + size; DCHECK_LE(current_chunk_idx_, chunks_.size() - 1); return result; } diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 09df7e23d4..43acc79538 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -154,36 +154,35 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { CHECK((size > 0 && (size & (size - 1)) == 0)); + // fast path: allocate from current core arena int core_id = CpuInfo::get_current_core(); - chunk->core_id = core_id; chunk->size = size; - if (!config::disable_chunk_allocator) { - // fast path: allocate from current core arena - if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { - DCHECK_GE(_reserved_bytes, 0); - _reserved_bytes.fetch_sub(size); - chunk_pool_local_core_alloc_count->increment(1); - // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. - THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); - return Status::OK(); - } - // Second path: try to allocate from other core's arena - // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. - // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. - // After that, allocate from current core arena as much as possible. - if (_reserved_bytes > _steal_arena_limit) { - ++core_id; - for (int i = 1; i < _arenas.size(); ++i, ++core_id) { - if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { - DCHECK_GE(_reserved_bytes, 0); - _reserved_bytes.fetch_sub(size); - chunk_pool_other_core_alloc_count->increment(1); - // reset chunk's core_id to other - chunk->core_id = core_id % _arenas.size(); - // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. - THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); - return Status::OK(); - } + chunk->core_id = core_id; + + if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); + _reserved_bytes.fetch_sub(size); + chunk_pool_local_core_alloc_count->increment(1); + // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. + THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); + return Status::OK(); + } + // Second path: try to allocate from other core's arena + // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. + // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. + // After that, allocate from current core arena as much as possible. + if (_reserved_bytes > _steal_arena_limit) { + ++core_id; + for (int i = 1; i < _arenas.size(); ++i, ++core_id) { + if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); + _reserved_bytes.fetch_sub(size); + chunk_pool_other_core_alloc_count->increment(1); + // reset chunk's core_id to other + chunk->core_id = core_id % _arenas.size(); + // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. + THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); + return Status::OK(); } } } @@ -205,7 +204,7 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { void ChunkAllocator::free(const Chunk& chunk) { DCHECK(chunk.core_id != -1); CHECK((chunk.size & (chunk.size - 1)) == 0); - if (config::disable_chunk_allocator) { + if (config::disable_mem_pools) { SystemAllocator::free(chunk.data, chunk.size); return; } diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h index f68586df64..28534b139b 100644 --- a/be/src/util/bit_util.h +++ b/be/src/util/bit_util.h @@ -43,8 +43,6 @@ public: return value / divisor + (value % divisor != 0); } - static inline size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; } - // Returns 'value' rounded up to the nearest multiple of 'factor' static inline int64_t round_up(int64_t value, int64_t factor) { return (value + (factor - 1)) / factor * factor; @@ -306,12 +304,8 @@ public: } /// Returns 'value' rounded up to the nearest multiple of 'factor' when factor is - /// a power of two, for example - /// Factor has to be a power of two - /// factor = 16, value = 10 --> result = 16 - /// factor = 16, value = 17 --> result = 32 - /// factor = 16, value = 33 --> result = 48 - static inline int64_t RoundUpToMultiplyOfFactor(int64_t value, int64_t factor) { + /// a power of two + static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) { DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); return (value + (factor - 1)) & ~(factor - 1); } diff --git a/be/src/vec/common/arena.h b/be/src/vec/common/arena.h index 8042d5618d..e136bae143 100644 --- a/be/src/vec/common/arena.h +++ b/be/src/vec/common/arena.h @@ -127,16 +127,11 @@ private: public: Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, - size_t linear_growth_threshold_ = -1) + size_t linear_growth_threshold_ = 128 * 1024 * 1024) : growth_factor(growth_factor_), + linear_growth_threshold(linear_growth_threshold_), head(new Chunk(initial_size_, nullptr)), - size_in_bytes(head->size()) { - if (linear_growth_threshold_ < 0) { - linear_growth_threshold = config::memory_linear_growth_threshold; - } else { - linear_growth_threshold = linear_growth_threshold_; - } - } + size_in_bytes(head->size()) {} ~Arena() { delete head; } diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index 6d844340ef..10a8ed8d75 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -30,8 +30,6 @@ #include <cstddef> #include <memory> -#include "common/config.h" -#include "util/bit_util.h" #include "vec/common/allocator.h" #include "vec/common/bit_helpers.h" #include "vec/common/memcpy_small.h" @@ -122,9 +120,8 @@ protected: } } - /// Not round up, keep the size just as the application pass in like std::vector void alloc_for_num_elements(size_t num_elements) { - alloc(minimum_memory_for_elements(num_elements)); + alloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(num_elements))); } template <typename... TAllocatorParams> @@ -184,7 +181,6 @@ protected: return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold); } - /// This method is called by push back or emplace back, this is the same behaviour with std::vector template <typename... TAllocatorParams> void reserve_for_next_size(TAllocatorParams&&... allocator_params) { if (size() == 0) { @@ -193,10 +189,8 @@ protected: realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), minimum_memory_for_elements(1)), std::forward<TAllocatorParams>(allocator_params)...); - } else { - // There is still a power of 2 expansion here, this method is used in push back method + } else realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...); - } } #ifndef NDEBUG @@ -450,11 +444,9 @@ public: template <typename It1, typename It2, typename... TAllocatorParams> void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) { size_t required_capacity = this->size() + (from_end - from_begin); - if (required_capacity > this->capacity()) { - // std::vector's insert method will expand if required capactiy is larger than current + if (required_capacity > this->capacity()) this->reserve(round_up_to_power_of_two_or_zero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...); - } } /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated. @@ -631,10 +623,8 @@ public: template <typename It1, typename It2> void assign(It1 from_begin, It2 from_end) { size_t required_capacity = from_end - from_begin; - if (required_capacity > this->capacity()) { - // std::vector assign just expand the capacity to the required capacity - this->reserve(required_capacity); - } + if (required_capacity > this->capacity()) + this->reserve(round_up_to_power_of_two_or_zero(required_capacity)); size_t bytes_to_copy = this->byte_size(required_capacity); memcpy(this->c_start, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org