This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new f5a35c2 [Optimize] [Memory] BitShufflePageDecoder use memory allocated by ChunkAllocator instead of Faststring (#6515) f5a35c2 is described below commit f5a35c28e9d252158135a6988b40bfa37a9c17de Author: Xinyi Zou <zouxiny...@gmail.com> AuthorDate: Wed Nov 17 11:20:21 2021 +0800 [Optimize] [Memory] BitShufflePageDecoder use memory allocated by ChunkAllocator instead of Faststring (#6515) BitShufflePageDecoder reuses the memory for storing decoder results and now allocates that memory directly from the `ChunkAllocator`, which improves performance to a certain extent. In the case of #6285, the total time consumption is reduced by 13.5%, and the share of time spent in `~Reader()` is reduced from 17.65% to 1.53%. Memory allocation is also unified under `ChunkAllocator` for centralized management, which is conducive to subsequent memory optimization. Allocating directly from `ChunkAllocator` avoids the memory waste caused by `Mempool`, because a chunk can be freed at any time, but the performance is lower than allocating from `Mempool`. The guess is that, without `Mempool` performing secondary allocation out of large chunks, a large number of small chunks are requested directly from `ChunkAllocator`, and it takes longer to lock in `pop_free_chunk` and `push_free_chunk` (but this is not proven by the flame graphs of BE's CPU and contention). 
--- be/src/olap/rowset/segment_v2/bitshuffle_page.h | 21 +++++++++++++++------ be/src/runtime/memory/chunk_allocator.cpp | 7 +++++++ be/src/runtime/memory/chunk_allocator.h | 2 ++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index e4abece..02ab39f 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -33,6 +33,7 @@ #include "olap/rowset/segment_v2/page_builder.h" #include "olap/rowset/segment_v2/page_decoder.h" #include "olap/types.h" +#include "runtime/memory/chunk_allocator.h" #include "util/coding.h" #include "util/faststring.h" #include "util/slice.h" @@ -214,6 +215,12 @@ public: _size_of_element(0), _cur_index(0) {} + ~BitShufflePageDecoder() { + if (_chunk.size != 0) { + ChunkAllocator::instance()->free(_chunk); + } + } + Status init() override { CHECK(!_parsed); if (_data.size < BITSHUFFLE_PAGE_HEADER_SIZE) { @@ -302,7 +309,7 @@ public: // - left == _num_elements when not found (all values < target) while (left < right) { size_t mid = left + (right - left) / 2; - mid_value = &_decoded[mid * SIZE_OF_TYPE]; + mid_value = &_chunk.data[mid * SIZE_OF_TYPE]; if (TypeTraits<Type>::cmp(mid_value, value) < 0) { left = mid + 1; } else { @@ -312,7 +319,7 @@ public: if (left >= _num_elements) { return Status::NotFound("all value small than the value"); } - void* find_value = &_decoded[left * SIZE_OF_TYPE]; + void* find_value = &_chunk.data[left * SIZE_OF_TYPE]; if (TypeTraits<Type>::cmp(find_value, value) == 0) { *exact_match = true; } else { @@ -353,15 +360,17 @@ public: private: void _copy_next_values(size_t n, void* data) { - memcpy(data, &_decoded[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE); + memcpy(data, &_chunk.data[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE); } Status _decode() { if (_num_elements > 0) { int64_t bytes; - _decoded.resize(_num_element_after_padding * 
_size_of_element); + if (!ChunkAllocator::instance()->allocate_align(_num_element_after_padding * _size_of_element, &_chunk)) { + return Status::RuntimeError("Decoded Memory Alloc failed"); + } char* in = const_cast<char*>(&_data[BITSHUFFLE_PAGE_HEADER_SIZE]); - bytes = bitshuffle::decompress_lz4(in, _decoded.data(), _num_element_after_padding, + bytes = bitshuffle::decompress_lz4(in, _chunk.data, _num_element_after_padding, _size_of_element, 0); if (PREDICT_FALSE(bytes < 0)) { // Ideally, this should not happen. @@ -385,7 +394,7 @@ private: int _size_of_element; size_t _cur_index; - faststring _decoded; + Chunk _chunk; }; } // namespace segment_v2 diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 0d50f2b..82327f2 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -135,6 +135,7 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { chunk->core_id = core_id; if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); _reserved_bytes.fetch_sub(size); chunk_pool_local_core_alloc_count->increment(1); return true; @@ -144,6 +145,7 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { ++core_id; for (int i = 1; i < _arenas.size(); ++i, ++core_id) { if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); _reserved_bytes.fetch_sub(size); chunk_pool_other_core_alloc_count->increment(1); // reset chunk's core_id to other @@ -188,4 +190,9 @@ void ChunkAllocator::free(const Chunk& chunk) { _arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size); } + +bool ChunkAllocator::allocate_align(size_t size, Chunk* chunk) { + return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk); +} + } // namespace doris diff --git a/be/src/runtime/memory/chunk_allocator.h b/be/src/runtime/memory/chunk_allocator.h index 230517d..d7eb22f 100644 --- 
a/be/src/runtime/memory/chunk_allocator.h +++ b/be/src/runtime/memory/chunk_allocator.h @@ -65,6 +65,8 @@ public: // Otherwise return false. bool allocate(size_t size, Chunk* chunk); + bool allocate_align(size_t size, Chunk* chunk); + // Free chunk allocated from this allocator void free(const Chunk& chunk); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org