This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8874ea07b68c09cee3c4e300b70706f3c5ea40af Author: Michael Smith <[email protected]> AuthorDate: Fri Nov 10 08:14:43 2023 -0800 IMPALA-11805: Fix LLVM memory manager bytes allocated Previously the bytes_allocated function would undercount memory allocated by the LLVM memory manager, as it allocates memory in full pages but bytes_allocated only tracked the actual storage requested. Implements bytes_allocated in the SectionMemoryManager to access the actual storage size allocated. Change-Id: I5a0193a1694db0718d75651dc2b2f9f92c0d6064 Reviewed-on: http://gerrit.cloudera.org:8080/20697 Reviewed-by: Yida Wu <[email protected]> Reviewed-by: Csaba Ringhofer <[email protected]> Tested-by: Michael Smith <[email protected]> --- be/src/codegen/llvm-codegen-cache-test.cc | 30 +++++++++++++------------ be/src/codegen/llvm-codegen-test.cc | 6 +++++ be/src/codegen/mcjit-mem-mgr.h | 10 ++------- be/src/thirdparty/llvm/SectionMemoryManager.cpp | 10 +++++++++ be/src/thirdparty/llvm/SectionMemoryManager.h | 6 +++++ 5 files changed, 40 insertions(+), 22 deletions(-) diff --git a/be/src/codegen/llvm-codegen-cache-test.cc b/be/src/codegen/llvm-codegen-cache-test.cc index e1976eeca..d8be5a399 100644 --- a/be/src/codegen/llvm-codegen-cache-test.cc +++ b/be/src/codegen/llvm-codegen-cache-test.cc @@ -18,6 +18,7 @@ #include "codegen/llvm-codegen-cache.h" #include <boost/thread/thread.hpp> #include <llvm/ExecutionEngine/ExecutionEngine.h> +#include "llvm/Support/Process.h" #include "codegen/mcjit-mem-mgr.h" #include "common/object-pool.h" #include "runtime/fragment-state.h" @@ -36,6 +37,9 @@ namespace impala { class LlvmCodeGenCacheTest : public testing::Test { public: virtual void SetUp() { + // Using single shard makes the logic of scenarios simple for capacity and + // eviction-related behavior. + FLAGS_cache_force_single_shard = true; FLAGS_codegen_cache_capacity = "0"; metrics_.reset(new MetricGroup("codegen-cache-test")); profile_ = RuntimeProfile::Create(&obj_pool_, "codegen-cache-test"); @@ -48,9 +52,11 @@ class LlvmCodeGenCacheTest : public testing::Test { PlanFragmentCtxPB* fragment_ctx = qs->obj_pool()->Add(new PlanFragmentCtxPB()); fragment_state_ = qs->obj_pool()->Add(new FragmentState(qs, *fragment, *fragment_ctx)); + page_size_ = llvm::sys::Process::getPageSize(); } virtual void TearDown() { + FLAGS_cache_force_single_shard = false; fragment_state_->ReleaseResources(); fragment_state_ = nullptr; codegen_cache_.reset(); @@ -137,6 +143,7 @@ class LlvmCodeGenCacheTest : public testing::Test { shared_ptr<LlvmExecutionEngineWrapper> exec_engine_wrapper_; TQueryOptions query_options_; TCodeGenOptLevel::type opt_level_ = TCodeGenOptLevel::O2; + unsigned page_size_; }; void LlvmCodeGenCacheTest::AddLlvmCodegenEcho(LlvmCodeGen* codegen) { @@ -268,19 +275,10 @@ int64_t LlvmCodeGenCacheTest::GetMemCharge( /// Test the situation that the codegen cache hits the limit of capacity, in this case, /// eviction is needed when new insertion comes. void LlvmCodeGenCacheTest::TestAtCapacity(TCodeGenCacheMode::type mode) { - int64_t codegen_cache_capacity = 196; // 196B + // LLVM allocates memory in pages, and for small examples allocates 3 pages. + int64_t codegen_cache_capacity = 3 * page_size_ + + sizeof(CodeGenCacheEntry) + CodeGenCacheKey::OptimalKeySize + sizeof(std::string); bool is_normal_mode = !CodeGenCacheModeAnalyzer::is_optimal(mode); - // 150B for optimal mode, or 164B on ARM systems - if (!is_normal_mode) { -#ifdef __aarch64__ - codegen_cache_capacity = 164; -#else - codegen_cache_capacity = 150; -#endif - } - // Using single shard makes the logic of scenarios simple for capacity and - // eviction-related behavior. - FLAGS_cache_force_single_shard = true; // Create two LlvmCodeGen objects containing a different codegen function separately. scoped_ptr<LlvmCodeGen> codegen; @@ -488,7 +486,9 @@ void LlvmCodeGenCacheTest::TestSwitchModeHelper(TCodeGenCacheMode::type mode, st // Test to switch among different modes. TEST_F(LlvmCodeGenCacheTest, SwitchMode) { - int64_t codegen_cache_capacity = 512; // 512B + // Storage for 2 entries + int64_t codegen_cache_capacity = 2 * (3 * page_size_ + + sizeof(CodeGenCacheEntry) + CodeGenCacheKey::OptimalKeySize + sizeof(std::string)); codegen_cache_.reset(new CodeGenCache(metrics_.get())); EXPECT_OK(codegen_cache_->Init(codegen_cache_capacity)); string key = "key"; @@ -579,7 +579,9 @@ void LlvmCodeGenCacheTest::TestConcurrentStore(int num_threads) { } TEST_F(LlvmCodeGenCacheTest, ConcurrentStore) { - int64_t codegen_cache_capacity = 512; // 512B + // Storage for 2 entries + int64_t codegen_cache_capacity = 2 * (3 * page_size_ + + sizeof(CodeGenCacheEntry) + CodeGenCacheKey::OptimalKeySize + sizeof(std::string)); codegen_cache_.reset(new CodeGenCache(metrics_.get())); EXPECT_OK(codegen_cache_->Init(codegen_cache_capacity)); TestConcurrentStore(8); diff --git a/be/src/codegen/llvm-codegen-test.cc b/be/src/codegen/llvm-codegen-test.cc index 3f6f70daa..2b87267a0 100644 --- a/be/src/codegen/llvm-codegen-test.cc +++ b/be/src/codegen/llvm-codegen-test.cc @@ -39,6 +39,8 @@ using std::unique_ptr; +DECLARE_bool(cache_force_single_shard); + namespace impala { class CodegenFnPtrTest : public testing:: Test { @@ -579,6 +581,9 @@ class LlvmOptTest : TQueryOptions query_opts_; virtual void SetUp() { + // Using single shard makes the logic of scenarios simple for capacity and + // eviction-related behavior. + FLAGS_cache_force_single_shard = true; metrics_.reset(new MetricGroup("codegen-test")); test_env_.reset(new TestEnv()); ASSERT_OK(test_env_->Init()); @@ -586,6 +591,7 @@ class LlvmOptTest : } virtual void TearDown() { + FLAGS_cache_force_single_shard = false; fragment_state_->ReleaseResources(); fragment_state_ = nullptr; test_env_.reset(); diff --git a/be/src/codegen/mcjit-mem-mgr.h b/be/src/codegen/mcjit-mem-mgr.h index 6002b6087..5844cb6f7 100644 --- a/be/src/codegen/mcjit-mem-mgr.h +++ b/be/src/codegen/mcjit-mem-mgr.h @@ -36,7 +36,7 @@ namespace impala { /// We also use it to track how much memory is allocated for compiled code. class ImpalaMCJITMemoryManager : public SectionMemoryManager { public: - ImpalaMCJITMemoryManager() : bytes_allocated_(0), bytes_tracked_(0){} + ImpalaMCJITMemoryManager() : bytes_tracked_(0) {} virtual uint64_t getSymbolAddress(const std::string& name) override { if (name == "__dso_handle") return reinterpret_cast<uint64_t>(&__dso_handle); @@ -45,29 +45,23 @@ class ImpalaMCJITMemoryManager : public SectionMemoryManager { virtual uint8_t* allocateCodeSection(uintptr_t size, unsigned alignment, unsigned section_id, llvm::StringRef section_name) override { - bytes_allocated_ += size; return SectionMemoryManager::allocateCodeSection( size, alignment, section_id, section_name); } virtual uint8_t* allocateDataSection(uintptr_t size, unsigned alignment, unsigned section_id, llvm::StringRef section_name, bool is_read_only) override { - bytes_allocated_ += size; return SectionMemoryManager::allocateDataSection( size, alignment, section_id, section_name, is_read_only); } - int64_t bytes_allocated() const { return bytes_allocated_; } int64_t bytes_tracked() const { return bytes_tracked_; } void set_bytes_tracked(int64_t bytes_tracked) { - DCHECK_LE(bytes_tracked, bytes_allocated_); + DCHECK_LE(bytes_tracked, bytes_allocated()); bytes_tracked_ = bytes_tracked; } private: - /// Total bytes allocated for the compiled code. - int64_t bytes_allocated_; - /// Total bytes already tracked by MemTrackers. <= 'bytes_allocated_'. /// Needed to release the correct amount from the MemTracker when done. int64_t bytes_tracked_; diff --git a/be/src/thirdparty/llvm/SectionMemoryManager.cpp b/be/src/thirdparty/llvm/SectionMemoryManager.cpp index 5964275ad..825843c0a 100644 --- a/be/src/thirdparty/llvm/SectionMemoryManager.cpp +++ b/be/src/thirdparty/llvm/SectionMemoryManager.cpp @@ -311,6 +311,16 @@ void SectionMemoryManager::invalidateInstructionCache() { sys::Memory::InvalidateInstructionCache(Block.base(), Block.size()); } +// Impala: added to track actual allocation +int64_t SectionMemoryManager::bytes_allocated() const { + int64_t total = 0; + for (const MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) { + for (const sys::MemoryBlock &Block : Group->AllocatedMem) + total += Block.size(); + } + return total; +} + SectionMemoryManager::~SectionMemoryManager() { for (MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) { for (sys::MemoryBlock &Block : Group->AllocatedMem) diff --git a/be/src/thirdparty/llvm/SectionMemoryManager.h b/be/src/thirdparty/llvm/SectionMemoryManager.h index 23ba5de28..fdfcb917a 100644 --- a/be/src/thirdparty/llvm/SectionMemoryManager.h +++ b/be/src/thirdparty/llvm/SectionMemoryManager.h @@ -105,6 +105,12 @@ public: /// This method is called from finalizeMemory. virtual void invalidateInstructionCache(); + /// \brief Returns number of bytes allocated by the memory manager. + /// + /// Impala: added to track actual memory allocation (which is page-aligned) + /// rather than just requested memory. + int64_t bytes_allocated() const; + private: struct FreeMemBlock { // The actual block of free memory
