This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7230c57f6784cdb99080f3d4c8b679c33b54da21
Author: Michael Smith <[email protected]>
AuthorDate: Tue Aug 22 13:14:40 2023 -0700

    IMPALA-5081: Add codegen_opt_level query option
    
    Adds the 'codegen_opt_level' query option to select LLVM optimization
    level for generated code. Retains the prior behavior - O2 - as default.
    
    If optimization level is changed for an entry already in cache, the
    cache entry will be used unless the new optimization level is higher
    than the cached level.
    
    Adds additional counters for NumOptimizedFunctions and
    NumOptimizedInstructions, which allow observing some impacts from
    codegen optimization. These additional counters, and tracking opt level
    for cached entries, increases the size of each cached entry.
    
    Adds unit tests for all optimizition levels checking
    - that small functions are inlined at higher levels (as a way to verify
      that optimization level has an effect)
    - codegen cache entries are updated when optimizing the same fragment at
      a higher level, and not updated the rest of the time
    
    Change-Id: I371f8758b6552263e91a1fbfd9a6e1c28e1fa2bd
    Reviewed-on: http://gerrit.cloudera.org:8080/20399
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/codegen/CMakeLists.txt             |   9 +-
 be/src/codegen/llvm-codegen-cache-test.cc |  15 +--
 be/src/codegen/llvm-codegen-cache.cc      |  16 ++--
 be/src/codegen/llvm-codegen-cache.h       |  24 +++--
 be/src/codegen/llvm-codegen-test.cc       | 148 +++++++++++++++++++++++++++++-
 be/src/codegen/llvm-codegen.cc            |  64 ++++++++++---
 be/src/codegen/llvm-codegen.h             |   5 +
 be/src/service/query-options.cc           |   7 ++
 be/src/service/query-options.h            |   3 +-
 common/thrift/ImpalaService.thrift        |   4 +
 common/thrift/Query.thrift                |  11 +++
 testdata/llvm/test-opt.cc                 |  39 ++++++++
 12 files changed, 306 insertions(+), 39 deletions(-)

diff --git a/be/src/codegen/CMakeLists.txt b/be/src/codegen/CMakeLists.txt
index c40e2cf79..ef89223f2 100644
--- a/be/src/codegen/CMakeLists.txt
+++ b/be/src/codegen/CMakeLists.txt
@@ -112,9 +112,16 @@ add_custom_target(test-loop.bc
   SOURCES ${CMAKE_SOURCE_DIR}/testdata/llvm/test-loop.cc
 )
 
+# Provide "unoptimized" output. Clang O0 adds attributes to everything which 
prevents us
+# doing optimizations in the test, so use O1 instead.
+add_custom_target(test-opt.bc
+  COMMAND ${LLVM_CLANG_EXECUTABLE} ${CLANG_IR_CXX_FLAGS} -O1 
${CLANG_INCLUDE_FLAGS} ${CMAKE_SOURCE_DIR}/testdata/llvm/test-opt.cc -o 
${CMAKE_SOURCE_DIR}/llvm-ir/test-opt.bc
+  SOURCES ${CMAKE_SOURCE_DIR}/testdata/llvm/test-opt.cc
+)
+
 # Exception to unified be tests: custom main initializes LLVM
 ADD_BE_LSAN_TEST(llvm-codegen-test)
-add_dependencies(llvm-codegen-test test-loop.bc)
+add_dependencies(llvm-codegen-test test-loop.bc test-opt.bc)
 ADD_BE_LSAN_TEST(llvm-codegen-cache-test LlvmCodegenCacheTest.*)
 
 ADD_UNIFIED_BE_LSAN_TEST(instruction-counter-test InstructionCounterTest.*)
diff --git a/be/src/codegen/llvm-codegen-cache-test.cc 
b/be/src/codegen/llvm-codegen-cache-test.cc
index 70d419c70..e1976eeca 100644
--- a/be/src/codegen/llvm-codegen-cache-test.cc
+++ b/be/src/codegen/llvm-codegen-cache-test.cc
@@ -136,6 +136,7 @@ class LlvmCodeGenCacheTest : public testing::Test {
   scoped_ptr<CodeGenCache> codegen_cache_;
   shared_ptr<LlvmExecutionEngineWrapper> exec_engine_wrapper_;
   TQueryOptions query_options_;
+  TCodeGenOptLevel::type opt_level_ = TCodeGenOptLevel::O2;
 };
 
 void LlvmCodeGenCacheTest::AddLlvmCodegenEcho(LlvmCodeGen* codegen) {
@@ -214,7 +215,7 @@ void 
LlvmCodeGenCacheTest::TestBasicFunction(TCodeGenCacheMode::type mode) {
       GetMemCharge(codegen_double.get(), cache_key.data(), is_normal_mode);
 
   // Store and lookup the entry by the key.
-  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode));
+  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode, opt_level_));
   CheckInUseMetrics(
       codegen_cache_.get(), 1 /*num_entry_in_use*/, mem_charge 
/*bytes_in_use*/);
   EXPECT_OK(codegen_cache_->Lookup(cache_key, mode, &entry, 
&exec_engine_wrapper_));
@@ -225,7 +226,7 @@ void 
LlvmCodeGenCacheTest::TestBasicFunction(TCodeGenCacheMode::type mode) {
   CheckResult(entry);
   // Override the entry with a different function, should be able to find the 
new
   // function from the new entry.
-  EXPECT_OK(codegen_cache_->Store(cache_key, codegen_double.get(), mode));
+  EXPECT_OK(codegen_cache_->Store(cache_key, codegen_double.get(), mode, 
opt_level_));
   CheckInUseMetrics(
       codegen_cache_.get(), 1 /*num_entry_in_use*/, mem_charge_double 
/*bytes_in_use*/);
   EXPECT_OK(codegen_cache_->Lookup(cache_key, mode, &entry, 
&exec_engine_wrapper_));
@@ -269,12 +270,12 @@ int64_t LlvmCodeGenCacheTest::GetMemCharge(
 void LlvmCodeGenCacheTest::TestAtCapacity(TCodeGenCacheMode::type mode) {
   int64_t codegen_cache_capacity = 196; // 196B
   bool is_normal_mode = !CodeGenCacheModeAnalyzer::is_optimal(mode);
-  // 128B for optimal mode, or 140B on ARM systems
+  // 150B for optimal mode, or 164B on ARM systems
   if (!is_normal_mode) {
 #ifdef __aarch64__
-    codegen_cache_capacity = 140;
+    codegen_cache_capacity = 164;
 #else
-    codegen_cache_capacity = 128;
+    codegen_cache_capacity = 150;
 #endif
   }
   // Using single shard makes the logic of scenarios simple for capacity and
@@ -472,7 +473,7 @@ void 
LlvmCodeGenCacheTest::TestSwitchModeHelper(TCodeGenCacheMode::type mode, st
   CodeGenCacheKeyConstructor::construct(key, &cache_key);
 
   // Store and lookup the entry by the key.
-  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode));
+  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode, opt_level_));
   if (expect_entry_num != -1) {
     CheckInUseMetrics(codegen_cache_.get(), expect_entry_num 
/*num_entry_in_use*/);
   }
@@ -546,7 +547,7 @@ void 
LlvmCodeGenCacheTest::StoreHelper(TCodeGenCacheMode::type mode, string key)
   CodeGenCacheKey cache_key;
   CodeGenCacheEntry entry;
   CodeGenCacheKeyConstructor::construct(key, &cache_key);
-  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode));
+  EXPECT_OK(codegen_cache_->Store(cache_key, codegen.get(), mode, opt_level_));
   codegen->Close();
 }
 
diff --git a/be/src/codegen/llvm-codegen-cache.cc 
b/be/src/codegen/llvm-codegen-cache.cc
index b0d27a522..d2cf81414 100644
--- a/be/src/codegen/llvm-codegen-cache.cc
+++ b/be/src/codegen/llvm-codegen-cache.cc
@@ -133,8 +133,9 @@ Status CodeGenCache::Lookup(const CodeGenCacheKey& 
cache_key,
     // to look for jitted functions.
     if (LookupEngine(cached_entry->engine_pointer, execution_engine)) {
       entry->Reset(cached_entry->engine_pointer, cached_entry->num_functions,
-          cached_entry->num_instructions, 
cached_entry->function_names_hashcode,
-          cached_entry->total_bytes_charge);
+          cached_entry->num_instructions, cached_entry->num_opt_functions,
+          cached_entry->num_opt_instructions, 
cached_entry->function_names_hashcode,
+          cached_entry->total_bytes_charge, cached_entry->opt_level);
       return Status::OK();
     }
   }
@@ -143,7 +144,8 @@ Status CodeGenCache::Lookup(const CodeGenCacheKey& 
cache_key,
 }
 
 Status CodeGenCache::StoreInternal(const CodeGenCacheKey& cache_key,
-    LlvmCodeGen* codegen, const TCodeGenCacheMode::type& mode) {
+    LlvmCodeGen* codegen, TCodeGenCacheMode::type mode,
+    TCodeGenOptLevel::type opt_level) {
   // In normal mode, we will store the whole key content to the cache.
   // Otherwise, in optimal mode, we will only store the hash code and length 
of the key.
   Slice key = cache_key.optimal_key_slice();
@@ -163,7 +165,9 @@ Status CodeGenCache::StoreInternal(const CodeGenCacheKey& 
cache_key,
   CodeGenCacheEntry* cache_entry =
       
reinterpret_cast<CodeGenCacheEntry*>(cache_->MutableValue(&pending_handle));
   cache_entry->Reset(codegen->execution_engine(), 
codegen->num_functions_->value(),
-      codegen->num_instructions_->value(), codegen->function_names_hashcode_, 
mem_charge);
+      codegen->num_instructions_->value(), 
codegen->num_opt_functions_->value(),
+      codegen->num_opt_instructions_->value(), 
codegen->function_names_hashcode_,
+      mem_charge, opt_level);
   StoreEngine(codegen);
   /// It is thread-safe, but could override the existing entry with the same 
key.
   Cache::UniqueHandle cache_handle =
@@ -181,7 +185,7 @@ Status CodeGenCache::StoreInternal(const CodeGenCacheKey& 
cache_key,
 }
 
 Status CodeGenCache::Store(const CodeGenCacheKey& cache_key, LlvmCodeGen* 
codegen,
-    const TCodeGenCacheMode::type& mode) {
+    TCodeGenCacheMode::type mode, TCodeGenOptLevel::type opt_level) {
   DCHECK(!is_closed_);
   DCHECK(cache_ != nullptr);
   DCHECK(codegen != nullptr);
@@ -206,7 +210,7 @@ Status CodeGenCache::Store(const CodeGenCacheKey& 
cache_key, LlvmCodeGen* codege
     if (!key_to_insert_it.second) return status;
   }
   // Do store the cache entry to the cache.
-  status = StoreInternal(cache_key, codegen, mode);
+  status = StoreInternal(cache_key, codegen, mode, opt_level);
   // Remove the hash code of the key from the to_insert_keys set.
   lock_guard<mutex> lock(to_insert_set_lock_);
   keys_to_insert_.erase(key_to_insert_it.first);
diff --git a/be/src/codegen/llvm-codegen-cache.h 
b/be/src/codegen/llvm-codegen-cache.h
index 051c7e523..6c58dd77f 100644
--- a/be/src/codegen/llvm-codegen-cache.h
+++ b/be/src/codegen/llvm-codegen-cache.h
@@ -161,25 +161,37 @@ class CodeGenCacheModeAnalyzer {
 };
 
 struct CodeGenCacheEntry {
-  CodeGenCacheEntry() { Init(); }
+  CodeGenCacheEntry() : engine_pointer(nullptr) {}
+  // When Empty, no guarantees are made about the content of other fields.
   bool Empty() { return engine_pointer == nullptr; }
-  void Init() { memset((uint8_t*)this, 0, sizeof(CodeGenCacheEntry)); }
-  void Reset() { Reset(nullptr, 0, 0, 0, 0); }
+  void Reset() { engine_pointer = nullptr; }
   void Reset(llvm::ExecutionEngine* engine_ptr, int64_t num_funcs, int64_t 
num_instrucs,
-      uint64_t names_hashcode, int64_t charge) {
+      int64_t num_opt_funcs, int64_t num_opt_instrucs, uint64_t names_hashcode,
+      int64_t charge, TCodeGenOptLevel::type opt) {
     engine_pointer = engine_ptr;
     num_functions = num_funcs;
     num_instructions = num_instrucs;
+    num_opt_functions = num_opt_funcs;
+    num_opt_instructions = num_opt_instrucs;
     function_names_hashcode = names_hashcode;
     total_bytes_charge = charge;
+    opt_level = opt;
   }
   llvm::ExecutionEngine* engine_pointer;
+  /// Number of functions before optimization.
   int64_t num_functions;
+  /// Number of instructions before optimization.
   int64_t num_instructions;
+  /// Number of functions after optimization.
+  int64_t num_opt_functions;
+  /// Number of instructions after optimization.
+  int64_t num_opt_instructions;
   /// The hashcode of function names in the entry.
   uint64_t function_names_hashcode;
   /// Bytes charge including the key and the entry.
   int64_t total_bytes_charge;
+  /// CodeGen optimization level used to generate this entry.
+  TCodeGenOptLevel::type opt_level;
 };
 
 /// Each CodeGenCache is supposed to be a singleton in the daemon, manages the 
codegen
@@ -208,7 +220,7 @@ class CodeGenCache {
 
   /// Store the cache entry with the specific cache key.
   Status Store(const CodeGenCacheKey& key, LlvmCodeGen* codegen,
-      const TCodeGenCacheMode::type& mode);
+      TCodeGenCacheMode::type mode, TCodeGenOptLevel::type opt_level);
 
   /// Store the shared pointer of llvm execution engine to the cache to keep 
all the
   /// jitted functions in that engine alive.
@@ -257,7 +269,7 @@ class CodeGenCache {
 
   /// Helper function to store the entry to the cache.
   Status StoreInternal(const CodeGenCacheKey& key, LlvmCodeGen* codegen,
-      const TCodeGenCacheMode::type& mode);
+      TCodeGenCacheMode::type mode, TCodeGenOptLevel::type opt_level);
 
   /// Indicate if the cache is closed. If is closed, no call is allowed for 
any functions
   /// other than ReleaseResources().
diff --git a/be/src/codegen/llvm-codegen-test.cc 
b/be/src/codegen/llvm-codegen-test.cc
index 43a1663e2..26913bf69 100644
--- a/be/src/codegen/llvm-codegen-test.cc
+++ b/be/src/codegen/llvm-codegen-test.cc
@@ -61,7 +61,7 @@ TEST_F(CodegenFnPtrTest, StoreNonVoidPtrAndLoad) {
   ASSERT_TRUE(loaded_fn_ptr == int_identity);
 }
 
-class LlvmCodeGenTest : public testing:: Test {
+class LlvmCodeGenTest : public testing::Test {
  protected:
   scoped_ptr<TestEnv> test_env_;
   FragmentState* fragment_state_;
@@ -112,10 +112,6 @@ class LlvmCodeGenTest : public testing:: Test {
     return (*codegen)->MaterializeModule();
   }
 
-  static void ClearHashFns(LlvmCodeGen* codegen) {
-    codegen->ClearHashFns();
-  }
-
   static void AddFunctionToJit(LlvmCodeGen* codegen, llvm::Function* fn,
       CodegenFnPtrBase* fn_ptr) {
     // Bypass Impala-specific logic in AddFunctionToJit() that assumes 
Impala's struct
@@ -565,6 +561,148 @@ TEST_F(LlvmCodeGenTest, CleanupNonFinalizedMethodsTest) {
   ASSERT_TRUE(ContainsHandcraftedFn(codegen.get(), complete_fn));
   ASSERT_OK(FinalizeModule(codegen.get()));
 }
+
+class LlvmOptTest :
+    public testing::TestWithParam<std::tuple<TCodeGenOptLevel::type, bool>> {
+ protected:
+  scoped_ptr<MetricGroup> metrics_;
+  scoped_ptr<TestEnv> test_env_;
+  FragmentState* fragment_state_;
+  TQueryOptions query_opts_;
+
+  virtual void SetUp() {
+    metrics_.reset(new MetricGroup("codegen-test"));
+    test_env_.reset(new TestEnv());
+    ASSERT_OK(test_env_->Init());
+    SetUpQuery(0, std::get<0>(GetParam()));
+  }
+
+  virtual void TearDown() {
+    fragment_state_->ReleaseResources();
+    fragment_state_ = nullptr;
+    test_env_.reset();
+    metrics_.reset();
+  }
+
+  void SetUpQuery(int64_t query_id, TCodeGenOptLevel::type level) {
+    query_opts_.__set_codegen_opt_level(level);
+    RuntimeState* runtime_state_;
+    ASSERT_OK(test_env_->CreateQueryState(query_id, &query_opts_, 
&runtime_state_));
+    QueryState* qs = runtime_state_->query_state();
+    TPlanFragment* fragment = qs->obj_pool()->Add(new TPlanFragment());
+    PlanFragmentCtxPB* fragment_ctx = qs->obj_pool()->Add(new 
PlanFragmentCtxPB());
+    fragment_state_ =
+        qs->obj_pool()->Add(new FragmentState(qs, *fragment, *fragment_ctx));
+  }
+
+  void EnableCodegenCache() {
+    test_env_->ResetCodegenCache(metrics_.get());
+    EXPECT_OK(test_env_->codegen_cache()->Init(1024*1024));
+  }
+
+  // Wrapper to call private test-only methods on LlvmCodeGen object
+  Status CreateFromFile(const string& filename, scoped_ptr<LlvmCodeGen>* 
codegen) {
+    RETURN_IF_ERROR(LlvmCodeGen::CreateFromFile(fragment_state_,
+        fragment_state_->obj_pool(), nullptr, filename, "test", codegen));
+    return (*codegen)->MaterializeModule();
+  }
+
+  static void AddFunctionToJit(LlvmCodeGen* codegen, llvm::Function* fn,
+      CodegenFnPtrBase* fn_ptr) {
+    // Bypass Impala-specific logic in AddFunctionToJit() that assumes 
Impala's struct
+    // types are available in the module.
+    return codegen->AddFunctionToJitInternal(fn, fn_ptr);
+  }
+
+  static Status FinalizeModule(LlvmCodeGen* codegen) { return 
codegen->FinalizeModule(); }
+
+  static void VerifyCounters(LlvmCodeGen* codegen, bool expect_unopt) {
+    ASSERT_GT(codegen->num_functions_->value(), 0);
+    ASSERT_GT(codegen->num_instructions_->value(), 0);
+    if (expect_unopt) {
+      ASSERT_EQ(codegen->num_opt_functions_->value(), 
codegen->num_functions_->value());
+      ASSERT_EQ(
+          codegen->num_opt_instructions_->value(), 
codegen->num_instructions_->value());
+    } else {
+      ASSERT_LT(codegen->num_opt_functions_->value(), 
codegen->num_functions_->value());
+      ASSERT_LT(
+          codegen->num_opt_instructions_->value(), 
codegen->num_instructions_->value());
+    }
+  }
+
+  void LoadTestOpt(scoped_ptr<LlvmCodeGen>& codegen) {
+    const string func("_Z9loop_nullPii");
+    typedef void (*LoopFn)(int*, int);
+
+    string module_file;
+    PathBuilder::GetFullPath("llvm-ir/test-opt.bc", &module_file);
+
+    ASSERT_OK(CreateFromFile(module_file.c_str(), &codegen));
+    EXPECT_TRUE(codegen.get() != nullptr);
+    codegen->EnableOptimizations(true);
+
+    llvm::Function* fn = codegen->GetFunction(func, false);
+    EXPECT_TRUE(fn != nullptr);
+    EXPECT_EQ(fn->arg_size(), 2);
+    CodegenFnPtr<LoopFn> fn_impl;
+    AddFunctionToJit(codegen.get(), fn, &fn_impl);
+
+    ASSERT_OK(FinalizeModule(codegen.get()));
+    ASSERT_TRUE(fn_impl.load() != nullptr);
+  }
+};
+
+TEST_P(LlvmOptTest, OptFunction) {
+  scoped_ptr<LlvmCodeGen> codegen;
+  LoadTestOpt(codegen);
+  VerifyCounters(codegen.get(), std::get<1>(GetParam()));
+  codegen->Close();
+}
+
+TEST_P(LlvmOptTest, CachedOptFunction) {
+  TCodeGenOptLevel::type opt_level = std::get<0>(GetParam());
+  bool expect_unoptimized = std::get<1>(GetParam());
+  EnableCodegenCache();
+
+  scoped_ptr<LlvmCodeGen> codegen;
+  LoadTestOpt(codegen);
+  VerifyCounters(codegen.get(), expect_unoptimized);
+  codegen->Close();
+
+  int64_t query_id = 0;
+  constexpr std::array<TCodeGenOptLevel::type, 5> 
opt_levels{{TCodeGenOptLevel::O0,
+      TCodeGenOptLevel::O1, TCodeGenOptLevel::Os, TCodeGenOptLevel::O2,
+      TCodeGenOptLevel::O3}};
+  for (TCodeGenOptLevel::type level : opt_levels) {
+    codegen.reset();
+    SetUpQuery(++query_id, level);
+    LoadTestOpt(codegen);
+    // Higher levels are by definition O1+, which expects optimized size.
+    // Lower or equal levels should use the cached value from before the loop.
+    VerifyCounters(codegen.get(), level > opt_level ? false : 
expect_unoptimized);
+    codegen->Close();
+  }
+
+  // Check for cache hits/misses. Levels greater than opt_level will be a hit, 
others
+  // will be misses.
+  int num_less = opt_level - TCodeGenOptLevel::O0;
+  IntCounter* cache_hits =
+      metrics_->FindMetricForTesting<IntCounter>("impala.codegen-cache.hits");
+  EXPECT_NE(cache_hits, nullptr);
+  EXPECT_EQ(cache_hits->GetValue(), 1 + num_less);
+  IntCounter* cache_misses =
+      
metrics_->FindMetricForTesting<IntCounter>("impala.codegen-cache.misses");
+  EXPECT_NE(cache_misses, nullptr);
+  EXPECT_EQ(cache_misses->GetValue(), opt_levels.size() - num_less);
+}
+
+INSTANTIATE_TEST_CASE_P(OptLevels, LlvmOptTest, ::testing::Values(
+  //              Optimization level    Expect unoptimized
+  std::make_tuple(TCodeGenOptLevel::O0, true),
+  std::make_tuple(TCodeGenOptLevel::O1, false),
+  std::make_tuple(TCodeGenOptLevel::Os, false),
+  std::make_tuple(TCodeGenOptLevel::O2, false),
+  std::make_tuple(TCodeGenOptLevel::O3, false)));
 }
 
 int main(int argc, char **argv) {
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index b3ce78cd1..424142527 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -242,6 +242,8 @@ LlvmCodeGen::LlvmCodeGen(FragmentState* state, ObjectPool* 
pool,
       ASYNC_CODEGEN_THREAD_COUNTERS_PREFIX);
   num_functions_ = ADD_COUNTER(profile_, "NumFunctions", TUnit::UNIT);
   num_instructions_ = ADD_COUNTER(profile_, "NumInstructions", TUnit::UNIT);
+  num_opt_functions_ = ADD_COUNTER(profile_, "NumOptimizedFunctions", 
TUnit::UNIT);
+  num_opt_instructions_ = ADD_COUNTER(profile_, "NumOptimizedInstructions", 
TUnit::UNIT);
   num_cached_functions_ = ADD_COUNTER(profile_, "NumCachedFunctions", 
TUnit::UNIT);
   llvm_thread_counters_ = ADD_THREAD_COUNTERS(profile_, "Codegen");
 }
@@ -1184,14 +1186,24 @@ bool LlvmCodeGen::LookupCache(CodeGenCacheKey& 
cache_key) {
       return false;
     }
 
+    if (entry.opt_level < state_->query_options().codegen_opt_level) {
+      // Requested optimization level is higher than cached entry, so treat as 
a miss.
+      VLOG(2) << "Overwriting codegen cache entry at " << entry.opt_level
+          << " with optimization level " << 
state_->query_options().codegen_opt_level;
+      cache->IncHitOrMissCount(/*hit*/ false);
+      return false;
+    }
+
     const bool fn_pointers_set_from_cache = SetFunctionPointers(cache, 
&cache_key);
     if (!fn_pointers_set_from_cache) return false;
 
     // Because we cache the entire execution engine, the cached number of 
functions should
-    // be the same as the total function number.
-    COUNTER_SET(num_cached_functions_, entry.num_functions);
+    // be the same as the total optimized function number.
+    COUNTER_SET(num_cached_functions_, entry.num_opt_functions);
     COUNTER_SET(num_functions_, entry.num_functions);
     COUNTER_SET(num_instructions_, entry.num_instructions);
+    COUNTER_SET(num_opt_functions_, entry.num_opt_functions);
+    COUNTER_SET(num_opt_instructions_, entry.num_opt_instructions);
   }
   cache->IncHitOrMissCount(/*hit*/ entry_exists);
   return entry_exists;
@@ -1219,7 +1231,8 @@ void LlvmCodeGen::GenerateFunctionNamesHashCode() {
 Status LlvmCodeGen::StoreCache(CodeGenCacheKey& cache_key) {
   DCHECK(!cache_key.empty());
   Status store_status = ExecEnv::GetInstance()->codegen_cache()->Store(
-      cache_key, this, state_->query_options().codegen_cache_mode);
+      cache_key, this, state_->query_options().codegen_cache_mode,
+      state_->query_options().codegen_opt_level);
   LOG(INFO) << DebugCacheEntryString(cache_key, false /*is_lookup*/,
       
CodeGenCacheModeAnalyzer::is_debug(state_->query_options().codegen_cache_mode),
       store_status.ok());
@@ -1313,9 +1326,22 @@ Status LlvmCodeGen::FinalizeModule() {
     if (LookupCache(cache_key)) return Status::OK();
   }
 
+  // Update counters before final optimization, but after removing unused 
functions. This
+  // gives us a rough measure of how much work the optimization and 
compilation must do.
+  // If found in cache, counters will be restored from the cache entry.
+  InstructionCounter counter;
+  counter.visit(*module_);
+  COUNTER_SET(num_functions_, 
counter.GetCount(InstructionCounter::TOTAL_FUNCTIONS));
+  COUNTER_SET(num_instructions_, 
counter.GetCount(InstructionCounter::TOTAL_INSTS));
+
   if (optimizations_enabled_ && !FLAGS_disable_optimization_passes) {
     RETURN_IF_ERROR(OptimizeModule());
+    counter.ResetCount();
+    counter.visit(*module_);
   }
+  COUNTER_SET(num_opt_functions_,
+      counter.GetCount(InstructionCounter::TOTAL_FUNCTIONS));
+  COUNTER_SET(num_opt_instructions_, 
counter.GetCount(InstructionCounter::TOTAL_INSTS));
 
   if (FLAGS_opt_module_dir.size() != 0) {
     string path = FLAGS_opt_module_dir + "/" + id_ + "_opt.ll";
@@ -1404,18 +1430,30 @@ Status LlvmCodeGen::OptimizeModule() {
   pass_builder.registerLoopAnalyses(LAM);
   pass_builder.crossRegisterProxies(LAM, FAM, CGAM, MAM);
 
-  llvm::ModulePassManager pass_manager = 
pass_builder.buildPerModuleDefaultPipeline(
-      llvm::PassBuilder::OptimizationLevel::O2);
-
-  // Update counters before final optimization, but after removing unused 
functions. This
-  // gives us a rough measure of how much work the optimization and 
compilation must do.
-  InstructionCounter counter;
-  counter.visit(*module_);
-  COUNTER_SET(num_functions_, 
counter.GetCount(InstructionCounter::TOTAL_FUNCTIONS));
-  COUNTER_SET(num_instructions_, 
counter.GetCount(InstructionCounter::TOTAL_INSTS));
+  TCodeGenOptLevel::type opt_level = state_->query_options().codegen_opt_level;
+  llvm::PassBuilder::OptimizationLevel opt;
+  // GCC's -Werror=switch errors if a case is not covered.
+  switch (opt_level) {
+    case TCodeGenOptLevel::O0:
+      // Default optimization pipeline requires O1 or greater, so for O0 we 
skip.
+      return Status::OK();
+    case TCodeGenOptLevel::O1:
+      opt = llvm::PassBuilder::OptimizationLevel::O1;
+      break;
+    case TCodeGenOptLevel::Os:
+      opt = llvm::PassBuilder::OptimizationLevel::Os;
+      break;
+    case TCodeGenOptLevel::O2:
+      opt = llvm::PassBuilder::OptimizationLevel::O2;
+      break;
+    case TCodeGenOptLevel::O3:
+      opt = llvm::PassBuilder::OptimizationLevel::O3;
+      break;
+  }
+  llvm::ModulePassManager pass_manager = 
pass_builder.buildPerModuleDefaultPipeline(opt);
 
   int64_t estimated_memory = ESTIMATED_OPTIMIZER_BYTES_PER_INST
-      * counter.GetCount(InstructionCounter::TOTAL_INSTS);
+      * num_instructions_->value();
   if (!mem_tracker_->TryConsume(estimated_memory)) {
     const string& msg = Substitute(
         "Codegen failed to reserve '$0' bytes for optimization", 
estimated_memory);
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 5d05e4857..23d170525 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -647,6 +647,7 @@ class LlvmCodeGen {
   friend class LlvmCodeGenTest;
   friend class LlvmCodeGenTest_CpuAttrWhitelist_Test;
   friend class LlvmCodeGenTest_HashTest_Test;
+  friend class LlvmOptTest;
   friend class SubExprElimination;
   friend class CodeGenCache;
   friend class LlvmCodeGenCacheTest;
@@ -877,6 +878,10 @@ class LlvmCodeGen {
   RuntimeProfile::Counter* num_functions_;
   RuntimeProfile::Counter* num_instructions_;
 
+  /// Number of instructions after optimization.
+  RuntimeProfile::Counter* num_opt_functions_;
+  RuntimeProfile::Counter* num_opt_instructions_;
+
   /// Number of functions that are used and cached.
   RuntimeProfile::Counter* num_cached_functions_;
 
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 9f54083a9..7949e6ec6 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1156,6 +1156,13 @@ Status impala::SetQueryOption(const string& key, const 
string& value,
         
query_options->__set_hdfs_scanner_non_reserved_bytes(mem_spec_val.value);
         break;
       };
+      case TImpalaQueryOptions::CODEGEN_OPT_LEVEL: {
+        TCodeGenOptLevel::type enum_type;
+        RETURN_IF_ERROR(GetThriftEnum(
+            value, "CodeGen Opt Level", _TCodeGenOptLevel_VALUES_TO_NAMES, 
&enum_type));
+        query_options->__set_codegen_opt_level(enum_type);
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << 
key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 5ac047b63..68337d907 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -50,7 +50,7 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 #define QUERY_OPTS_TABLE                                                       
          \
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),                       
          \
-      TImpalaQueryOptions::HDFS_SCANNER_NON_RESERVED_BYTES + 1);               
          \
+      TImpalaQueryOptions::CODEGEN_OPT_LEVEL + 1);                             
          \
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, 
ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)     
          \
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)   
          \
@@ -310,6 +310,7 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       ICEBERG_PREDICATE_PUSHDOWN_SUBSETTING, TQueryOptionLevel::DEVELOPMENT)   
          \
   QUERY_OPT_FN(hdfs_scanner_non_reserved_bytes, 
HDFS_SCANNER_NON_RESERVED_BYTES,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
+  QUERY_OPT_FN(codegen_opt_level, CODEGEN_OPT_LEVEL, 
TQueryOptionLevel::ADVANCED)        \
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query 
state.
diff --git a/common/thrift/ImpalaService.thrift 
b/common/thrift/ImpalaService.thrift
index 6323d2510..83e37f629 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -854,6 +854,10 @@ enum TImpalaQueryOptions {
   // value, the value of flag hdfs_scanner_thread_max_estimated_bytes will be 
used
   // (which defaults to 32MB). The default value of this option is -1 (not 
set).
   HDFS_SCANNER_NON_RESERVED_BYTES = 166
+
+  // Select codegen optimization level from O0, O1, Os, O2, or O3. Higher 
levels will
+  // overwrite existing codegen cache entries.
+  CODEGEN_OPT_LEVEL = 167
 }
 
 // The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index bc51123ab..38d19019f 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -108,6 +108,14 @@ enum TParquetBloomFilterWrite {
   ALWAYS
 }
 
+enum TCodeGenOptLevel {
+  O0,
+  O1,
+  Os,
+  O2,
+  O3
+}
+
 // constants for TQueryOptions.num_nodes
 const i32 NUM_NODES_ALL = 0
 const i32 NUM_NODES_ALL_RACKS = -1
@@ -666,6 +674,9 @@ struct TQueryOptions {
 
   // See comment in ImpalaService.thrift
   167: optional i64 hdfs_scanner_non_reserved_bytes = -1
+
+  // See comment in ImpalaService.thrift
+  168: optional TCodeGenOptLevel codegen_opt_level = TCodeGenOptLevel.O2
 }
 
 // Impala currently has three types of sessions: Beeswax, HiveServer2 and 
external
diff --git a/testdata/llvm/test-opt.cc b/testdata/llvm/test-opt.cc
new file mode 100644
index 000000000..1878d60d1
--- /dev/null
+++ b/testdata/llvm/test-opt.cc
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This is a test-only file.  It was used to generate test-opt.bc
+// which is used by the unit test to test optimization passes.
+
+static const int m = 1;
+
+static int transform(int d) {
+  return d + m;
+}
+
+int loop_null(int* data, int num) {
+  int res = 0;
+  for (int i = 0; i < num; i++) {
+    // Expected to be optimized out.
+    if (i % 2 == 0) {
+      res += transform(data[i]);
+    }
+    if (i % 2 == 0) {
+      res -= transform(data[i]);
+    }
+  }
+  return res;
+}

Reply via email to