(impala) 03/03: IMPALA-13186: Tag query option scope for tuple cache

joemcdonnell Wed, 09 Oct 2024 10:22:01 -0700

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit b6b953b48e05b64e7f0c9d1cb2623148671ffce2
Author: Michael Smith <[email protected]>
AuthorDate: Wed Aug 7 13:36:06 2024 -0700

    IMPALA-13186: Tag query option scope for tuple cache
    
    Constructs a hash of non-default query options that are relevant to
    query results; by default, every query option is included in the hash
    unless it is explicitly marked as exempt.
    Passes this hash to the frontend for inclusion in the tuple cache key
    on plan leaf nodes (which will be included in parent key calculation).
    
    Modifies MurmurHash3 to be re-entrant so the backend can construct a
    hash incrementally. This is slightly slower but more memory efficient
    than accumulating all hash inputs in a contiguous array first.
    
    Adds TUPLE_CACHE_EXEMPT_QUERY_OPT_FN to mark query options that can be
    ignored when calculating a tuple cache hash.
    
    Adds startup flag 'tuple_cache_exempt_query_options' as a safety valve
    for exempting query options that should have been marked exempt but
    were missed.
    
    Removes duplicate printing logic for query options from child-query.cc
    in favor of re-using TQueryOptionsToMap, which does the same thing.
    
    Cleans up query-options.cc helpers so they're static and reduces
    duplicate printing logic.
    
    Adds a test that different values for a relevant query option use
    different cache entries. Adds startup flag
    'tuple_cache_ignore_query_options' to omit query options from the
    cache key when testing certain tuple cache failure modes, where we
    need to use debug actions.
    
    Change-Id: I1f4802ad9548749cd43df8848b6f46dca3739ae7
    Reviewed-on: http://gerrit.cloudera.org:8080/21698
    Reviewed-by: Joe McDonnell <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/child-query.cc                      |  36 +-----
 be/src/service/impala-server.cc                    |   2 +
 be/src/service/query-options.cc                    | 130 +++++++++++++++------
 be/src/service/query-options.h                     | 102 +++++++++-------
 be/src/thirdparty/datasketches/MurmurHash3.h       |  13 ++-
 common/thrift/ImpalaService.thrift                 |   5 +-
 common/thrift/Query.thrift                         |   9 ++
 .../java/org/apache/impala/planner/PlanNode.java   |  12 +-
 .../apache/impala/planner/TupleCachePlanner.java   |   3 +-
 tests/custom_cluster/test_tuple_cache.py           |  87 +++++++++++++-
 10 files changed, 276 insertions(+), 123 deletions(-)

diff --git a/be/src/service/child-query.cc b/be/src/service/child-query.cc
index a1eb77cd3..78c94a01e 100644
--- a/be/src/service/child-query.cc
+++ b/be/src/service/child-query.cc
@@ -126,43 +126,9 @@ Status ChildQuery::ExecAndFetch() {
   return status;
 }
 
-template <typename T>
-void PrintQueryOptionValue (const T& option, stringstream& val) {
-  val << option;
-}
-
-void PrintQueryOptionValue(const impala::TCompressionCodec& compression_codec,
-    stringstream& val) {
-  if (compression_codec.codec != THdfsCompression::ZSTD) {
-    val << compression_codec.codec;
-  } else {
-    val << compression_codec.codec << ":" << 
compression_codec.compression_level;
-  }
-}
-
-void PrintQueryOptionValue(const set<impala::TRuntimeFilterType::type>& 
filter_types,
-    stringstream& val) {
-  val << filter_types;
-}
-
-void PrintQueryOptionValue(const std::set<int32_t>& filter_ids, stringstream& 
val) {
-  val << filter_ids;
-}
-
 void ChildQuery::SetQueryOptions(TExecuteStatementReq* exec_stmt_req) {
   map<string, string> conf;
-  const TQueryOptions& parent_options =
-      parent_request_state_->exec_request().query_options;
-#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\
-  if (parent_options.__isset.NAME) {\
-    stringstream val;\
-    PrintQueryOptionValue(parent_options.NAME, val);\
-    conf[#ENUM] = val.str();\
-  }
-#define REMOVED_QUERY_OPT_FN(NAME, ENUM)
-  QUERY_OPTS_TABLE
-#undef QUERY_OPT_FN
-#undef REMOVED_QUERY_OPT_FN
+  TQueryOptionsToMap(parent_request_state_->exec_request().query_options, 
&conf);
   // Ignore debug actions on child queries because they may cause deadlock.
   map<string, string>::iterator it = conf.find("DEBUG_ACTION");
   if (it != conf.end()) conf.erase(it);
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 88ea712ab..073610ba6 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -1404,6 +1404,8 @@ void ImpalaServer::PrepareQueryContext(const std::string& 
hostname,
   query_ctx->__set_status_report_interval_ms(FLAGS_status_report_interval_ms);
   query_ctx->__set_status_report_max_retry_s(FLAGS_status_report_max_retry_s);
   query_ctx->__set_gen_aggregated_profile(FLAGS_gen_experimental_profile);
+  query_ctx->__set_query_options_result_hash(
+      QueryOptionsResultHash(query_ctx->client_request.query_options));
 
   // Creating a random_generator every time is not free, but
   // benchmarks show it to be slightly cheaper than contending for a
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 63a34cb3a..beb104893 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -21,8 +21,10 @@
 #include <regex>
 #include <sstream>
 #include <string>
+#include <unordered_set>
 
 #include <boost/algorithm/string.hpp>
+#include <gutil/strings/split.h>
 #include <gutil/strings/strip.h>
 #include <gutil/strings/substitute.h>
 
@@ -31,6 +33,7 @@
 #include "gen-cpp/Query_constants.h"
 #include "runtime/runtime-filter.h"
 #include "service/query-option-parser.h"
+#include "thirdparty/datasketches/MurmurHash3.h"
 #include "util/debug-util.h"
 #include "util/parse-util.h"
 
@@ -46,12 +49,24 @@ using std::to_string;
 using namespace impala;
 using namespace strings;
 
+DEFINE_bool_hidden(tuple_cache_ignore_query_options, false,
+    "If true, don't compute TQueryOptionsHash for tuple caching to allow 
testing tuple "
+    "caching failure modes.");
+
+DEFINE_string_hidden(tuple_cache_exempt_query_options, "",
+    "A comma-separated list of additional query options to exclude from the 
tuple cache "
+    "key. Option names must be lower-case.");
+DEFINE_validator(tuple_cache_exempt_query_options, [](const char* name,
+    const string& val) { return none_of(val.begin(), val.end(), isupper); });
+
 DECLARE_int32(idle_session_timeout);
 DECLARE_bool(allow_tuple_caching);
 
+#define TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(NAME, ENUM, LEVEL) QUERY_OPT_FN(NAME, 
ENUM, LEVEL)
+
 void impala::OverlayQueryOptions(
     const TQueryOptions& src, const QueryOptionsMask& mask, TQueryOptions* 
dst) {
-  DCHECK_GT(mask.size(), _TImpalaQueryOptions_VALUES_TO_NAMES.size())
+  DCHECK_GE(mask.size(), _TImpalaQueryOptions_VALUES_TO_NAMES.size())
       << "Size of QueryOptionsMask must be increased.";
 #define QUERY_OPT_FN(NAME, ENUM, LEVEL) \
   if (src.__isset.NAME && mask[TImpalaQueryOptions::ENUM]) 
dst->__set_##NAME(src.NAME);
@@ -63,20 +78,20 @@ void impala::OverlayQueryOptions(
 
 // Choose different print function based on the type.
 template <typename T, typename std::enable_if_t<std::is_enum<T>::value>* = 
nullptr>
-string PrintQueryOptionValue(const T& option) {
+static string PrintQueryOptionValue(const T& option) {
   return PrintValue(option);
 }
 
 template <typename T, typename std::enable_if_t<std::is_arithmetic<T>::value>* 
= nullptr>
-string PrintQueryOptionValue(const T& option) {
+static string PrintQueryOptionValue(const T& option) {
   return std::to_string(option);
 }
 
-const string& PrintQueryOptionValue(const std::string& option) {
+static const string& PrintQueryOptionValue(const string& option) {
   return option;
 }
 
-const string PrintQueryOptionValue(const impala::TCompressionCodec& 
compression_codec) {
+static string PrintQueryOptionValue(const impala::TCompressionCodec& 
compression_codec) {
   if (compression_codec.codec != THdfsCompression::ZSTD) {
     return Substitute("$0", PrintValue(compression_codec.codec));
   } else {
@@ -85,10 +100,10 @@ const string PrintQueryOptionValue(const 
impala::TCompressionCodec& compression_
   }
 }
 
-std::ostream& impala::operator<<(
-    std::ostream& out, const std::set<impala::TRuntimeFilterType::type>& 
filter_types) {
+template <typename T>
+static std::ostream& printSet(std::ostream& out, const std::set<T>& things) {
   bool first = true;
-  for (const auto& t : filter_types) {
+  for (const T& t : things) {
     if (!first) out << ",";
     out << t;
     first = false;
@@ -96,26 +111,19 @@ std::ostream& impala::operator<<(
   return out;
 }
 
-const string PrintQueryOptionValue(
-    const std::set<impala::TRuntimeFilterType::type>& filter_types) {
-  std::stringstream val;
-  val << filter_types;
-  return val.str();
+std::ostream& impala::operator<<(
+    std::ostream& out, const std::set<impala::TRuntimeFilterType::type>& 
filter_types) {
+  return printSet(out, filter_types);
 }
 
 std::ostream& impala::operator<<(std::ostream& out, const std::set<int32_t>& 
filter_ids) {
-  bool first = true;
-  for (const auto& t : filter_ids) {
-    if (!first) out << ",";
-    out << t;
-    first = false;
-  }
-  return out;
+  return printSet(out, filter_ids);
 }
 
-const string PrintQueryOptionValue(const std::set<int32_t>& filter_ids) {
+template <typename T>
+static string PrintQueryOptionValue(const std::set<T>& things) {
   std::stringstream val;
-  val << filter_ids;
+  val << things;
   return val.str();
 }
 
@@ -158,7 +166,7 @@ static TQueryOptions DefaultQueryOptions() {
   return defaults;
 }
 
-inline bool operator!=(const TCompressionCodec& a, const TCompressionCodec& b) 
{
+static inline bool operator!=(const TCompressionCodec& a, const 
TCompressionCodec& b) {
   return (a.codec != b.codec || a.compression_level != b.compression_level);
 }
 
@@ -179,7 +187,7 @@ string impala::DebugQueryOptions(const TQueryOptions& 
query_options) {
   return ss.str();
 }
 
-inline void TrimAndRemoveEmptyString(vector<string>& values) {
+static inline void TrimAndRemoveEmptyString(vector<string>& values) {
   int i = 0;
   while (i < values.size()) {
     trim(values[i]);
@@ -1392,7 +1400,7 @@ Status impala::ValidateQueryOptions(TQueryOptions* 
query_options) {
   return Status::OK();
 }
 
-void impala::PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels){
+void impala::PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels) 
{
 #define QUERY_OPT_FN(NAME, ENUM, LEVEL) \
   { (*query_option_levels)[#ENUM] = LEVEL; }
 #define REMOVED_QUERY_OPT_FN(NAME, ENUM) \
@@ -1403,19 +1411,71 @@ void 
impala::PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels){
 #undef REMOVED_QUERY_OPT_FN
 }
 
+template<typename T, typename std::enable_if_t<std::is_enum<T>::value ||
+    std::is_arithmetic<T>::value>* = nullptr>
+static void HashQueryOptionValue(const T& option, HashState& hash) {
+  MurmurHash3_x64_128(&option, sizeof(option), hash);
+}
+
+static void HashQueryOptionValue(const string& option, HashState& hash) {
+  MurmurHash3_x64_128(option.c_str(), option.length(), hash);
+}
+
+static void HashQueryOptionValue(
+    const TCompressionCodec& compression_codec, HashState& hash) {
+  HashQueryOptionValue(compression_codec.codec, hash);
+  if (compression_codec.codec == THdfsCompression::ZSTD) {
+    HashQueryOptionValue(compression_codec.compression_level, hash);
+  }
+}
+
+template<typename T>
+static void HashQueryOptionValue(const std::set<T>& things, HashState& hash) {
+  for (const T& thing : things) {
+    HashQueryOptionValue(thing, hash);
+  }
+}
+
+constexpr uint64_t QUERY_OPTION_HASH_SEED = 0x9b8b4467323b23cf;
+
+TQueryOptionsHash impala::QueryOptionsResultHash(const TQueryOptions& 
query_options) {
+  if (UNLIKELY(FLAGS_tuple_cache_ignore_query_options)) return 
TQueryOptionsHash();
+
+  std::unordered_set<StringPiece> exempt;
+  if (!FLAGS_tuple_cache_exempt_query_options.empty()) {
+    exempt = Split(FLAGS_tuple_cache_exempt_query_options, ",", SkipEmpty());
+  }
+
+  HashState hash{QUERY_OPTION_HASH_SEED, QUERY_OPTION_HASH_SEED};
+#define QUERY_OPT_FN(NAME, ENUM, LEVEL) \
+  if (query_options.__isset.NAME && exempt.count(#NAME) == 0) \
+    HashQueryOptionValue(query_options.NAME, hash);
+#define REMOVED_QUERY_OPT_FN(NAME, ENUM)
+#undef TUPLE_CACHE_EXEMPT_QUERY_OPT_FN
+#define TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(NAME, ENUM, LEVEL)
+  QUERY_OPTS_TABLE
+#undef QUERY_OPT_FN
+#undef REMOVED_QUERY_OPT_FN
+#undef TUPLE_CACHE_EXEMPT_QUERY_OPT_FN
+#define TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(NAME, ENUM, LEVEL) QUERY_OPT_FN(NAME, 
ENUM, LEVEL)
+  TQueryOptionsHash thash;
+  thash.__set_hi(hash.h1);
+  thash.__set_lo(hash.h2);
+  return thash;
+}
+
 Status impala::ResetAllQueryOptions(
     TQueryOptions* query_options, QueryOptionsMask* set_query_options_mask) {
   static const TQueryOptions defaults = DefaultQueryOptions();
-#define QUERY_OPT_FN(NAME, ENUM, LEVEL)                  \
-  if (query_options->NAME != defaults.NAME) {            \
-    query_options->__isset.NAME = defaults.__isset.NAME; \
-    query_options->NAME = defaults.NAME;                 \
-    int option = GetQueryOptionForKey(#NAME);            \
-    DCHECK_GE(option, 0);                                \
-    if (set_query_options_mask != nullptr) {             \
-      DCHECK_LT(option, set_query_options_mask->size()); \
-      set_query_options_mask->reset(option);             \
-    }                                                    \
+#define QUERY_OPT_FN(NAME, ENUM, LEVEL)                           \
+  if (query_options->NAME != defaults.NAME) {                     \
+    query_options->__isset.NAME = defaults.__isset.NAME;          \
+    query_options->NAME = defaults.NAME;                          \
+    TImpalaQueryOptions::type option = TImpalaQueryOptions::ENUM; \
+    if (set_query_options_mask != nullptr) {                      \
+      DCHECK_LT(option, set_query_options_mask->size());          \
+      set_query_options_mask->reset(option);                      \
+    }                                                             \
   }
 #define REMOVED_QUERY_OPT_FN(NAME, ENUM)
   QUERY_OPTS_TABLE
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 8104c83cd..44ddd6f48 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -15,15 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef IMPALA_SERVICE_QUERY_OPTIONS_H
-#define IMPALA_SERVICE_QUERY_OPTIONS_H
+#pragma once
 
-#include <string>
+#include <bitset>
 #include <map>
+#include <string>
 #include <unordered_map>
-#include <bitset>
 
 #include "common/status.h"
+#include "gen-cpp/Query_types.h"  // for TQueryOptionsHash
 
 /// Utility methods to process per-query options
 
@@ -50,9 +50,10 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
 // option in the enum TImpalaQueryOptions (defined in ImpalaService.thrift)
 // plus one. Thus, the second argument to the DCHECK has to be updated every
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
+constexpr unsigned NUM_QUERY_OPTIONS =
+    TImpalaQueryOptions::ENABLE_TUPLE_CACHE_VERIFICATION + 1;
 #define QUERY_OPTS_TABLE                                                       
          \
-  DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),                       
          \
-      TImpalaQueryOptions::ENABLE_TUPLE_CACHE_VERIFICATION + 1);               
          \
+  DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS);   
          \
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, 
ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)     
          \
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)   
          \
@@ -62,25 +63,30 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
   REMOVED_QUERY_OPT_FN(disable_cached_reads, DISABLE_CACHED_READS)             
          \
   QUERY_OPT_FN(                                                                
          \
       disable_outermost_topn, DISABLE_OUTERMOST_TOPN, 
TQueryOptionLevel::DEVELOPMENT)    \
-  QUERY_OPT_FN(disable_codegen, DISABLE_CODEGEN, TQueryOptionLevel::REGULAR)   
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(disable_codegen, DISABLE_CODEGEN,            
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(explain_level, EXPLAIN_LEVEL, TQueryOptionLevel::REGULAR)       
          \
-  QUERY_OPT_FN(hbase_cache_blocks, HBASE_CACHE_BLOCKS, 
TQueryOptionLevel::ADVANCED)      \
-  QUERY_OPT_FN(hbase_caching, HBASE_CACHING, TQueryOptionLevel::ADVANCED)      
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(hbase_cache_blocks, HBASE_CACHE_BLOCKS,      
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(hbase_caching, HBASE_CACHING,                
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(max_errors, MAX_ERRORS, TQueryOptionLevel::ADVANCED)            
          \
   REMOVED_QUERY_OPT_FN(max_io_buffers, MAX_IO_BUFFERS)                         
          \
   QUERY_OPT_FN(                                                                
          \
       max_scan_range_length, MAX_SCAN_RANGE_LENGTH, 
TQueryOptionLevel::DEVELOPMENT)      \
-  QUERY_OPT_FN(mem_limit, MEM_LIMIT, TQueryOptionLevel::REGULAR)               
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(mem_limit, MEM_LIMIT, 
TQueryOptionLevel::REGULAR)      \
   QUERY_OPT_FN(num_nodes, NUM_NODES, TQueryOptionLevel::DEVELOPMENT)           
          \
-  QUERY_OPT_FN(num_scanner_threads, NUM_SCANNER_THREADS, 
TQueryOptionLevel::REGULAR)     \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(num_scanner_threads, NUM_SCANNER_THREADS,    
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(compression_codec, COMPRESSION_CODEC, 
TQueryOptionLevel::REGULAR)         \
   QUERY_OPT_FN(parquet_file_size, PARQUET_FILE_SIZE, 
TQueryOptionLevel::ADVANCED)        \
-  QUERY_OPT_FN(request_pool, REQUEST_POOL, TQueryOptionLevel::REGULAR)         
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(request_pool, REQUEST_POOL, 
TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(reservation_request_timeout, 
RESERVATION_REQUEST_TIMEOUT)         \
-  QUERY_OPT_FN(sync_ddl, SYNC_DDL, TQueryOptionLevel::REGULAR)                 
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(sync_ddl, SYNC_DDL, 
TQueryOptionLevel::REGULAR)        \
   REMOVED_QUERY_OPT_FN(v_cpu_cores, V_CPU_CORES)                               
          \
   REMOVED_QUERY_OPT_FN(rm_initial_mem, RM_INITIAL_MEM)                         
          \
-  QUERY_OPT_FN(query_timeout_s, QUERY_TIMEOUT_S, TQueryOptionLevel::REGULAR)   
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(query_timeout_s, QUERY_TIMEOUT_S,            
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(buffer_pool_limit, BUFFER_POOL_LIMIT, 
TQueryOptionLevel::ADVANCED)        \
   QUERY_OPT_FN(appx_count_distinct, APPX_COUNT_DISTINCT, 
TQueryOptionLevel::ADVANCED)    \
   QUERY_OPT_FN(disable_unsafe_spills, DISABLE_UNSAFE_SPILLS, 
TQueryOptionLevel::REGULAR) \
@@ -117,9 +123,12 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       runtime_filter_max_size, RUNTIME_FILTER_MAX_SIZE, 
TQueryOptionLevel::ADVANCED)     \
   QUERY_OPT_FN(prefetch_mode, PREFETCH_MODE, TQueryOptionLevel::ADVANCED)      
          \
   QUERY_OPT_FN(strict_mode, STRICT_MODE, TQueryOptionLevel::DEVELOPMENT)       
          \
-  QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT, TQueryOptionLevel::REGULAR)       
          \
-  QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES, 
TQueryOptionLevel::ADVANCED)  \
-  QUERY_OPT_FN(enable_cnf_rewrites, ENABLE_CNF_REWRITES, 
TQueryOptionLevel::ADVANCED)    \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT,                
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES,  
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(enable_cnf_rewrites, ENABLE_CNF_REWRITES,    
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(decimal_v2, DECIMAL_V2, TQueryOptionLevel::DEVELOPMENT)         
          \
   QUERY_OPT_FN(parquet_dictionary_filtering, PARQUET_DICTIONARY_FILTERING,     
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
@@ -129,14 +138,15 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       parquet_read_statistics, PARQUET_READ_STATISTICS, 
TQueryOptionLevel::ADVANCED)     \
   QUERY_OPT_FN(default_join_distribution_mode, DEFAULT_JOIN_DISTRIBUTION_MODE, 
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(disable_codegen_rows_threshold, DISABLE_CODEGEN_ROWS_THRESHOLD, 
          \
-      TQueryOptionLevel::ADVANCED)                                             
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(disable_codegen_rows_threshold,              
          \
+      DISABLE_CODEGEN_ROWS_THRESHOLD, TQueryOptionLevel::ADVANCED)             
          \
   QUERY_OPT_FN(default_spillable_buffer_size, DEFAULT_SPILLABLE_BUFFER_SIZE,   
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(                                                                
          \
       min_spillable_buffer_size, MIN_SPILLABLE_BUFFER_SIZE, 
TQueryOptionLevel::ADVANCED) \
   QUERY_OPT_FN(max_row_size, MAX_ROW_SIZE, TQueryOptionLevel::REGULAR)         
          \
-  QUERY_OPT_FN(idle_session_timeout, IDLE_SESSION_TIMEOUT, 
TQueryOptionLevel::REGULAR)   \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(idle_session_timeout, IDLE_SESSION_TIMEOUT,  
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(compute_stats_min_sample_size, COMPUTE_STATS_MIN_SAMPLE_SIZE,   
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(exec_time_limit_s, EXEC_TIME_LIMIT_S, 
TQueryOptionLevel::REGULAR)         \
@@ -155,7 +165,8 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
   QUERY_OPT_FN(scan_bytes_limit, SCAN_BYTES_LIMIT, 
TQueryOptionLevel::ADVANCED)          \
   QUERY_OPT_FN(cpu_limit_s, CPU_LIMIT_S, TQueryOptionLevel::DEVELOPMENT)       
          \
   QUERY_OPT_FN(topn_bytes_limit, TOPN_BYTES_LIMIT, 
TQueryOptionLevel::ADVANCED)          \
-  QUERY_OPT_FN(client_identifier, CLIENT_IDENTIFIER, 
TQueryOptionLevel::ADVANCED)        \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(client_identifier, CLIENT_IDENTIFIER,        
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(resource_trace_ratio, RESOURCE_TRACE_RATIO, 
TQueryOptionLevel::ADVANCED)  \
   QUERY_OPT_FN(num_remote_executor_candidates, NUM_REMOTE_EXECUTOR_CANDIDATES, 
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
@@ -176,36 +187,41 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(default_hints_insert_statement, DEFAULT_HINTS_INSERT_STATEMENT, 
          \
       TQueryOptionLevel::REGULAR)                                              
          \
-  QUERY_OPT_FN(spool_query_results, SPOOL_QUERY_RESULTS, 
TQueryOptionLevel::DEVELOPMENT) \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(spool_query_results, SPOOL_QUERY_RESULTS,    
          \
+      TQueryOptionLevel::DEVELOPMENT)                                          
          \
   QUERY_OPT_FN(default_transactional_type, DEFAULT_TRANSACTIONAL_TYPE,         
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(statement_expression_limit, STATEMENT_EXPRESSION_LIMIT,         
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(statement_expression_limit, 
STATEMENT_EXPRESSION_LIMIT,\
       TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(max_statement_length_bytes, MAX_STATEMENT_LENGTH_BYTES,         
          \
       TQueryOptionLevel::REGULAR)                                              
          \
-  QUERY_OPT_FN(disable_data_cache, DISABLE_DATA_CACHE, 
TQueryOptionLevel::ADVANCED)      \
-  QUERY_OPT_FN(                                                                
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(disable_data_cache, DISABLE_DATA_CACHE,      
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(                                             
          \
       max_result_spooling_mem, MAX_RESULT_SPOOLING_MEM, 
TQueryOptionLevel::DEVELOPMENT)  \
-  QUERY_OPT_FN(max_spilled_result_spooling_mem, 
MAX_SPILLED_RESULT_SPOOLING_MEM,         \
-      TQueryOptionLevel::DEVELOPMENT)                                          
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(max_spilled_result_spooling_mem,             
          \
+      MAX_SPILLED_RESULT_SPOOLING_MEM, TQueryOptionLevel::DEVELOPMENT)         
          \
   QUERY_OPT_FN(disable_hbase_num_rows_estimate, 
DISABLE_HBASE_NUM_ROWS_ESTIMATE,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(                                                                
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(                                             
          \
       fetch_rows_timeout_ms, FETCH_ROWS_TIMEOUT_MS, 
TQueryOptionLevel::ADVANCED)         \
   QUERY_OPT_FN(now_string, NOW_STRING, TQueryOptionLevel::DEVELOPMENT)         
          \
   QUERY_OPT_FN(parquet_object_store_split_size, 
PARQUET_OBJECT_STORE_SPLIT_SIZE,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(mem_limit_executors, MEM_LIMIT_EXECUTORS, 
TQueryOptionLevel::ADVANCED)    \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(mem_limit_executors, MEM_LIMIT_EXECUTORS,    
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(                                                                
          \
       broadcast_bytes_limit, BROADCAST_BYTES_LIMIT, 
TQueryOptionLevel::ADVANCED)         \
   QUERY_OPT_FN(preagg_bytes_limit, PREAGG_BYTES_LIMIT, 
TQueryOptionLevel::ADVANCED)      \
   QUERY_OPT_FN(max_cnf_exprs, MAX_CNF_EXPRS, TQueryOptionLevel::ADVANCED)      
          \
   QUERY_OPT_FN(kudu_snapshot_read_timestamp_micros, 
KUDU_SNAPSHOT_READ_TIMESTAMP_MICROS, \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(retry_failed_queries, RETRY_FAILED_QUERIES, 
TQueryOptionLevel::REGULAR)   \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(retry_failed_queries, RETRY_FAILED_QUERIES,  
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(enabled_runtime_filter_types, ENABLED_RUNTIME_FILTER_TYPES,     
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(async_codegen, ASYNC_CODEGEN, TQueryOptionLevel::DEVELOPMENT)   
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(async_codegen, ASYNC_CODEGEN,                
          \
+      TQueryOptionLevel::DEVELOPMENT)                                          
          \
   QUERY_OPT_FN(enable_distinct_semi_join_optimization,                         
          \
       ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION, TQueryOptionLevel::ADVANCED)     
          \
   QUERY_OPT_FN(sort_run_bytes_limit, SORT_RUN_BYTES_LIMIT, 
TQueryOptionLevel::ADVANCED)  \
@@ -224,7 +240,8 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       ENABLE_OUTER_JOIN_TO_INNER_TRANSFORMATION, TQueryOptionLevel::ADVANCED)  
          \
   QUERY_OPT_FN(targeted_kudu_scan_range_length, 
TARGETED_KUDU_SCAN_RANGE_LENGTH,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(report_skew_limit, REPORT_SKEW_LIMIT, 
TQueryOptionLevel::ADVANCED)        \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(report_skew_limit, REPORT_SKEW_LIMIT,        
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(optimize_simple_limit, OPTIMIZE_SIMPLE_LIMIT, 
TQueryOptionLevel::REGULAR) \
   QUERY_OPT_FN(use_dop_for_costing, USE_DOP_FOR_COSTING, 
TQueryOptionLevel::ADVANCED)    \
   QUERY_OPT_FN(broadcast_to_partition_factor, BROADCAST_TO_PARTITION_FACTOR,   
          \
@@ -280,9 +297,10 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
   QUERY_OPT_FN(expand_complex_types, EXPAND_COMPLEX_TYPES, 
TQueryOptionLevel::REGULAR)   \
   QUERY_OPT_FN(                                                                
          \
       fallback_db_for_functions, FALLBACK_DB_FOR_FUNCTIONS, 
TQueryOptionLevel::ADVANCED) \
-  QUERY_OPT_FN(                                                                
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(                                             
          \
       disable_codegen_cache, DISABLE_CODEGEN_CACHE, 
TQueryOptionLevel::ADVANCED)         \
-  QUERY_OPT_FN(codegen_cache_mode, CODEGEN_CACHE_MODE, 
TQueryOptionLevel::DEVELOPMENT)   \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(codegen_cache_mode, CODEGEN_CACHE_MODE,      
          \
+      TQueryOptionLevel::DEVELOPMENT)                                          
          \
   QUERY_OPT_FN(stringify_map_keys, STRINGIFY_MAP_KEYS, 
TQueryOptionLevel::ADVANCED)      \
   QUERY_OPT_FN(enable_trivial_query_for_admission, 
ENABLE_TRIVIAL_QUERY_FOR_ADMISSION,   \
       TQueryOptionLevel::REGULAR)                                              
          \
@@ -306,13 +324,14 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       large_agg_mem_threshold, LARGE_AGG_MEM_THRESHOLD, 
TQueryOptionLevel::ADVANCED)     \
   QUERY_OPT_FN(agg_mem_correlation_factor, AGG_MEM_CORRELATION_FACTOR,         
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(mem_limit_coordinators, MEM_LIMIT_COORDINATORS,                 
          \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(mem_limit_coordinators, 
MEM_LIMIT_COORDINATORS,        \
       TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(iceberg_predicate_pushdown_subsetting,                          
          \
       ICEBERG_PREDICATE_PUSHDOWN_SUBSETTING, TQueryOptionLevel::DEVELOPMENT)   
          \
   QUERY_OPT_FN(hdfs_scanner_non_reserved_bytes, 
HDFS_SCANNER_NON_RESERVED_BYTES,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(codegen_opt_level, CODEGEN_OPT_LEVEL, 
TQueryOptionLevel::ADVANCED)        \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(codegen_opt_level, CODEGEN_OPT_LEVEL,        
          \
+      TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(kudu_table_reserve_seconds, KUDU_TABLE_RESERVE_SECONDS,         
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(convert_kudu_utc_timestamps,                                    
          \
@@ -337,7 +356,8 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
       WRITE_KUDU_UTC_TIMESTAMPS, TQueryOptionLevel::ADVANCED)                  
          \
   QUERY_OPT_FN(disable_optimized_json_count_star, 
DISABLE_OPTIMIZED_JSON_COUNT_STAR,     \
       TQueryOptionLevel::ADVANCED)                                             
          \
-  QUERY_OPT_FN(long_polling_time_ms, LONG_POLLING_TIME_MS, 
TQueryOptionLevel::REGULAR)   \
+  TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(long_polling_time_ms, LONG_POLLING_TIME_MS,  
          \
+      TQueryOptionLevel::REGULAR)                                              
          \
   QUERY_OPT_FN(enable_tuple_cache_verification, 
ENABLE_TUPLE_CACHE_VERIFICATION,         \
       TQueryOptionLevel::ADVANCED)                                             
          \
   ;
@@ -364,8 +384,7 @@ void TQueryOptionsToMap(const TQueryOptions& query_options,
 std::string DebugQueryOptions(const TQueryOptions& query_options);
 
 /// Bitmask for the values of TQueryOptions.
-/// TODO: Find a way to set the size based on the number of fields.
-typedef std::bitset<192> QueryOptionsMask;
+typedef std::bitset<NUM_QUERY_OPTIONS> QueryOptionsMask;
 
 /// Updates the query options in dst from those in src where the query option 
is set
 /// (i.e. src->__isset.PROPERTY is true) and the corresponding bit in mask is 
set. If
@@ -406,10 +425,11 @@ Status ParseQueryOptions(const std::string& options, 
TQueryOptions* query_option
 /// entries.
 void PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels);
 
+/// Returns a hash of query option values that may modify fragment or query level results.
+TQueryOptionsHash QueryOptionsResultHash(const TQueryOptions& query_options);
+
 /// Reset all query options to its default value if they are not equal to default value.
 /// The bit corresponding to query option 'key' in set_query_options_mask is unset.
 Status ResetAllQueryOptions(
     TQueryOptions* query_options, QueryOptionsMask* set_query_options_mask);
 }
-
-#endif
diff --git a/be/src/thirdparty/datasketches/MurmurHash3.h 
b/be/src/thirdparty/datasketches/MurmurHash3.h
index 450fd474d..2eb8bc330 100644
--- a/be/src/thirdparty/datasketches/MurmurHash3.h
+++ b/be/src/thirdparty/datasketches/MurmurHash3.h
@@ -88,18 +88,15 @@ MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
   return k;
 }
 
-MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
+MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, HashState& out) {
   static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
   static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);
 
   const uint8_t* data = (const uint8_t*)key;
 
-  out.h1 = seed;
-  out.h2 = seed;
-
   // Number of full 128-bit blocks of 16 bytes.
   // Possible exclusion of a remainder of up to 15 bytes.
-  const int nblocks = lenBytes >> 4; // bytes / 16 
+  const int nblocks = lenBytes >> 4; // bytes / 16
 
   // Process the 128-bit blocks (the body) into the hash
   const uint64_t* blocks = (const uint64_t*)(data);
@@ -162,6 +159,12 @@ MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* 
key, int lenBytes, uin
   out.h2 += out.h1;
 }
 
+MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
+  out.h1 = seed;
+  out.h2 = seed;
+  MurmurHash3_x64_128(key, lenBytes, out);
+}
+
 //-----------------------------------------------------------------------------
 
 MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
diff --git a/common/thrift/ImpalaService.thrift 
b/common/thrift/ImpalaService.thrift
index 2284cc011..5c70d1019 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -209,6 +209,7 @@ enum TImpalaQueryOptions {
   // If true, the planner will not generate plans with streaming 
preaggregations.
   DISABLE_STREAMING_PREAGGREGATIONS = 35
 
+  // Select between off, local, or global runtime filters.
   RUNTIME_FILTER_MODE = 36
 
   // Size (in bytes) of a runtime Bloom Filter. Will be rounded up to nearest 
power of
@@ -472,7 +473,7 @@ enum TImpalaQueryOptions {
   // The maximum amount of time, in milliseconds, a fetch rows request (TFetchResultsReq)
   // from the client should spend fetching results (including waiting for results to
   // become available and materialize). When result spooling is enabled, a fetch request
-  // to may read multiple RowBatches, in which case, the timeout controls how long the
+  // may read multiple RowBatches, in which case, the timeout controls how long the
   // client waits for all returned RowBatches to be produced. If the timeout is hit, the
   // client returns whatever rows it has already read. Defaults to 10000 milliseconds. A
   // value of 0 causes fetch requests to wait indefinitely.
@@ -512,7 +513,7 @@ enum TImpalaQueryOptions {
   PREAGG_BYTES_LIMIT = 98
 
   // Indicates whether the FE should rewrite disjunctive predicates to 
conjunctive
-  // normal form (CNF) for optimization purposes. Default is False.
+  // normal form (CNF) for optimization purposes. Default is true.
   ENABLE_CNF_REWRITES = 99
 
   // The max number of conjunctive normal form (CNF) exprs to create when 
converting
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index bdfafa94d..d0b8355a6 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -144,6 +144,12 @@ const i32 MAX_FRAGMENT_INSTANCES_PER_NODE = 128
 // Conservative minimum size of hash table for low-cardinality aggregations.
 const i64 MIN_HASH_TBL_MEM = 10485760  // 10MB
 
+// Used to represent a 128-bit hash of query option values that are relevant to a cache.
+struct TQueryOptionsHash {
+  1: required i64 hi
+  2: required i64 lo
+}
+
 // Query options that correspond to ImpalaService.ImpalaQueryOptions, with 
their
 // respective defaults. Query options can be set in the following ways:
 //
@@ -927,6 +933,9 @@ struct TQueryCtx {
 
   // True if the query can be optimized for Iceberg V2 table.
   30: required bool optimize_count_star_for_iceberg_v2 = false
+
+  // 128-bit hash representing query option values that affect query results.
+  31: optional TQueryOptionsHash query_options_result_hash
 }
 
 
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java 
b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index b28b9d085..c6d0e1ffc 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -54,6 +54,7 @@ import org.apache.impala.thrift.TPlan;
 import org.apache.impala.thrift.TPlanNode;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.thrift.TSortingOrder;
+import org.apache.impala.thrift.TQueryOptionsHash;
 import org.apache.impala.util.BitUtil;
 import org.apache.impala.util.ExprUtil;
 import org.slf4j.Logger;
@@ -1323,7 +1324,8 @@ abstract public class PlanNode extends TreeNode<PlanNode> 
{
    * This computes the cache key by hashing Thrift structures, but it only computes
    * the key if the node is eligible to avoid overhead.
    */
-  public void computeTupleCacheInfo(DescriptorTable descTbl) {
+  public void computeTupleCacheInfo(DescriptorTable descTbl,
+      TQueryOptionsHash queryOptsHash) {
     if (tupleCacheInfo_ != null) {
       // Already computed.
       LOG.trace("Tuple cache found for {}", this);
@@ -1335,7 +1337,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> 
{
     // computing the tuple cache information is a bottom-up tree traversal,
     // so visit and merge the children before processing this node's contents
     for (PlanNode child : getChildren()) {
-      child.computeTupleCacheInfo(descTbl);
+      child.computeTupleCacheInfo(descTbl, queryOptsHash);
       if (!tupleCacheInfo_.mergeChild(child.getTupleCacheInfo())) {
         LOG.trace("{} ineligible for caching due to {}", this, child);
       }
@@ -1367,7 +1369,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> 
{
       }
 
       // Build may not have been visited yet.
-      build.computeTupleCacheInfo(descTbl);
+      build.computeTupleCacheInfo(descTbl, queryOptsHash);
       if (!tupleCacheInfo_.mergeChildWithScans(build.getTupleCacheInfo())) {
         LOG.trace("{} on {} ineligible for caching due to {}", filter, this, build);
         tupleCacheInfo_.finalizeHash();
@@ -1383,6 +1385,10 @@ abstract public class PlanNode extends 
TreeNode<PlanNode> {
     initThrift(msg, serialCtx);
     toThrift(msg, serialCtx);
     tupleCacheInfo_.hashThrift(msg);
+    if (getChildCount() == 0 && queryOptsHash != null) {
+      // Leaf node, add query options hash.
+      tupleCacheInfo_.hashThrift(queryOptsHash);
+    }
     tupleCacheInfo_.finalizeHash();
     LOG.trace("Hash for {}: {}", this, tupleCacheInfo_.getHashTrace());
   }
diff --git a/fe/src/main/java/org/apache/impala/planner/TupleCachePlanner.java 
b/fe/src/main/java/org/apache/impala/planner/TupleCachePlanner.java
index 8b4cdd101..e5c5f894a 100644
--- a/fe/src/main/java/org/apache/impala/planner/TupleCachePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/TupleCachePlanner.java
@@ -52,7 +52,8 @@ public class TupleCachePlanner {
     // Start at the root of the PlanNode tree
     PlanNode root = plan.get(0).getPlanRoot();
     // Step 1: Compute the TupleCacheInfo for all PlanNodes
-    root.computeTupleCacheInfo(ctx_.getRootAnalyzer().getDescTbl());
+    root.computeTupleCacheInfo(ctx_.getRootAnalyzer().getDescTbl(),
+        ctx_.getRootAnalyzer().getQueryCtx().query_options_result_hash);
 
     // Step 2: Build up the new PlanNode tree with TupleCacheNodes added
     PlanNode newRoot = buildCachingPlan(root);
diff --git a/tests/custom_cluster/test_tuple_cache.py 
b/tests/custom_cluster/test_tuple_cache.py
index 6a40331ba..6c86726fb 100644
--- a/tests/custom_cluster/test_tuple_cache.py
+++ b/tests/custom_cluster/test_tuple_cache.py
@@ -168,7 +168,8 @@ class TestTupleCache(TestTupleCacheBase):
     assertCounters(result2.runtime_profile, num_hits=0, num_halted=0, 
num_skipped=1)
 
   @CustomClusterTestSuite.with_args(
-    start_args=CACHE_START_ARGS, cluster_size=1)
+    start_args=CACHE_START_ARGS, cluster_size=1,
+    impalad_args="--tuple_cache_ignore_query_options=true")
   @pytest.mark.execute_serially
   def test_failpoints(self, vector, unique_database):
     fq_table = "{0}.failpoints".format(unique_database)
@@ -368,6 +369,90 @@ class TestTupleCache(TestTupleCacheBase):
     assert base_cache_keys == reload_cache_keys
     # Skips verifying cache hits as fragments may not be assigned to the same 
nodes.
 
+  @CustomClusterTestSuite.with_args(start_args=CACHE_START_ARGS, cluster_size=1)
+  @pytest.mark.execute_serially
+  def test_non_exempt_query_options(self, vector, unique_database):
+    """Non-exempt query options result in different cache entries"""
+    fq_table = "{0}.query_options".format(unique_database)
+    self.create_table(fq_table)
+    query = "SELECT * from {0}".format(fq_table)
+
+    strict_true = dict(vector.get_value('exec_option'))
+    strict_true['strict_mode'] = 'true'
+    strict_false = dict(vector.get_value('exec_option'))
+    strict_false['strict_mode'] = 'false'
+
+    noexempt1 = self.execute_query(query, query_options=strict_false)
+    noexempt2 = self.execute_query(query, query_options=strict_true)
+    noexempt3 = self.execute_query(query, query_options=strict_false)
+    noexempt4 = self.execute_query(query, query_options=strict_true)
+    noexempt5 = self.execute_query(query, query_options=vector.get_value('exec_option'))
+
+    assert noexempt1.success
+    assert noexempt2.success
+    assert noexempt3.success
+    assert noexempt4.success
+    assert noexempt5.success
+    assert noexempt1.data == noexempt2.data
+    assert noexempt1.data == noexempt3.data
+    assert noexempt1.data == noexempt4.data
+    assert noexempt1.data == noexempt5.data
+    assertCounters(noexempt1.runtime_profile, num_hits=0, num_halted=0, num_skipped=0)
+    assertCounters(noexempt2.runtime_profile, num_hits=0, num_halted=0, num_skipped=0)
+    assertCounters(noexempt3.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+    assertCounters(noexempt4.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+    assertCounters(noexempt5.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+
+  @CustomClusterTestSuite.with_args(start_args=CACHE_START_ARGS, cluster_size=1)
+  @pytest.mark.execute_serially
+  def test_exempt_query_options(self, vector, unique_database):
+    """Exempt query options share cache entry"""
+    fq_table = "{0}.query_options".format(unique_database)
+    self.create_table(fq_table)
+    query = "SELECT * from {0}".format(fq_table)
+
+    codegen_false = dict(vector.get_value('exec_option'))
+    codegen_false['disable_codegen'] = 'true'
+    codegen_true = dict(vector.get_value('exec_option'))
+    codegen_true['disable_codegen'] = 'false'
+
+    exempt1 = self.execute_query(query, query_options=codegen_true)
+    exempt2 = self.execute_query(query, query_options=codegen_false)
+    exempt3 = self.execute_query(query, query_options=vector.get_value('exec_option'))
+    assert exempt1.success
+    assert exempt2.success
+    assert exempt1.data == exempt2.data
+    assert exempt1.data == exempt3.data
+    assertCounters(exempt1.runtime_profile, num_hits=0, num_halted=0, num_skipped=0)
+    assertCounters(exempt2.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+    assertCounters(exempt3.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+
+  @CustomClusterTestSuite.with_args(
+      start_args=CACHE_START_ARGS, cluster_size=1,
+      impalad_args='--tuple_cache_exempt_query_options=max_errors,exec_time_limit_s')
+  @pytest.mark.execute_serially
+  def test_custom_exempt_query_options(self, vector, unique_database):
+    """Custom list of exempt query options share cache entry"""
+    fq_table = "{0}.query_options".format(unique_database)
+    self.create_table(fq_table)
+    query = "SELECT * from {0}".format(fq_table)
+
+    errors_10 = dict(vector.get_value('exec_option'))
+    errors_10['max_errors'] = '10'
+    exec_time_limit = dict(vector.get_value('exec_option'))
+    exec_time_limit['exec_time_limit_s'] = '30'
+
+    exempt1 = self.execute_query(query, query_options=errors_10)
+    exempt2 = self.execute_query(query, query_options=exec_time_limit)
+    exempt3 = self.execute_query(query, query_options=vector.get_value('exec_option'))
+    assert exempt1.success
+    assert exempt2.success
+    assert exempt1.data == exempt2.data
+    assert exempt1.data == exempt3.data
+    assertCounters(exempt1.runtime_profile, num_hits=0, num_halted=0, num_skipped=0)
+    assertCounters(exempt2.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+    assertCounters(exempt3.runtime_profile, num_hits=1, num_halted=0, num_skipped=0)
+
 
 class TestTupleCacheRuntimeKeysBasic(TestTupleCacheBase):
   """Simpler tests that run on a single node with mt_dop=0 or mt_dop=1."""

(impala) 03/03: IMPALA-13186: Tag query option scope for tuple cache

Reply via email to