This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3b0705ba63c7eb6910829b5a7a7feb8f6d8e5607
Author: Michael Smith <[email protected]>
AuthorDate: Mon May 8 13:11:55 2023 -0700

    IMPALA-11941: Support Java 17 in Impala
    
    Enables building for Java 17 - and particularly using Java 17 in
    containers - but won't run a minicluster fully with Java 17 as some
    projects (Hadoop) don't yet support it.
    
    Starting with Java 15, ehcache.sizeof encounters
    UnsupportedOperationException: can't get field offset on a hidden class
    in class members pointing to capturing lambda functions. Java 17 also
    introduces new modules that need to be added to add-opens. Both of these
    pose problems for continued use of ehcache.
    
    Adds https://github.com/jbellis/jamm as a new cache weigher for Java
    15+. We build from HEAD as an external project until Java 17 support is
    released (https://github.com/jbellis/jamm/issues/44). Adds the
    'java_weigher' option to select 'sizeof' or 'jamm'; defaults to 'auto',
    which uses jamm for Java 15+ and sizeof for everything else. Also adds
    metrics for viewing cache weight results.
    
    Adds JAVA_HOME/lib/server to LD_LIBRARY_PATH in run-jvm-binary to
    simplify switching between JDK versions for testing. You can now
    - export IMPALA_JDK_VERSION=11
    - source bin/impala-config.sh
    - start-impala-cluster.py
    and have Impala running a different JDK (11) version.
    
    Retains add-opens calls that are still necessary due to dependencies'
    use of lambdas for jamm, and all others for ehcache. Add-opens are still
    required as a fallback, as noted in
    https://github.com/jbellis/jamm#object-graph-crawling. We catch the
    exceptions jamm and ehcache throw - CannotAccessFieldException,
    UnsupportedOperationException - to avoid crashing Impala, and add it to
    the list of banned log messages (as we should add-opens when we find
    them).
    
    Testing:
    - container test run with Java 11 and 17 (excludes custom cluster)
    - manual custom_cluster/test_local_catalog.py +
      test_banned_log_messages.py run with Java 11 and 17 (Java 8 build)
    - full Java 11 build (passed except IMPALA-12184)
    - add test catalog cache entry size metrics fit reasonable bounds
    - add unit test for utility to find jamm jar file in classpath
    
    Change-Id: Ic378896f572e030a3a019646a96a32a07866a737
    Reviewed-on: http://gerrit.cloudera.org:8080/19863
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/common/init.cc                              | 152 +++++++++++++++------
 be/src/service/impala-server.cc                    |   6 +-
 be/src/util/backend-gflag-util.cc                  |   2 +
 be/src/util/filesystem-util-test.cc                |  53 +++++++
 be/src/util/filesystem-util.cc                     |  20 +++
 be/src/util/filesystem-util.h                      |   9 ++
 be/src/util/impalad-metrics.cc                     |  10 ++
 be/src/util/impalad-metrics.h                      |   8 ++
 bin/impala-config-java.sh                          |   7 +-
 bin/impala-config.sh                               |  15 +-
 .../dockerized-impala-bootstrap-and-test.sh        |   2 +-
 bin/jenkins/dockerized-impala-run-tests.sh         |  13 +-
 bin/rat_exclude_files.txt                          |   1 +
 bin/run-all-tests.sh                               |   7 +-
 bin/run-jvm-binary.sh                              |   2 +
 bin/start-impala-cluster.py                        |   4 +-
 common/thrift/BackendGflags.thrift                 |   2 +
 common/thrift/Frontend.thrift                      |   2 +
 common/thrift/metrics.json                         |  20 +++
 docker/CMakeLists.txt                              |  12 ++
 docker/daemon_entrypoint.sh                        |  22 +--
 fe/pom.xml                                         |  12 +-
 .../org/apache/impala/catalog/FeCatalogUtils.java  |   5 +
 .../impala/catalog/local/CatalogdMetaProvider.java |  60 +++++++-
 .../org/apache/impala/service/BackendConfig.java   |   4 +
 .../catalog/local/CatalogdMetaProviderTest.java    |   6 +
 java/.gitignore                                    |   1 +
 java/CMakeLists.txt                                |  14 +-
 java/toolchains.xml.tmpl                           |  35 +++++
 tests/custom_cluster/test_local_catalog.py         |   8 ++
 tests/verifiers/test_banned_log_messages.py        |  15 +-
 31 files changed, 441 insertions(+), 88 deletions(-)

diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 6671c3eb9..db76f46eb 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -45,6 +45,7 @@
 #include "util/cpu-info.h"
 #include "util/debug-util.h"
 #include "util/disk-info.h"
+#include "util/filesystem-util.h"
 #include "util/jni-util.h"
 #include "util/logging-support.h"
 #include "util/mem-info.h"
@@ -66,6 +67,7 @@
 #include "common/names.h"
 
 using namespace impala;
+namespace filesystem = boost::filesystem;
 
 DECLARE_bool(enable_process_lifetime_heap_profiling);
 DECLARE_string(heap_profile_dir);
@@ -101,6 +103,11 @@ DEFINE_bool(jvm_automatic_add_opens, true,
     "Adds necessary --add-opens options for core Java modules necessary to 
correctly "
     "calculate catalog metadata cache object sizes.");
 
+DEFINE_string_hidden(java_weigher, "auto",
+    "Choose between 'jamm' (com.github.jbellis:jamm) and 'sizeof' 
(org.ehcache:sizeof) "
+    "weighers for determining catalog metadata cache entry size. 'auto' uses 
'sizeof' "
+    "for Java 8 - 11, and 'jamm' for Java 15+.");
+
 // Defined by glog. This allows users to specify the log level using a glob. 
For
 // example -vmodule=*scanner*=3 would enable full logging for scanners. If 
redaction
 // is enabled, this option won't be allowed because some logging dumps table 
data
@@ -304,24 +311,21 @@ void BlockImpalaShutdownSignal() {
   AbortIfError(pthread_sigmask(SIG_BLOCK, &signals, nullptr), error_msg);
 }
 
-// Append add-opens args to JAVA_TOOL_OPTIONS for ehcache.
-static Status JavaAddOpens() {
-  if (!FLAGS_jvm_automatic_add_opens) return Status::OK();
-
-  // Unknown flags in JAVA_TOOL_OPTIONS causes Java to ignore everything, so 
only include
-  // --add-opens for Java 9+. Uses JAVA_HOME if set, otherwise relies on PATH.
+// Returns Java major version, such as 8, 11, or 17.
+static int GetJavaMajorVersion() {
   string cmd = "java";
   const char* java_home = getenv("JAVA_HOME");
   if (java_home != NULL) {
-    cmd = (boost::filesystem::path(java_home) / "bin" / "java").string();
+    cmd = (filesystem::path(java_home) / "bin" / "java").string();
   }
   cmd += " -version 2>&1";
   string msg;
   if (!RunShellProcess(cmd, &msg, false, {"JAVA_TOOL_OPTIONS"})) {
-    return Status(msg);
+    LOG(INFO) << Substitute("Unable to determine Java version (default to 8): 
$0", msg);
+    return 8;
   }
 
-  // Find a version string in the first line. Return if major version isn't 9+.
+  // Find a version string in the first line.
   string first_line;
   std::getline(istringstream(msg), first_line);
   // Need to allow for a wide variety of formats for different JDK 
implementations.
@@ -329,10 +333,41 @@ static Status JavaAddOpens() {
   std::regex java_version_pattern("\"([0-9]{1,3})\\.[0-9]+\\.[0-9]+[^\"]*\"");
   std::smatch matches;
   if (!std::regex_search(first_line, matches, java_version_pattern)) {
-    return Status(Substitute("Could not determine Java version: $0", msg));
+    LOG(INFO) << Substitute("Unable to determine Java version (default to 8): 
$0", msg);
+    return 8;
   }
   DCHECK_EQ(matches.size(), 2);
-  if (std::stoi(matches.str(1)) < 9) return Status::OK();
+  return std::stoi(matches.str(1));
+}
+
+// Append the javaagent arg to JAVA_TOOL_OPTIONS to load jamm.
+static Status JavaAddJammAgent() {
+  stringstream val_out;
+  char* current_val_c = getenv("JAVA_TOOL_OPTIONS");
+  if (current_val_c != NULL) {
+    val_out << current_val_c << " ";
+  }
+
+  istringstream classpath {getenv("CLASSPATH")};
+  string jamm_path, test_path;
+  while (getline(classpath, test_path, ':')) {
+    jamm_path = FileSystemUtil::FindFileInPath(test_path, "jamm-.*.jar");
+    if (!jamm_path.empty()) break;
+  }
+  if (jamm_path.empty()) {
+    return Status("Could not find jamm-*.jar in Java CLASSPATH");
+  }
+  val_out << "-javaagent:" << jamm_path;
+
+  if (setenv("JAVA_TOOL_OPTIONS", val_out.str().c_str(), 1) < 0) {
+    return Status(Substitute("Could not update JAVA_TOOL_OPTIONS: $0", 
GetStrErrMsg()));
+  }
+  return Status::OK();
+}
+
+// Append add-opens args to JAVA_TOOL_OPTIONS for ehcache and jamm.
+static Status JavaAddOpens(bool useSizeOf) {
+  if (!FLAGS_jvm_automatic_add_opens) return Status::OK();
 
   stringstream val_out;
   char* current_val_c = getenv("JAVA_TOOL_OPTIONS");
@@ -341,45 +376,78 @@ static Status JavaAddOpens() {
   }
 
   for (const string& param : {
-    "--add-opens=java.base/java.io=ALL-UNNAMED",
-    "--add-opens=java.base/java.lang.module=ALL-UNNAMED",
-    "--add-opens=java.base/java.lang.ref=ALL-UNNAMED",
-    "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
+    // Needed for jamm and ehcache
     "--add-opens=java.base/java.lang=ALL-UNNAMED",
-    "--add-opens=java.base/java.net=ALL-UNNAMED",
-    "--add-opens=java.base/java.nio.charset=ALL-UNNAMED",
-    "--add-opens=java.base/java.nio.file.attribute=ALL-UNNAMED",
     "--add-opens=java.base/java.nio=ALL-UNNAMED",
-    "--add-opens=java.base/java.security=ALL-UNNAMED",
-    "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
-    "--add-opens=java.base/java.util.jar=ALL-UNNAMED",
-    "--add-opens=java.base/java.util.zip=ALL-UNNAMED",
-    "--add-opens=java.base/java.util=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.math=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.module=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.perf=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.reflect=ALL-UNNAMED",
-    "--add-opens=java.base/jdk.internal.util.jar=ALL-UNNAMED",
-    "--add-opens=java.base/sun.nio.fs=ALL-UNNAMED",
-    "--add-opens=jdk.dynalink/jdk.dynalink.beans=ALL-UNNAMED",
-    "--add-opens=jdk.dynalink/jdk.dynalink.linker.support=ALL-UNNAMED",
-    "--add-opens=jdk.dynalink/jdk.dynalink.linker=ALL-UNNAMED",
-    "--add-opens=jdk.dynalink/jdk.dynalink.support=ALL-UNNAMED",
-    "--add-opens=jdk.dynalink/jdk.dynalink=ALL-UNNAMED",
-    "--add-opens=jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED",
-    "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED"
+    "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"
   }) {
     val_out << " " << param;
   }
 
+  if (useSizeOf) {
+    for (const string& param : {
+      // Only needed for ehcache
+      "--add-opens=java.base/java.io=ALL-UNNAMED",
+      "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED",
+      "--add-opens=java.base/java.lang.module=ALL-UNNAMED",
+      "--add-opens=java.base/java.lang.ref=ALL-UNNAMED",
+      "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
+      "--add-opens=java.base/java.net=ALL-UNNAMED",
+      "--add-opens=java.base/java.nio.charset=ALL-UNNAMED",
+      "--add-opens=java.base/java.nio.file.attribute=ALL-UNNAMED",
+      "--add-opens=java.base/java.security=ALL-UNNAMED",
+      "--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED",
+      "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
+      "--add-opens=java.base/java.util.jar=ALL-UNNAMED",
+      "--add-opens=java.base/java.util.zip=ALL-UNNAMED",
+      "--add-opens=java.base/java.util=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.math=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.module=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.perf=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.platform=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.reflect=ALL-UNNAMED",
+      "--add-opens=java.base/jdk.internal.util.jar=ALL-UNNAMED",
+      "--add-opens=java.base/sun.net.www.protocol.jar=ALL-UNNAMED",
+      "--add-opens=java.base/sun.nio.fs=ALL-UNNAMED",
+      "--add-opens=jdk.dynalink/jdk.dynalink.beans=ALL-UNNAMED",
+      "--add-opens=jdk.dynalink/jdk.dynalink.linker.support=ALL-UNNAMED",
+      "--add-opens=jdk.dynalink/jdk.dynalink.linker=ALL-UNNAMED",
+      "--add-opens=jdk.dynalink/jdk.dynalink.support=ALL-UNNAMED",
+      "--add-opens=jdk.dynalink/jdk.dynalink=ALL-UNNAMED",
+      "--add-opens=jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED",
+      "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED"
+    }) {
+      val_out << " " << param;
+    }
+  }
+
   if (setenv("JAVA_TOOL_OPTIONS", val_out.str().c_str(), 1) < 0) {
     return Status(Substitute("Could not update JAVA_TOOL_OPTIONS: $0", 
GetStrErrMsg()));
   }
   return Status::OK();
 }
 
+static Status InitializeJavaWeigher() {
+  // This is set up so the default if things go wrong is to continue using 
ehcache.sizeof.
+  int version = GetJavaMajorVersion();
+  if (FLAGS_java_weigher == "auto") {
+    // Update for backend-gflag-util.cc setting use_jamm_weigher.
+    FLAGS_java_weigher = (version >= 15) ? "jamm" : "sizeof";
+  }
+  LOG(INFO) << "Using Java weigher " << FLAGS_java_weigher;
+
+  if (FLAGS_java_weigher == "jamm") {
+    RETURN_IF_ERROR(JavaAddJammAgent());
+  }
+  if (version >= 9) {
+    // add-opens is only supported in Java 9+.
+    RETURN_IF_ERROR(JavaAddOpens(FLAGS_java_weigher != "jamm"));
+  }
+  return Status::OK();
+}
+
 static Status JavaSetProcessName(string name) {
   string current_val;
   char* current_val_c = getenv("JAVA_TOOL_OPTIONS");
@@ -559,13 +627,9 @@ void impala::InitCommonRuntime(int argc, char** argv, bool 
init_jvm,
   if (!fs_cache_init_status.ok()) 
CLEAN_EXIT_WITH_ERROR(fs_cache_init_status.GetDetail());
 
   if (init_jvm) {
-    // Add JAVA_TOOL_OPTIONS for ehcache
-    ABORT_IF_ERROR(JavaAddOpens());
-
-    ABORT_IF_ERROR(JavaSetProcessName(
-        boost::filesystem::path(argv[0]).filename().string()));
-
     if (!external_fe) {
+      ABORT_IF_ERROR(InitializeJavaWeigher());
+      
ABORT_IF_ERROR(JavaSetProcessName(filesystem::path(argv[0]).filename().string()));
       JniUtil::InitLibhdfs();
     }
     ABORT_IF_ERROR(JniUtil::Init());
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 432a9659a..e6105a80c 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -1561,6 +1561,8 @@ Status ImpalaServer::UpdateCatalogMetrics() {
   DCHECK(metrics.__isset.cache_hit_rate);
   DCHECK(metrics.__isset.cache_load_exception_rate);
   DCHECK(metrics.__isset.cache_miss_rate);
+  DCHECK(metrics.__isset.cache_entry_median_size);
+  DCHECK(metrics.__isset.cache_entry_99th_size);
   
ImpaladMetrics::CATALOG_CACHE_EVICTION_COUNT->SetValue(metrics.cache_eviction_count);
   ImpaladMetrics::CATALOG_CACHE_HIT_COUNT->SetValue(metrics.cache_hit_count);
   ImpaladMetrics::CATALOG_CACHE_LOAD_COUNT->SetValue(metrics.cache_load_count);
@@ -1576,8 +1578,10 @@ Status ImpalaServer::UpdateCatalogMetrics() {
   ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE->SetValue(
       metrics.cache_load_exception_rate);
   ImpaladMetrics::CATALOG_CACHE_MISS_RATE->SetValue(metrics.cache_miss_rate);
+  ImpaladMetrics::CATALOG_CACHE_ENTRY_MEDIAN_SIZE->SetValue(
+      metrics.cache_entry_median_size);
+  
ImpaladMetrics::CATALOG_CACHE_ENTRY_99TH_SIZE->SetValue(metrics.cache_entry_99th_size);
   return Status::OK();
-
 }
 
 shared_ptr<QueryDriver> ImpalaServer::GetQueryDriver(
diff --git a/be/src/util/backend-gflag-util.cc 
b/be/src/util/backend-gflag-util.cc
index eda30258c..965547515 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -106,6 +106,7 @@ DECLARE_bool(enable_reload_events);
 DECLARE_string(geospatial_library);
 DECLARE_int32(thrift_rpc_max_message_size);
 DECLARE_string(file_metadata_reload_properties);
+DECLARE_string(java_weigher);
 
 // HS2 SAML2.0 configuration
 // Defined here because TAG_FLAG caused issues in global-flags.cc
@@ -419,6 +420,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
   
cfg.__set_file_metadata_reload_properties(FLAGS_file_metadata_reload_properties);
   cfg.__set_thrift_rpc_max_message_size(FLAGS_thrift_rpc_max_message_size);
   cfg.__set_scan_range_cost_factor(FLAGS_scan_range_cost_factor);
+  cfg.__set_use_jamm_weigher(FLAGS_java_weigher == "jamm");
   return Status::OK();
 }
 
diff --git a/be/src/util/filesystem-util-test.cc 
b/be/src/util/filesystem-util-test.cc
index 4f56d5c64..57d7d87e7 100644
--- a/be/src/util/filesystem-util-test.cc
+++ b/be/src/util/filesystem-util-test.cc
@@ -121,6 +121,59 @@ TEST(FilesystemUtil, Paths) {
   EXPECT_EQ(string("def"), relpath);
 }
 
+TEST(FilesystemUtil, FindFileInPath) {
+  path dir = filesystem::unique_path();
+
+  // Paths to existent and non-existent dirs.
+  path subdir1 = dir / "impala1";
+  path subdir2 = dir / "impala2";
+  filesystem::create_directories(subdir1);
+
+  // Paths to existent and non-existent file.
+  path file1 = dir / "a_file1";
+  path file2 = dir / "a_file2";
+  ASSERT_OK(FileSystemUtil::CreateFile(file1.string()));
+
+  path file3 = subdir1 / "a_file3";
+  ASSERT_OK(FileSystemUtil::CreateFile(file3.string()));
+
+  string path = FileSystemUtil::FindFileInPath(dir.string(), "a.*1");
+  EXPECT_EQ(file1.string(), path);
+  path = FileSystemUtil::FindFileInPath(dir.string(), "a.*2");
+  EXPECT_EQ("", path);
+  path = FileSystemUtil::FindFileInPath(dir.string(), "a.*3");
+  EXPECT_EQ("", path);
+  path = FileSystemUtil::FindFileInPath(dir.string(), "");
+  EXPECT_EQ("", path);
+
+  path = FileSystemUtil::FindFileInPath(dir.string() + "/*", "a_file1");
+  EXPECT_EQ(file1.string(), path);
+  path = FileSystemUtil::FindFileInPath(dir.string(), "a_file2");
+  EXPECT_EQ("", path);
+
+  path = FileSystemUtil::FindFileInPath(dir.string(), "impala1");
+  EXPECT_EQ(subdir1.string(), path);
+  path = FileSystemUtil::FindFileInPath(dir.string(), "impala2");
+  EXPECT_EQ("", path);
+
+  path = FileSystemUtil::FindFileInPath(subdir1.string(), "a.*3");
+  EXPECT_EQ(file3.string(), path);
+  path = FileSystemUtil::FindFileInPath(subdir1.string(), "anything");
+  EXPECT_EQ("", path);
+  path = FileSystemUtil::FindFileInPath(subdir2.string(), ".*");
+  EXPECT_EQ("", path);
+
+  path = FileSystemUtil::FindFileInPath(file1.string(), "a.*1");
+  EXPECT_EQ(file1.string(), path);
+  path = FileSystemUtil::FindFileInPath(file1.string(), "a.*2");
+  EXPECT_EQ("", path);
+  path = FileSystemUtil::FindFileInPath(file2.string(), "a.*2");
+  EXPECT_EQ("", path);
+
+  // Cleanup
+  filesystem::remove_all(dir);
+}
+
 // This test exercises the handling of different directory entry types by 
GetEntryNames().
 TEST(FilesystemUtil, DirEntryTypes) {
   // Setup a temporary directory with one subdir
diff --git a/be/src/util/filesystem-util.cc b/be/src/util/filesystem-util.cc
index c5810caaa..a46e431ac 100644
--- a/be/src/util/filesystem-util.cc
+++ b/be/src/util/filesystem-util.cc
@@ -29,6 +29,7 @@
 #include <string>
 #include <vector>
 
+#include <boost/algorithm/string/trim.hpp>
 #include <boost/filesystem.hpp>
 
 #include "common/status.h"
@@ -263,6 +264,25 @@ bool FileSystemUtil::GetRelativePath(const string& path, 
const string& start,
   return false;
 }
 
+std::string FileSystemUtil::FindFileInPath(string path, const std::string& 
regex) {
+  // Wildcard at the end matches any file, so trim and treat as a directory.
+  boost::algorithm::trim_right_if(path, boost::algorithm::is_any_of("*"));
+  if (path.empty()) return "";
+  if (!filesystem::exists(path)) return "";
+  std::regex pattern{regex};
+  if (filesystem::is_directory(path)) {
+    for (filesystem::directory_entry& child : 
filesystem::directory_iterator(path)) {
+      // If child matches pattern, use it
+      if (std::regex_match(child.path().filename().string(), pattern)) {
+        return child.path().string();
+      }
+    }
+  } else if (std::regex_match(filesystem::path(path).filename().string(), 
pattern)) {
+    return path;
+  }
+  return "";
+}
+
 FileSystemUtil::Directory::Directory(const string& path)
     : dir_path_(path),
       status_(Status::OK()) {
diff --git a/be/src/util/filesystem-util.h b/be/src/util/filesystem-util.h
index e37a76fc1..916cb5ec2 100644
--- a/be/src/util/filesystem-util.h
+++ b/be/src/util/filesystem-util.h
@@ -88,6 +88,15 @@ class FileSystemUtil {
   static bool GetRelativePath(const std::string& path, const std::string& 
start,
       std::string* relpath);
 
+  /// Finds the first file matching the supplied 'regex' in 'path', or returns 
empty
+  /// string; matches the behavior of paths specified in Java CLASSPATH for 
JARs.
+  /// If 'path' is a file, returns 'path' if 'regex' matches its filename.
+  /// If 'path' is a directory, returns the absolute path for a match if 
'regex' matches
+  /// any files in that directory.
+  /// If 'path' ends with a wildcard - as in "/lib/*" - it will treat path as 
a directory
+  /// excluding the wildcard.
+  static std::string FindFileInPath(string path, const std::string& regex);
+
   /// Ext filesystem on certain kernel versions may result in inconsistent 
metadata after
   /// punching holes in files. The filesystem may require fsck repair on next 
reboot.
   /// See KUDU-1508 for details. This function checks if the filesystem at 
'path' resides
diff --git a/be/src/util/impalad-metrics.cc b/be/src/util/impalad-metrics.cc
index 7f735b33d..697864087 100644
--- a/be/src/util/impalad-metrics.cc
+++ b/be/src/util/impalad-metrics.cc
@@ -138,6 +138,10 @@ const char* ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT 
=
     "catalog.cache.request-count";
 const char* ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME =
     "catalog.cache.total-load-time";
+const char* ImpaladMetricKeys::CATALOG_CACHE_ENTRY_MEDIAN_SIZE =
+    "catalog.cache.entry-median-size";
+const char* ImpaladMetricKeys::CATALOG_CACHE_ENTRY_99TH_SIZE =
+    "catalog.cache.entry-99th-size";
 const char* ImpaladMetricKeys::NUM_FILES_OPEN_FOR_INSERT =
     "impala-server.num-files-open-for-insert";
 const char* ImpaladMetricKeys::IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS =
@@ -237,6 +241,8 @@ DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = 
nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_MISS_RATE = nullptr;
+DoubleGauge* ImpaladMetrics::CATALOG_CACHE_ENTRY_MEDIAN_SIZE = nullptr;
+DoubleGauge* ImpaladMetrics::CATALOG_CACHE_ENTRY_99TH_SIZE = nullptr;
 
 // Properties
 BooleanProperty* ImpaladMetrics::CATALOG_READY = nullptr;
@@ -296,6 +302,10 @@ void ImpaladMetrics::InitCatalogMetrics(MetricGroup* m) {
         
catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT, 0);
     CATALOG_CACHE_TOTAL_LOAD_TIME =
         
catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME, 
0);
+    CATALOG_CACHE_ENTRY_MEDIAN_SIZE = catalog_metrics->AddDoubleGauge(
+        ImpaladMetricKeys::CATALOG_CACHE_ENTRY_MEDIAN_SIZE, 0);
+    CATALOG_CACHE_ENTRY_99TH_SIZE = catalog_metrics->AddDoubleGauge(
+        ImpaladMetricKeys::CATALOG_CACHE_ENTRY_99TH_SIZE, 0);
   }
 }
 
diff --git a/be/src/util/impalad-metrics.h b/be/src/util/impalad-metrics.h
index 35e53c82a..d206f916c 100644
--- a/be/src/util/impalad-metrics.h
+++ b/be/src/util/impalad-metrics.h
@@ -218,6 +218,12 @@ class ImpaladMetricKeys {
   /// Total time spent in Impalad Catalog cache loading new values.
   static const char* CATALOG_CACHE_TOTAL_LOAD_TIME;
 
+  /// Median size of Impalad Catalog cache entries.
+  static const char* CATALOG_CACHE_ENTRY_MEDIAN_SIZE;
+
+  /// 99th percentile size of Impalad Catalog cache entries.
+  static const char* CATALOG_CACHE_ENTRY_99TH_SIZE;
+
   /// Number of files open for insert
   static const char* NUM_FILES_OPEN_FOR_INSERT;
 
@@ -320,6 +326,8 @@ class ImpaladMetrics {
   static DoubleGauge* CATALOG_CACHE_HIT_RATE;
   static DoubleGauge* CATALOG_CACHE_LOAD_EXCEPTION_RATE;
   static DoubleGauge* CATALOG_CACHE_MISS_RATE;
+  static DoubleGauge* CATALOG_CACHE_ENTRY_MEDIAN_SIZE;
+  static DoubleGauge* CATALOG_CACHE_ENTRY_99TH_SIZE;
   static IntGauge* IMPALA_SERVER_NUM_OPEN_BEESWAX_SESSIONS;
   static IntGauge* IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS;
   static MetricGroup* IO_MGR_METRICS;
diff --git a/bin/impala-config-java.sh b/bin/impala-config-java.sh
index 0b1fa929b..276d37f85 100644
--- a/bin/impala-config-java.sh
+++ b/bin/impala-config-java.sh
@@ -21,12 +21,9 @@ IMPALA_JDK_VERSION=${IMPALA_JDK_VERSION:-system}
 if [[ "${IMPALA_JDK_VERSION}" == "system" || "${IMPALA_JDK_VERSION}" == "8" 
]]; then
   UBUNTU_JAVA_VERSION=8
   REDHAT_JAVA_VERSION=1.8.0
-elif [[ "${IMPALA_JDK_VERSION}" == "11" ]]; then
-  UBUNTU_JAVA_VERSION=11
-  REDHAT_JAVA_VERSION=11
 else
-  echo "Unknown value for IMPALA_JDK_VERSION=${IMPALA_JDK_VERSION}"
-  exit 1
+  UBUNTU_JAVA_VERSION="${IMPALA_JDK_VERSION}"
+  REDHAT_JAVA_VERSION="${IMPALA_JDK_VERSION}"
 fi
 
 if [[ "$(uname -p)" == 'aarch64' ]]; then
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index d6b7715f0..85e0b7095 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -267,10 +267,14 @@ export 
IMPALA_REDHAT7_DOCKER_BASE=${IMPALA_REDHAT7_DOCKER_BASE:-"centos:centos7.
 export 
IMPALA_REDHAT8_DOCKER_BASE=${IMPALA_REDHAT8_DOCKER_BASE:-"rockylinux:8.5"}
 export 
IMPALA_REDHAT9_DOCKER_BASE=${IMPALA_REDHAT9_DOCKER_BASE:-"rockylinux:9.2"}
 
-# When set to true, this instructs start-impala-cluster.py to start with Java 
11 images
-# created via 'make docker_java11_images' (or 'docker_debug_java11_images'). 
The Java
-# version used in these images is independent of the Java version used to 
compile Impala.
-export IMPALA_DOCKER_USE_JAVA11=${IMPALA_DOCKER_USE_JAVA11:-"false"}
+# Selects the version of Java to use when start-impala-cluster.py starts with 
container
+# images (created via e.g. 'make docker_debug_java11_images'). The Java 
version used in
+# these images is independent of the Java version used to compile Impala.
+# Accepts 8, 11, 17.
+export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"8"}
+if [ "${IMPALA_DOCKER_USE_JAVA11:-}" = "true" ]; then
+  export IMPALA_DOCKER_JAVA=11
+fi
 
 # There are multiple compatible implementations of zlib. Cloudflare Zlib is an
 # implementation with optimizations to use platform-specific CPU features that 
are not
@@ -387,8 +391,7 @@ fi
 # Decision tree:
 # if IMPALA_JAVA_HOME_OVERRIDE is set, respect it
 # else if IMPALA_JDK_VERSION == system, look for system JDK
-# else if IMPALA_JDK_VERSION == 8, look for Java 8 JDK
-# else if IMPALA_JDK_VERSION == 11, look for Java 11 JDK
+# else if IMPALA_JDK_VERSION == 8+, look for Java 8+ JDK
 
 # Initialize IMPALA_JDK_VERSION and set package variables for Docker builds
 . "$IMPALA_HOME/bin/impala-config-java.sh"
diff --git a/bin/jenkins/dockerized-impala-bootstrap-and-test.sh 
b/bin/jenkins/dockerized-impala-bootstrap-and-test.sh
index 19b8c0220..07ab2612d 100755
--- a/bin/jenkins/dockerized-impala-bootstrap-and-test.sh
+++ b/bin/jenkins/dockerized-impala-bootstrap-and-test.sh
@@ -36,7 +36,7 @@ source ./bin/bootstrap_system.sh
 # to preserve additional variables.
 ./bin/jenkins/dockerized-impala-preserve-vars.py \
     EE_TEST EE_TEST_FILES JDBC_TEST EXPLORATION_STRATEGY CMAKE_BUILD_TYPE \
-    IMPALA_DOCKER_USE_JAVA11
+    IMPALA_DOCKER_JAVA
 
 # Execute the tests using su to re-login so that group change made above
 # setup_docker takes effect. This does a full re-login and does not stay
diff --git a/bin/jenkins/dockerized-impala-run-tests.sh 
b/bin/jenkins/dockerized-impala-run-tests.sh
index cb665b18f..a5e3b165a 100755
--- a/bin/jenkins/dockerized-impala-run-tests.sh
+++ b/bin/jenkins/dockerized-impala-run-tests.sh
@@ -81,9 +81,16 @@ start-impala-cluster.py --kill
 # parquet-reader and impala-profile-tool are needed for e2e tests but not 
built for
 # non-test build.
 IMAGE_TYPE=docker_debug
-if ${IMPALA_DOCKER_USE_JAVA11-false}; then
-  IMAGE_TYPE=${IMAGE_TYPE}_java11
-fi
+case ${IMPALA_DOCKER_JAVA:-8} in
+  11)
+    IMAGE_TYPE=${IMAGE_TYPE}_java11
+    ;;
+  17)
+    IMAGE_TYPE=${IMAGE_TYPE}_java17
+    ;;
+  *)
+    ;;
+esac
 make -j ${IMPALA_BUILD_THREADS} ${IMAGE_TYPE}_images parquet-reader 
impala-profile-tool
 
 source_impala_config
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 2ec1575d6..ffa059080 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -133,6 +133,7 @@ fe/src/test/resources/adusers.ldif
 fe/src/test/resources/hbase-jaas-client.conf.template
 fe/src/test/resources/hbase-jaas-server.conf.template
 fe/src/test/resources/users.ldif
+java/toolchains.xml.tmpl
 testdata/AllTypesError/*.txt
 testdata/AllTypesErrorNoNulls/*.txt
 *.avsc
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 051a89efd..8f419c777 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -243,9 +243,14 @@ do
     # Requires a running impalad cluster because some tests (such as 
DataErrorTest and
     # JdbcTest) queries against an impala cluster.
     pushd "${IMPALA_FE_DIR}"
+
+    # Add Jamm as javaagent for CatalogdMetaProviderTest.testWeights
+    JAMM_JAR=$(compgen -G ${IMPALA_HOME}/fe/target/dependency/jamm-*.jar)
+    export JAVA_TOOL_OPTIONS="${JAVA_TOOL_OPTIONS-} -javaagent:${JAMM_JAR}"
+
     if $JAVA -version 2>&1 | grep -q -E ' version "(9|[1-9][0-9])\.'; then
       # If running with Java 9+, add-opens to JAVA_TOOL_OPTIONS for
-      # CatalogdMetaProviderTest.testWeights
+      # CatalogdMetaProviderTest.testWeights with ehcache.sizeof.
       JAVA_OPTIONS=" --add-opens=java.base/java.io=ALL-UNNAMED"
       JAVA_OPTIONS+=" --add-opens=java.base/java.lang.module=ALL-UNNAMED"
       JAVA_OPTIONS+=" --add-opens=java.base/java.lang.ref=ALL-UNNAMED"
diff --git a/bin/run-jvm-binary.sh b/bin/run-jvm-binary.sh
index 0aaf1eda4..1b7d34537 100755
--- a/bin/run-jvm-binary.sh
+++ b/bin/run-jvm-binary.sh
@@ -26,6 +26,8 @@ set -euo pipefail
 . "${IMPALA_HOME}/bin/set-classpath.sh"
 . "${IMPALA_HOME}/bin/set-ld-library-path.sh"
 
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(dirname ${LIB_JVM})"
+
 # LLVM must be on path to symbolise sanitiser stack traces.
 export 
PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_VERSION}/bin:${PATH}"
 exec "$@"
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 4b0883dc2..0f0c0e216 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -759,8 +759,8 @@ class DockerMiniClusterOperations(object):
       image_tag = daemon + "_debug"
     else:
       image_tag = daemon
-    if os.getenv('IMPALA_DOCKER_USE_JAVA11', '') == "true":
-      image_tag += "_java11"
+    java_versions = {"8": "", "11": "_java11", "17": "_java17"}
+    image_tag += java_versions[os.getenv('IMPALA_DOCKER_JAVA', '8')]
     host_name = self.__gen_host_name__(daemon, instance)
     container_name = self.__gen_container_name__(daemon, instance)
     # Mount configuration into container so that we don't need to rebuild 
container
diff --git a/common/thrift/BackendGflags.thrift 
b/common/thrift/BackendGflags.thrift
index 77e7420c0..5e4868082 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -256,4 +256,6 @@ struct TBackendGflags {
   112: required string file_metadata_reload_properties
 
   113: required double scan_range_cost_factor
+
+  114: required bool use_jamm_weigher
 }
diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift
index 89c46dd3d..45035389f 100644
--- a/common/thrift/Frontend.thrift
+++ b/common/thrift/Frontend.thrift
@@ -113,6 +113,8 @@ struct TGetCatalogMetricsResult {
   12: optional double cache_hit_rate
   13: optional double cache_load_exception_rate
   14: optional double cache_miss_rate
+  15: optional double cache_entry_median_size
+  16: optional double cache_entry_99th_size
 }
 
 // Arguments to getDbs, which returns a list of dbs that match an optional 
pattern
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index 7c144b9a6..8db554f1f 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -2917,6 +2917,26 @@
     "kind": "COUNTER",
     "key": "catalog.cache.total-load-time"
   },
+  {
+    "description": "Median size of entries added to the Impalad Catalog 
cache.",
+    "contexts": [
+      "IMPALAD"
+    ],
+    "label": "Impalad catalog cache entry median size",
+    "units": "BYTES",
+    "kind": "GAUGE",
+    "key": "catalog.cache.entry-median-size"
+  },
+  {
+    "description": "99th percentile size of entries added to the Impalad 
Catalog.",
+    "contexts": [
+      "IMPALAD"
+    ],
+    "label": "Impalad catalog cache entry 99th percentile size",
+    "units": "BYTES",
+    "kind": "GAUGE",
+    "key": "catalog.cache.entry-99th-size"
+  },
   {
     "description": "RPC queue length for partial object fetches.",
     "contexts": [
diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index fb620fa31..90fa34cc4 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -103,16 +103,20 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   endfunction()
   add_base_image(release "" "")
   add_base_image(release_java11 "" "--java 11")
+  add_base_image(release_java17 "" "--java 17")
   # Debug images include debug tools
   add_base_image(debug "--debug-build" "--install-debug-tools")
   add_base_image(debug_java11 "--debug-build" "--install-debug-tools --java 
11")
+  add_base_image(debug_java17 "--debug-build" "--install-debug-tools --java 
17")
 
   # Target to build all docker images. Dependencies are added for each docker 
image
   # instantiated below.
   add_custom_target(docker_images)
   add_custom_target(docker_java11_images)
+  add_custom_target(docker_java17_images)
   add_custom_target(docker_debug_images)
   add_custom_target(docker_debug_java11_images)
+  add_custom_target(docker_debug_java17_images)
   add_custom_target(quickstart_docker_images)
 
   set(exported_image_names "")
@@ -145,18 +149,26 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     set(release_target ${daemon_name}_image)
     set(release_java11_image ${daemon_name}_java11)
     set(release_java11_target ${daemon_name}_java11_image)
+    set(release_java17_image ${daemon_name}_java17)
+    set(release_java17_target ${daemon_name}_java17_image)
     set(debug_image ${daemon_name}_debug)
     set(debug_target ${daemon_name}_debug_image)
     set(debug_java11_image ${daemon_name}_debug_java11)
     set(debug_java11_target ${daemon_name}_debug_java11_image)
+    set(debug_java17_image ${daemon_name}_debug_java17)
+    set(debug_java17_target ${daemon_name}_debug_java17_image)
     add_daemon_docker_image(${release_target} ${daemon_name} ${release_image} 
release)
     add_daemon_docker_image(${release_java11_target} ${daemon_name} 
${release_java11_image} release_java11)
+    add_daemon_docker_image(${release_java17_target} ${daemon_name} 
${release_java17_image} release_java17)
     add_daemon_docker_image(${debug_target} ${daemon_name} ${debug_image} 
debug)
     add_daemon_docker_image(${debug_java11_target} ${daemon_name} 
${debug_java11_image} debug_java11)
+    add_daemon_docker_image(${debug_java17_target} ${daemon_name} 
${debug_java17_image} debug_java17)
     ADD_DEPENDENCIES(docker_images ${release_target})
     ADD_DEPENDENCIES(docker_java11_images ${release_java11_target})
+    ADD_DEPENDENCIES(docker_java17_images ${release_java17_target})
     ADD_DEPENDENCIES(docker_debug_images ${debug_target})
     ADD_DEPENDENCIES(docker_debug_java11_images ${debug_java11_target})
+    ADD_DEPENDENCIES(docker_debug_java17_images ${debug_java17_target})
   endfunction()
 
   # Stamp out image targets for all of the Impala daemons.
diff --git a/docker/daemon_entrypoint.sh b/docker/daemon_entrypoint.sh
index fda7a65b9..e8d77a2c4 100755
--- a/docker/daemon_entrypoint.sh
+++ b/docker/daemon_entrypoint.sh
@@ -52,28 +52,28 @@ fi
 
 # Need libjvm.so and libjsig.so on LD_LIBRARY_PATH
 # Ubuntu and Redhat use different locations for JAVA_HOME.
-# This detection logic handles both Java 8 and Java 11.
-# If both are present (which shouldn't happen), Java 11 is preferred.
+# This detection logic handles Java 8, 11, and 17. Prefers the newest version.
 JAVA_HOME=Unknown
 if [[ $DISTRIBUTION == Ubuntu ]]; then
   # Since the Java location includes the CPU architecture, use a glob
   # to find Java home
-  if compgen -G "/usr/lib/jvm/java-11-openjdk*" ; then
+  if compgen -G "/usr/lib/jvm/java-17-openjdk*" ; then
+    echo "Detected Java 17"
+    JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-17-openjdk*")
+  elif compgen -G "/usr/lib/jvm/java-11-openjdk*" ; then
+    echo "Detected Java 11"
     JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-11-openjdk*")
-    if compgen -G "/usr/lib/jvm/java-8-openjdk*" ; then
-      echo "WARNING: Java 8 is also present"
-    fi
   elif compgen -G "/usr/lib/jvm/java-8-openjdk*" ; then
     echo "Detected Java 8"
     JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-8-openjdk*")
   fi
 elif [[ $DISTRIBUTION == Redhat ]]; then
-  if [[ -d /usr/lib/jvm/jre-11 ]]; then
+  if [[ -d /usr/lib/jvm/jre-17 ]]; then
+    echo "Detected Java 17"
+    JAVA_HOME=/usr/lib/jvm/jre-17
+  elif [[ -d /usr/lib/jvm/jre-11 ]]; then
     echo "Detected Java 11"
     JAVA_HOME=/usr/lib/jvm/jre-11
-    if [[ -d /usr/lib/jvm/jre-1.8.0 ]]; then
-      echo "WARNING: Java 8 is also present"
-    fi
   elif [[ -d /usr/lib/jvm/jre-1.8.0 ]]; then
     echo "Detected Java 8"
     JAVA_HOME=/usr/lib/jvm/jre-1.8.0
@@ -82,7 +82,7 @@ fi
 
 if [[ $JAVA_HOME == Unknown ]]; then
   echo "ERROR: Did not find Java in any expected location."
-  echo "Only Java 8 and Java 11 are supported."
+  echo "Only Java 8, 11, and 17 are supported."
   exit 1
 fi
 
diff --git a/fe/pom.xml b/fe/pom.xml
index 549abf499..bfa588176 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -358,9 +358,15 @@ under the License.
     </dependency>
 
     <dependency>
-        <groupId>org.ehcache</groupId>
-        <artifactId>sizeof</artifactId>
-        <version>0.4.0</version>
+      <groupId>org.ehcache</groupId>
+      <artifactId>sizeof</artifactId>
+      <version>0.4.0</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.github.jbellis</groupId>
+      <artifactId>jamm</artifactId>
+      <version>0.4.0-IMPALA</version>
     </dependency>
 
     <dependency>
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java 
b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
index a238ff427..245a6ff12 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
@@ -51,6 +51,7 @@ import org.apache.impala.util.MetaStoreUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.codahale.metrics.Snapshot;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.cache.CacheStats;
@@ -418,6 +419,10 @@ public abstract class FeCatalogUtils {
     metrics.setCache_hit_rate(stats.hitRate());
     metrics.setCache_load_exception_rate(stats.loadExceptionRate());
     metrics.setCache_miss_rate(stats.missRate());
+
+    Snapshot cacheEntrySize = ((CatalogdMetaProvider) 
provider).getCacheEntrySize();
+    metrics.setCache_entry_median_size(cacheEntrySize.getMedian());
+    metrics.setCache_entry_99th_size(cacheEntrySize.get99thPercentile());
   }
 
 
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java 
b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
index de0f7d653..f35832c26 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
@@ -35,6 +35,9 @@ import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
 
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Snapshot;
+import com.codahale.metrics.UniformReservoir;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableCollection;
 import com.google.common.collect.ImmutableMap;
@@ -107,6 +110,8 @@ import org.apache.thrift.TException;
 import org.apache.thrift.TSerializer;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.ehcache.sizeof.SizeOf;
+import org.github.jamm.CannotAccessFieldException;
+import org.github.jamm.MemoryMeter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -285,6 +290,8 @@ public class CatalogdMetaProvider implements MetaProvider {
    */
   final Cache<Object,Object> cache_;
 
+  private final Histogram cacheEntrySize_ = new Histogram(new 
UniformReservoir());
+
   /**
    * The last catalog version seen in an update from the catalogd.
    *
@@ -364,7 +371,8 @@ public class CatalogdMetaProvider implements MetaProvider {
     cache_ = CacheBuilder.newBuilder()
         .maximumWeight(cacheSizeBytes)
         .expireAfterAccess(expirationSecs, TimeUnit.SECONDS)
-        .weigher(new SizeOfWeigher())
+        .weigher(new SizeOfWeigher(
+            BackendConfig.INSTANCE.useJammWeigher(), cacheEntrySize_))
         .recordStats()
         .build();
   }
@@ -373,6 +381,10 @@ public class CatalogdMetaProvider implements MetaProvider {
     return cache_.stats();
   }
 
+  public Snapshot getCacheEntrySize() {
+    return cacheEntrySize_.getSnapshot();
+  }
+
   @Override
   public Iterable<HdfsCachePool> getHdfsCachePools() {
     return hdfsCachePools_;
@@ -2004,8 +2016,6 @@ public class CatalogdMetaProvider implements MetaProvider 
{
     // Cache the reflected sizes of classes seen.
     private static final boolean CACHE_SIZES = true;
 
-    private static SizeOf SIZEOF = SizeOf.newInstance(BYPASS_FLYWEIGHT, 
CACHE_SIZES);
-
     private static final int BYTES_PER_WORD = 8; // Assume 64-bit VM.
     // Guava cache overhead based on:
     // 
http://code-o-matic.blogspot.com/2012/02/updated-memory-cost-per-javaguava.html
@@ -2013,9 +2023,51 @@ public class CatalogdMetaProvider implements 
MetaProvider {
         12 * BYTES_PER_WORD + // base cost per entry
         4 * BYTES_PER_WORD;  // for use of 'maximumSize()'
 
+    // Retain Ehcache while Jamm is thoroughly tested. We only expect to make 
one
+    // CatalogdMetaProvider and thus one SizeOfWeigher.
+    private final SizeOf SIZEOF;
+    private final MemoryMeter METER;
+
+    private final boolean useJamm_;
+    private final Histogram entrySize_;
+
+    public SizeOfWeigher(boolean useJamm, Histogram entrySize) {
+      if (useJamm) {
+        METER = MemoryMeter.builder().build();
+        SIZEOF = null;
+      } else {
+        SIZEOF = SizeOf.newInstance(BYPASS_FLYWEIGHT, CACHE_SIZES);
+        METER = null;
+      }
+      useJamm_ = useJamm;
+      entrySize_ = entrySize;
+    }
+
+    public SizeOfWeigher() {
+      this(true, null);
+    }
+
     @Override
     public int weigh(Object key, Object value) {
-      long size = SIZEOF.deepSizeOf(key, value) + OVERHEAD_PER_ENTRY;
+      long size = OVERHEAD_PER_ENTRY;
+      try {
+        if (useJamm_) {
+          size += METER.measureDeep(key);
+          size += METER.measureDeep(value);
+        } else {
+          size += SIZEOF.deepSizeOf(key, value);
+        }
+      } catch (CannotAccessFieldException e) {
+        // This may happen if we miss an add-opens call for lambdas in Java 17.
+        LOG.warn("Unable to weigh cache entry, additional add-opens needed", 
e);
+      } catch (UnsupportedOperationException e) {
+        // Thrown by ehcache for lambdas in Java 17.
+        LOG.warn("Unable to weigh cache entry", e);
+      }
+
+      if (entrySize_ != null) {
+        entrySize_.update(size);
+      }
       if (size > Integer.MAX_VALUE) {
         return Integer.MAX_VALUE;
       }
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java 
b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index 607910568..ac7f62715 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -411,4 +411,8 @@ public class BackendConfig {
   public float getScanRangeCostFactor() {
     return (float) backendCfg_.scan_range_cost_factor;
   }
+
+  public boolean useJammWeigher() {
+    return backendCfg_.use_jamm_weigher;
+  }
 }
diff --git 
a/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java
 
b/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java
index dbfb59639..d0cc54deb 100644
--- 
a/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java
+++ 
b/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java
@@ -274,6 +274,12 @@ public class CatalogdMetaProviderTest {
     int weigh = weigher.weigh(refs, null);
     assertTrue("Actual weigh: " + weigh, weigh > 4000);
     assertTrue("Actual weigh: " + weigh, weigh < 5000);
+
+    // Also continue to test ehcache.
+    weigher = new SizeOfWeigher(false, null);
+    weigh = weigher.weigh(refs, null);
+    assertTrue("Actual weigh: " + weigh, weigh > 4000);
+    assertTrue("Actual weigh: " + weigh, weigh < 5000);
   }
 
   @Test
diff --git a/java/.gitignore b/java/.gitignore
new file mode 100644
index 000000000..173495683
--- /dev/null
+++ b/java/.gitignore
@@ -0,0 +1 @@
+jamm-prefix/
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
index d26c4614e..89313e1f4 100644
--- a/java/CMakeLists.txt
+++ b/java/CMakeLists.txt
@@ -19,6 +19,18 @@ add_custom_target(validate_java_pom_versions ALL
   COMMAND $ENV{IMPALA_HOME}/bin/validate-java-pom-versions.sh
 )
 
-add_custom_target(java ALL DEPENDS gen-deps function-registry 
geospatial-udf-wrappers validate_java_pom_versions
+include(ExternalProject)
+ExternalProject_Add(jamm
+  GIT_REPOSITORY    https://github.com/jbellis/jamm
+  # 0.4.0 RC. A release is expected soon: 
https://github.com/jbellis/jamm/issues/44
+  GIT_TAG           49c3e691c43a39ddaccaf19839ef316807009377
+  PATCH_COMMAND     sed -i.bak s/0.4.0-SNAPSHOT/0.4.0-IMPALA/ pom.xml
+  CONFIGURE_COMMAND sed s:JAVA_HOME:$ENV{JAVA_HOME}:g 
${CMAKE_CURRENT_SOURCE_DIR}/toolchains.xml.tmpl > toolchains.xml
+  BUILD_IN_SOURCE   true
+  BUILD_COMMAND     mvn --toolchains toolchains.xml install 
-Dmaven.test.skip=true -DskipTests
+  INSTALL_COMMAND   ""
+)
+
+add_custom_target(java ALL DEPENDS gen-deps function-registry 
geospatial-udf-wrappers validate_java_pom_versions jamm
   COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
 )
diff --git a/java/toolchains.xml.tmpl b/java/toolchains.xml.tmpl
new file mode 100644
index 000000000..c5e0215ef
--- /dev/null
+++ b/java/toolchains.xml.tmpl
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<toolchains>
+  <!-- Provide toolchain entries for all versions Jamm expects. We don't 
actually use
+       11 or 17 to build or run tests however, so what they point to doesn't 
matter. -->
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>1.8</version>
+      <vendor>Oracle Corporation</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>JAVA_HOME</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>11</version>
+      <vendor>Oracle Corporation</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>JAVA_HOME</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>17</version>
+      <vendor>Oracle Corporation</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>JAVA_HOME</jdkHome>
+    </configuration>
+  </toolchain>
+</toolchains>
diff --git a/tests/custom_cluster/test_local_catalog.py 
b/tests/custom_cluster/test_local_catalog.py
index 89f19e075..32c65f872 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -486,6 +486,8 @@ class TestObservability(CustomClusterTestSuite):
       cache_miss_rate_metric_key = "catalog.cache.miss-rate"
       cache_hit_count_metric_key = "catalog.cache.hit-count"
       cache_request_count_metric_key = "catalog.cache.request-count"
+      cache_entry_median_size_key = "catalog.cache.entry-median-size"
+      cache_entry_99th_size_key = "catalog.cache.entry-99th-size"
       cache_request_count_prev_run = 0
       cache_hit_count_prev_run = 0
       test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database
@@ -512,6 +514,12 @@ class TestObservability(CustomClusterTestSuite):
           assert cache_request_count > cache_request_count_prev_run,\
              "%s not updated betweeen two query runs, query - %s"\
              % (cache_request_count_metric_key, query)
+
+          cache_entry_median_size = cache_metrics[cache_entry_median_size_key]
+          cache_entry_99th_size = cache_metrics[cache_entry_99th_size_key]
+          assert cache_entry_median_size > 300 and cache_entry_median_size < 
1000
+          assert cache_entry_99th_size > 12500 and cache_entry_99th_size < 
18000
+
           cache_hit_count_prev_run = cache_hit_count
           cache_request_count_prev_run = cache_request_count
     finally:
diff --git a/tests/verifiers/test_banned_log_messages.py 
b/tests/verifiers/test_banned_log_messages.py
index ac5d1e3e0..1f4d52162 100644
--- a/tests/verifiers/test_banned_log_messages.py
+++ b/tests/verifiers/test_banned_log_messages.py
@@ -22,7 +22,6 @@ import os
 import subprocess
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfDockerizedCluster
 
 
 class TestBannedLogMessages(ImpalaTestSuite):
@@ -31,13 +30,17 @@ class TestBannedLogMessages(ImpalaTestSuite):
   This test suite should be run after all the tests have been run.
   """
 
-  @SkipIfDockerizedCluster.daemon_logs_not_exposed
-  def test_no_inaccessible_objects(self):
-    """Test that cluster logs do not contain InaccessibleObjectException"""
-    log_dir = os.environ["IMPALA_LOGS_DIR"]
-    message = 'InaccessibleObjectException'
+  def assert_message_absent(self, message, 
log_dir=os.environ["IMPALA_LOGS_DIR"]):
     for root, _, files in os.walk(log_dir):
       for file in files:
         log_file_path = os.path.join(root, file)
         returncode = subprocess.call(['grep', message, log_file_path])
         assert returncode == 1, "%s contains '%s'" % (log_file_path, message)
+
+  def test_no_inaccessible_objects(self):
+    """Test that cluster logs do not contain InaccessibleObjectException"""
+    self.assert_message_absent('InaccessibleObjectException')
+
+  def test_no_unsupported_operations(self):
+    """Test that cluster logs do not contain jamm.CannotAccessFieldException"""
+    self.assert_message_absent('CannotAccessFieldException')

Reply via email to