This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3b0705ba63c7eb6910829b5a7a7feb8f6d8e5607 Author: Michael Smith <[email protected]> AuthorDate: Mon May 8 13:11:55 2023 -0700 IMPALA-11941: Support Java 17 in Impala Enables building for Java 17 - and particularly using Java 17 in containers - but won't run a minicluster fully with Java 17 as some projects (Hadoop) don't yet support it. Starting with Java 15, ehcache.sizeof encounters UnsupportedOperationException: can't get field offset on a hidden class in class members pointing to capturing lambda functions. Java 17 also introduces new modules that need to be added to add-opens. Both of these pose problems for continued use of ehcache. Adds https://github.com/jbellis/jamm as a new cache weigher for Java 15+. We build from HEAD as an external project until Java 17 support is released (https://github.com/jbellis/jamm/issues/44). Adds the 'java_weigher' option to select 'sizeof' or 'jamm'; defaults to 'auto', which uses jamm for Java 15+ and sizeof for everything else. Also adds metrics for viewing cache weight results. Adds JAVA_HOME/lib/server to LD_LIBRARY_PATH in run-jvm-binary to simplify switching between JDK versions for testing. You can now - export IMPALA_JDK_VERSION=11 - source bin/impala-config.sh - start-impala-cluster.py and have Impala running a different JDK (11) version. Retains add-opens calls that are still necessary due to dependencies' use of lambdas for jamm, and all others for ehcache. Add-opens are still required as a fallback, as noted in https://github.com/jbellis/jamm#object-graph-crawling. We catch the exceptions jamm and ehcache throw - CannotAccessFieldException, UnsupportedOperationException - to avoid crashing Impala, and add it to the list of banned log messages (as we should add-opens when we find them). Testing: - container test run with Java 11 and 17 (excludes custom cluster) - manual custom_cluster/test_local_catalog.py + test_banned_log_messages.py run with Java 11 and 17 (Java 8 build) - full Java 11 build (passed except IMPALA-12184) - add test catalog cache entry size metrics fit reasonable bounds - add unit test for utility to find jamm jar file in classpath Change-Id: Ic378896f572e030a3a019646a96a32a07866a737 Reviewed-on: http://gerrit.cloudera.org:8080/19863 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/common/init.cc | 152 +++++++++++++++------ be/src/service/impala-server.cc | 6 +- be/src/util/backend-gflag-util.cc | 2 + be/src/util/filesystem-util-test.cc | 53 +++++++ be/src/util/filesystem-util.cc | 20 +++ be/src/util/filesystem-util.h | 9 ++ be/src/util/impalad-metrics.cc | 10 ++ be/src/util/impalad-metrics.h | 8 ++ bin/impala-config-java.sh | 7 +- bin/impala-config.sh | 15 +- .../dockerized-impala-bootstrap-and-test.sh | 2 +- bin/jenkins/dockerized-impala-run-tests.sh | 13 +- bin/rat_exclude_files.txt | 1 + bin/run-all-tests.sh | 7 +- bin/run-jvm-binary.sh | 2 + bin/start-impala-cluster.py | 4 +- common/thrift/BackendGflags.thrift | 2 + common/thrift/Frontend.thrift | 2 + common/thrift/metrics.json | 20 +++ docker/CMakeLists.txt | 12 ++ docker/daemon_entrypoint.sh | 22 +-- fe/pom.xml | 12 +- .../org/apache/impala/catalog/FeCatalogUtils.java | 5 + .../impala/catalog/local/CatalogdMetaProvider.java | 60 +++++++- .../org/apache/impala/service/BackendConfig.java | 4 + .../catalog/local/CatalogdMetaProviderTest.java | 6 + java/.gitignore | 1 + java/CMakeLists.txt | 14 +- java/toolchains.xml.tmpl | 35 +++++ tests/custom_cluster/test_local_catalog.py | 8 ++ tests/verifiers/test_banned_log_messages.py | 15 +- 31 files changed, 441 insertions(+), 88 deletions(-) diff --git a/be/src/common/init.cc b/be/src/common/init.cc index 6671c3eb9..db76f46eb 100644 --- a/be/src/common/init.cc +++ b/be/src/common/init.cc @@ -45,6 +45,7 @@ #include "util/cpu-info.h" #include "util/debug-util.h" #include "util/disk-info.h" +#include "util/filesystem-util.h" #include "util/jni-util.h" #include "util/logging-support.h" #include "util/mem-info.h" @@ -66,6 +67,7 @@ #include "common/names.h" using namespace impala; +namespace filesystem = boost::filesystem; DECLARE_bool(enable_process_lifetime_heap_profiling); DECLARE_string(heap_profile_dir); @@ -101,6 +103,11 @@ DEFINE_bool(jvm_automatic_add_opens, true, "Adds necessary --add-opens options for core Java modules necessary to correctly " "calculate catalog metadata cache object sizes."); +DEFINE_string_hidden(java_weigher, "auto", + "Choose between 'jamm' (com.github.jbellis:jamm) and 'sizeof' (org.ehcache:sizeof) " + "weighers for determining catalog metadata cache entry size. 'auto' uses 'sizeof' " + "for Java 8 - 11, and 'jamm' for Java 15+."); + // Defined by glog. This allows users to specify the log level using a glob. For // example -vmodule=*scanner*=3 would enable full logging for scanners. If redaction // is enabled, this option won't be allowed because some logging dumps table data @@ -304,24 +311,21 @@ void BlockImpalaShutdownSignal() { AbortIfError(pthread_sigmask(SIG_BLOCK, &signals, nullptr), error_msg); } -// Append add-opens args to JAVA_TOOL_OPTIONS for ehcache. -static Status JavaAddOpens() { - if (!FLAGS_jvm_automatic_add_opens) return Status::OK(); - - // Unknown flags in JAVA_TOOL_OPTIONS causes Java to ignore everything, so only include - // --add-opens for Java 9+. Uses JAVA_HOME if set, otherwise relies on PATH. +// Returns Java major version, such as 8, 11, or 17. +static int GetJavaMajorVersion() { string cmd = "java"; const char* java_home = getenv("JAVA_HOME"); if (java_home != NULL) { - cmd = (boost::filesystem::path(java_home) / "bin" / "java").string(); + cmd = (filesystem::path(java_home) / "bin" / "java").string(); } cmd += " -version 2>&1"; string msg; if (!RunShellProcess(cmd, &msg, false, {"JAVA_TOOL_OPTIONS"})) { - return Status(msg); + LOG(INFO) << Substitute("Unable to determine Java version (default to 8): $0", msg); + return 8; } - // Find a version string in the first line. Return if major version isn't 9+. + // Find a version string in the first line. string first_line; std::getline(istringstream(msg), first_line); // Need to allow for a wide variety of formats for different JDK implementations. @@ -329,10 +333,41 @@ static Status JavaAddOpens() { std::regex java_version_pattern("\"([0-9]{1,3})\\.[0-9]+\\.[0-9]+[^\"]*\""); std::smatch matches; if (!std::regex_search(first_line, matches, java_version_pattern)) { - return Status(Substitute("Could not determine Java version: $0", msg)); + LOG(INFO) << Substitute("Unable to determine Java version (default to 8): $0", msg); + return 8; } DCHECK_EQ(matches.size(), 2); - if (std::stoi(matches.str(1)) < 9) return Status::OK(); + return std::stoi(matches.str(1)); +} + +// Append the javaagent arg to JAVA_TOOL_OPTIONS to load jamm. +static Status JavaAddJammAgent() { + stringstream val_out; + char* current_val_c = getenv("JAVA_TOOL_OPTIONS"); + if (current_val_c != NULL) { + val_out << current_val_c << " "; + } + + istringstream classpath {getenv("CLASSPATH")}; + string jamm_path, test_path; + while (getline(classpath, test_path, ':')) { + jamm_path = FileSystemUtil::FindFileInPath(test_path, "jamm-.*.jar"); + if (!jamm_path.empty()) break; + } + if (jamm_path.empty()) { + return Status("Could not find jamm-*.jar in Java CLASSPATH"); + } + val_out << "-javaagent:" << jamm_path; + + if (setenv("JAVA_TOOL_OPTIONS", val_out.str().c_str(), 1) < 0) { + return Status(Substitute("Could not update JAVA_TOOL_OPTIONS: $0", GetStrErrMsg())); + } + return Status::OK(); +} + +// Append add-opens args to JAVA_TOOL_OPTIONS for ehcache and jamm. +static Status JavaAddOpens(bool useSizeOf) { + if (!FLAGS_jvm_automatic_add_opens) return Status::OK(); stringstream val_out; char* current_val_c = getenv("JAVA_TOOL_OPTIONS"); @@ -341,45 +376,78 @@ static Status JavaAddOpens() { } for (const string& param : { - "--add-opens=java.base/java.io=ALL-UNNAMED", - "--add-opens=java.base/java.lang.module=ALL-UNNAMED", - "--add-opens=java.base/java.lang.ref=ALL-UNNAMED", - "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", + // Needed for jamm and ehcache "--add-opens=java.base/java.lang=ALL-UNNAMED", - "--add-opens=java.base/java.net=ALL-UNNAMED", - "--add-opens=java.base/java.nio.charset=ALL-UNNAMED", - "--add-opens=java.base/java.nio.file.attribute=ALL-UNNAMED", "--add-opens=java.base/java.nio=ALL-UNNAMED", - "--add-opens=java.base/java.security=ALL-UNNAMED", - "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", - "--add-opens=java.base/java.util.jar=ALL-UNNAMED", - "--add-opens=java.base/java.util.zip=ALL-UNNAMED", - "--add-opens=java.base/java.util=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.math=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.module=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.perf=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.reflect=ALL-UNNAMED", - "--add-opens=java.base/jdk.internal.util.jar=ALL-UNNAMED", - "--add-opens=java.base/sun.nio.fs=ALL-UNNAMED", - "--add-opens=jdk.dynalink/jdk.dynalink.beans=ALL-UNNAMED", - "--add-opens=jdk.dynalink/jdk.dynalink.linker.support=ALL-UNNAMED", - "--add-opens=jdk.dynalink/jdk.dynalink.linker=ALL-UNNAMED", - "--add-opens=jdk.dynalink/jdk.dynalink.support=ALL-UNNAMED", - "--add-opens=jdk.dynalink/jdk.dynalink=ALL-UNNAMED", - "--add-opens=jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED", - "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED" + "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED" }) { val_out << " " << param; } + if (useSizeOf) { + for (const string& param : { + // Only needed for ehcache + "--add-opens=java.base/java.io=ALL-UNNAMED", + "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", + "--add-opens=java.base/java.lang.module=ALL-UNNAMED", + "--add-opens=java.base/java.lang.ref=ALL-UNNAMED", + "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", + "--add-opens=java.base/java.net=ALL-UNNAMED", + "--add-opens=java.base/java.nio.charset=ALL-UNNAMED", + "--add-opens=java.base/java.nio.file.attribute=ALL-UNNAMED", + "--add-opens=java.base/java.security=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + "--add-opens=java.base/java.util.jar=ALL-UNNAMED", + "--add-opens=java.base/java.util.zip=ALL-UNNAMED", + "--add-opens=java.base/java.util=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.math=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.module=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.perf=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.platform=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.reflect=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.util.jar=ALL-UNNAMED", + "--add-opens=java.base/sun.net.www.protocol.jar=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.fs=ALL-UNNAMED", + "--add-opens=jdk.dynalink/jdk.dynalink.beans=ALL-UNNAMED", + "--add-opens=jdk.dynalink/jdk.dynalink.linker.support=ALL-UNNAMED", + "--add-opens=jdk.dynalink/jdk.dynalink.linker=ALL-UNNAMED", + "--add-opens=jdk.dynalink/jdk.dynalink.support=ALL-UNNAMED", + "--add-opens=jdk.dynalink/jdk.dynalink=ALL-UNNAMED", + "--add-opens=jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED", + "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED" + }) { + val_out << " " << param; + } + } + if (setenv("JAVA_TOOL_OPTIONS", val_out.str().c_str(), 1) < 0) { return Status(Substitute("Could not update JAVA_TOOL_OPTIONS: $0", GetStrErrMsg())); } return Status::OK(); } +static Status InitializeJavaWeigher() { + // This is set up so the default if things go wrong is to continue using ehcache.sizeof. + int version = GetJavaMajorVersion(); + if (FLAGS_java_weigher == "auto") { + // Update for backend-gflag-util.cc setting use_jamm_weigher. + FLAGS_java_weigher = (version >= 15) ? "jamm" : "sizeof"; + } + LOG(INFO) << "Using Java weigher " << FLAGS_java_weigher; + + if (FLAGS_java_weigher == "jamm") { + RETURN_IF_ERROR(JavaAddJammAgent()); + } + if (version >= 9) { + // add-opens is only supported in Java 9+. + RETURN_IF_ERROR(JavaAddOpens(FLAGS_java_weigher != "jamm")); + } + return Status::OK(); +} + static Status JavaSetProcessName(string name) { string current_val; char* current_val_c = getenv("JAVA_TOOL_OPTIONS"); @@ -559,13 +627,9 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm, if (!fs_cache_init_status.ok()) CLEAN_EXIT_WITH_ERROR(fs_cache_init_status.GetDetail()); if (init_jvm) { - // Add JAVA_TOOL_OPTIONS for ehcache - ABORT_IF_ERROR(JavaAddOpens()); - - ABORT_IF_ERROR(JavaSetProcessName( - boost::filesystem::path(argv[0]).filename().string())); - if (!external_fe) { + ABORT_IF_ERROR(InitializeJavaWeigher()); + ABORT_IF_ERROR(JavaSetProcessName(filesystem::path(argv[0]).filename().string())); JniUtil::InitLibhdfs(); } ABORT_IF_ERROR(JniUtil::Init()); diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index 432a9659a..e6105a80c 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -1561,6 +1561,8 @@ Status ImpalaServer::UpdateCatalogMetrics() { DCHECK(metrics.__isset.cache_hit_rate); DCHECK(metrics.__isset.cache_load_exception_rate); DCHECK(metrics.__isset.cache_miss_rate); + DCHECK(metrics.__isset.cache_entry_median_size); + DCHECK(metrics.__isset.cache_entry_99th_size); ImpaladMetrics::CATALOG_CACHE_EVICTION_COUNT->SetValue(metrics.cache_eviction_count); ImpaladMetrics::CATALOG_CACHE_HIT_COUNT->SetValue(metrics.cache_hit_count); ImpaladMetrics::CATALOG_CACHE_LOAD_COUNT->SetValue(metrics.cache_load_count); @@ -1576,8 +1578,10 @@ Status ImpalaServer::UpdateCatalogMetrics() { ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE->SetValue( metrics.cache_load_exception_rate); ImpaladMetrics::CATALOG_CACHE_MISS_RATE->SetValue(metrics.cache_miss_rate); + ImpaladMetrics::CATALOG_CACHE_ENTRY_MEDIAN_SIZE->SetValue( + metrics.cache_entry_median_size); + ImpaladMetrics::CATALOG_CACHE_ENTRY_99TH_SIZE->SetValue(metrics.cache_entry_99th_size); return Status::OK(); - } shared_ptr<QueryDriver> ImpalaServer::GetQueryDriver( diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc index eda30258c..965547515 100644 --- a/be/src/util/backend-gflag-util.cc +++ b/be/src/util/backend-gflag-util.cc @@ -106,6 +106,7 @@ DECLARE_bool(enable_reload_events); DECLARE_string(geospatial_library); DECLARE_int32(thrift_rpc_max_message_size); DECLARE_string(file_metadata_reload_properties); +DECLARE_string(java_weigher); // HS2 SAML2.0 configuration // Defined here because TAG_FLAG caused issues in global-flags.cc @@ -419,6 +420,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) { cfg.__set_file_metadata_reload_properties(FLAGS_file_metadata_reload_properties); cfg.__set_thrift_rpc_max_message_size(FLAGS_thrift_rpc_max_message_size); cfg.__set_scan_range_cost_factor(FLAGS_scan_range_cost_factor); + cfg.__set_use_jamm_weigher(FLAGS_java_weigher == "jamm"); return Status::OK(); } diff --git a/be/src/util/filesystem-util-test.cc b/be/src/util/filesystem-util-test.cc index 4f56d5c64..57d7d87e7 100644 --- a/be/src/util/filesystem-util-test.cc +++ b/be/src/util/filesystem-util-test.cc @@ -121,6 +121,59 @@ TEST(FilesystemUtil, Paths) { EXPECT_EQ(string("def"), relpath); } +TEST(FilesystemUtil, FindFileInPath) { + path dir = filesystem::unique_path(); + + // Paths to existent and non-existent dirs. + path subdir1 = dir / "impala1"; + path subdir2 = dir / "impala2"; + filesystem::create_directories(subdir1); + + // Paths to existent and non-existent file. + path file1 = dir / "a_file1"; + path file2 = dir / "a_file2"; + ASSERT_OK(FileSystemUtil::CreateFile(file1.string())); + + path file3 = subdir1 / "a_file3"; + ASSERT_OK(FileSystemUtil::CreateFile(file3.string())); + + string path = FileSystemUtil::FindFileInPath(dir.string(), "a.*1"); + EXPECT_EQ(file1.string(), path); + path = FileSystemUtil::FindFileInPath(dir.string(), "a.*2"); + EXPECT_EQ("", path); + path = FileSystemUtil::FindFileInPath(dir.string(), "a.*3"); + EXPECT_EQ("", path); + path = FileSystemUtil::FindFileInPath(dir.string(), ""); + EXPECT_EQ("", path); + + path = FileSystemUtil::FindFileInPath(dir.string() + "/*", "a_file1"); + EXPECT_EQ(file1.string(), path); + path = FileSystemUtil::FindFileInPath(dir.string(), "a_file2"); + EXPECT_EQ("", path); + + path = FileSystemUtil::FindFileInPath(dir.string(), "impala1"); + EXPECT_EQ(subdir1.string(), path); + path = FileSystemUtil::FindFileInPath(dir.string(), "impala2"); + EXPECT_EQ("", path); + + path = FileSystemUtil::FindFileInPath(subdir1.string(), "a.*3"); + EXPECT_EQ(file3.string(), path); + path = FileSystemUtil::FindFileInPath(subdir1.string(), "anything"); + EXPECT_EQ("", path); + path = FileSystemUtil::FindFileInPath(subdir2.string(), ".*"); + EXPECT_EQ("", path); + + path = FileSystemUtil::FindFileInPath(file1.string(), "a.*1"); + EXPECT_EQ(file1.string(), path); + path = FileSystemUtil::FindFileInPath(file1.string(), "a.*2"); + EXPECT_EQ("", path); + path = FileSystemUtil::FindFileInPath(file2.string(), "a.*2"); + EXPECT_EQ("", path); + + // Cleanup + filesystem::remove_all(dir); +} + // This test exercises the handling of different directory entry types by GetEntryNames(). TEST(FilesystemUtil, DirEntryTypes) { // Setup a temporary directory with one subdir diff --git a/be/src/util/filesystem-util.cc b/be/src/util/filesystem-util.cc index c5810caaa..a46e431ac 100644 --- a/be/src/util/filesystem-util.cc +++ b/be/src/util/filesystem-util.cc @@ -29,6 +29,7 @@ #include <string> #include <vector> +#include <boost/algorithm/string/trim.hpp> #include <boost/filesystem.hpp> #include "common/status.h" @@ -263,6 +264,25 @@ bool FileSystemUtil::GetRelativePath(const string& path, const string& start, return false; } +std::string FileSystemUtil::FindFileInPath(string path, const std::string& regex) { + // Wildcard at the end matches any file, so trim and treat as a directory. + boost::algorithm::trim_right_if(path, boost::algorithm::is_any_of("*")); + if (path.empty()) return ""; + if (!filesystem::exists(path)) return ""; + std::regex pattern{regex}; + if (filesystem::is_directory(path)) { + for (filesystem::directory_entry& child : filesystem::directory_iterator(path)) { + // If child matches pattern, use it + if (std::regex_match(child.path().filename().string(), pattern)) { + return child.path().string(); + } + } + } else if (std::regex_match(filesystem::path(path).filename().string(), pattern)) { + return path; + } + return ""; +} + FileSystemUtil::Directory::Directory(const string& path) : dir_path_(path), status_(Status::OK()) { diff --git a/be/src/util/filesystem-util.h b/be/src/util/filesystem-util.h index e37a76fc1..916cb5ec2 100644 --- a/be/src/util/filesystem-util.h +++ b/be/src/util/filesystem-util.h @@ -88,6 +88,15 @@ class FileSystemUtil { static bool GetRelativePath(const std::string& path, const std::string& start, std::string* relpath); + /// Finds the first file matching the supplied 'regex' in 'path', or returns empty + /// string; matches the behavior of paths specified in Java CLASSPATH for JARs. + /// If 'path' is a file, returns 'path' if 'regex' matches its filename. + /// If 'path' is a directory, returns the absolute path for a match if 'regex' matches + /// any files in that directory. + /// If 'path' ends with a wildcard - as in "/lib/*" - it will treat path as a directory + /// excluding the wildcard. + static std::string FindFileInPath(string path, const std::string& regex); + /// Ext filesystem on certain kernel versions may result in inconsistent metadata after /// punching holes in files. The filesystem may require fsck repair on next reboot. /// See KUDU-1508 for details. This function checks if the filesystem at 'path' resides diff --git a/be/src/util/impalad-metrics.cc b/be/src/util/impalad-metrics.cc index 7f735b33d..697864087 100644 --- a/be/src/util/impalad-metrics.cc +++ b/be/src/util/impalad-metrics.cc @@ -138,6 +138,10 @@ const char* ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT = "catalog.cache.request-count"; const char* ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME = "catalog.cache.total-load-time"; +const char* ImpaladMetricKeys::CATALOG_CACHE_ENTRY_MEDIAN_SIZE = + "catalog.cache.entry-median-size"; +const char* ImpaladMetricKeys::CATALOG_CACHE_ENTRY_99TH_SIZE = + "catalog.cache.entry-99th-size"; const char* ImpaladMetricKeys::NUM_FILES_OPEN_FOR_INSERT = "impala-server.num-files-open-for-insert"; const char* ImpaladMetricKeys::IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS = @@ -237,6 +241,8 @@ DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_MISS_RATE = nullptr; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_ENTRY_MEDIAN_SIZE = nullptr; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_ENTRY_99TH_SIZE = nullptr; // Properties BooleanProperty* ImpaladMetrics::CATALOG_READY = nullptr; @@ -296,6 +302,10 @@ void ImpaladMetrics::InitCatalogMetrics(MetricGroup* m) { catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT, 0); CATALOG_CACHE_TOTAL_LOAD_TIME = catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME, 0); + CATALOG_CACHE_ENTRY_MEDIAN_SIZE = catalog_metrics->AddDoubleGauge( + ImpaladMetricKeys::CATALOG_CACHE_ENTRY_MEDIAN_SIZE, 0); + CATALOG_CACHE_ENTRY_99TH_SIZE = catalog_metrics->AddDoubleGauge( + ImpaladMetricKeys::CATALOG_CACHE_ENTRY_99TH_SIZE, 0); } } diff --git a/be/src/util/impalad-metrics.h b/be/src/util/impalad-metrics.h index 35e53c82a..d206f916c 100644 --- a/be/src/util/impalad-metrics.h +++ b/be/src/util/impalad-metrics.h @@ -218,6 +218,12 @@ class ImpaladMetricKeys { /// Total time spent in Impalad Catalog cache loading new values. static const char* CATALOG_CACHE_TOTAL_LOAD_TIME; + /// Median size of Impalad Catalog cache entries. + static const char* CATALOG_CACHE_ENTRY_MEDIAN_SIZE; + + /// 99th percentile size of Impalad Catalog cache entries. + static const char* CATALOG_CACHE_ENTRY_99TH_SIZE; + /// Number of files open for insert static const char* NUM_FILES_OPEN_FOR_INSERT; @@ -320,6 +326,8 @@ class ImpaladMetrics { static DoubleGauge* CATALOG_CACHE_HIT_RATE; static DoubleGauge* CATALOG_CACHE_LOAD_EXCEPTION_RATE; static DoubleGauge* CATALOG_CACHE_MISS_RATE; + static DoubleGauge* CATALOG_CACHE_ENTRY_MEDIAN_SIZE; + static DoubleGauge* CATALOG_CACHE_ENTRY_99TH_SIZE; static IntGauge* IMPALA_SERVER_NUM_OPEN_BEESWAX_SESSIONS; static IntGauge* IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS; static MetricGroup* IO_MGR_METRICS; diff --git a/bin/impala-config-java.sh b/bin/impala-config-java.sh index 0b1fa929b..276d37f85 100644 --- a/bin/impala-config-java.sh +++ b/bin/impala-config-java.sh @@ -21,12 +21,9 @@ IMPALA_JDK_VERSION=${IMPALA_JDK_VERSION:-system} if [[ "${IMPALA_JDK_VERSION}" == "system" || "${IMPALA_JDK_VERSION}" == "8" ]]; then UBUNTU_JAVA_VERSION=8 REDHAT_JAVA_VERSION=1.8.0 -elif [[ "${IMPALA_JDK_VERSION}" == "11" ]]; then - UBUNTU_JAVA_VERSION=11 - REDHAT_JAVA_VERSION=11 else - echo "Unknown value for IMPALA_JDK_VERSION=${IMPALA_JDK_VERSION}" - exit 1 + UBUNTU_JAVA_VERSION="${IMPALA_JDK_VERSION}" + REDHAT_JAVA_VERSION="${IMPALA_JDK_VERSION}" fi if [[ "$(uname -p)" == 'aarch64' ]]; then diff --git a/bin/impala-config.sh b/bin/impala-config.sh index d6b7715f0..85e0b7095 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -267,10 +267,14 @@ export IMPALA_REDHAT7_DOCKER_BASE=${IMPALA_REDHAT7_DOCKER_BASE:-"centos:centos7. export IMPALA_REDHAT8_DOCKER_BASE=${IMPALA_REDHAT8_DOCKER_BASE:-"rockylinux:8.5"} export IMPALA_REDHAT9_DOCKER_BASE=${IMPALA_REDHAT9_DOCKER_BASE:-"rockylinux:9.2"} -# When set to true, this instructs start-impala-cluster.py to start with Java 11 images -# created via 'make docker_java11_images' (or 'docker_debug_java11_images'). The Java -# version used in these images is independent of the Java version used to compile Impala. -export IMPALA_DOCKER_USE_JAVA11=${IMPALA_DOCKER_USE_JAVA11:-"false"} +# Selects the version of Java to use when start-impala-cluster.py starts with container +# images (created via e.g. 'make docker_debug_java11_images'). The Java version used in +# these images is independent of the Java version used to compile Impala. +# Accepts 8, 11, 17. +export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"8"} +if [ "${IMPALA_DOCKER_USE_JAVA11:-}" = "true" ]; then + export IMPALA_DOCKER_JAVA=11 +fi # There are multiple compatible implementations of zlib. Cloudflare Zlib is an # implementation with optimizations to use platform-specific CPU features that are not @@ -387,8 +391,7 @@ fi # Decision tree: # if IMPALA_JAVA_HOME_OVERRIDE is set, respect it # else if IMPALA_JDK_VERSION == system, look for system JDK -# else if IMPALA_JDK_VERSION == 8, look for Java 8 JDK -# else if IMPALA_JDK_VERSION == 11, look for Java 11 JDK +# else if IMPALA_JDK_VERSION == 8+, look for Java 8+ JDK # Initialize IMPALA_JDK_VERSION and set package variables for Docker builds . "$IMPALA_HOME/bin/impala-config-java.sh" diff --git a/bin/jenkins/dockerized-impala-bootstrap-and-test.sh b/bin/jenkins/dockerized-impala-bootstrap-and-test.sh index 19b8c0220..07ab2612d 100755 --- a/bin/jenkins/dockerized-impala-bootstrap-and-test.sh +++ b/bin/jenkins/dockerized-impala-bootstrap-and-test.sh @@ -36,7 +36,7 @@ source ./bin/bootstrap_system.sh # to preserve additional variables. ./bin/jenkins/dockerized-impala-preserve-vars.py \ EE_TEST EE_TEST_FILES JDBC_TEST EXPLORATION_STRATEGY CMAKE_BUILD_TYPE \ - IMPALA_DOCKER_USE_JAVA11 + IMPALA_DOCKER_JAVA # Execute the tests using su to re-login so that group change made above # setup_docker takes effect. This does a full re-login and does not stay diff --git a/bin/jenkins/dockerized-impala-run-tests.sh b/bin/jenkins/dockerized-impala-run-tests.sh index cb665b18f..a5e3b165a 100755 --- a/bin/jenkins/dockerized-impala-run-tests.sh +++ b/bin/jenkins/dockerized-impala-run-tests.sh @@ -81,9 +81,16 @@ start-impala-cluster.py --kill # parquet-reader and impala-profile-tool are needed for e2e tests but not built for # non-test build. IMAGE_TYPE=docker_debug -if ${IMPALA_DOCKER_USE_JAVA11-false}; then - IMAGE_TYPE=${IMAGE_TYPE}_java11 -fi +case ${IMPALA_DOCKER_JAVA:-8} in + 11) + IMAGE_TYPE=${IMAGE_TYPE}_java11 + ;; + 17) + IMAGE_TYPE=${IMAGE_TYPE}_java17 + ;; + *) + ;; +esac make -j ${IMPALA_BUILD_THREADS} ${IMAGE_TYPE}_images parquet-reader impala-profile-tool source_impala_config diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt index 2ec1575d6..ffa059080 100644 --- a/bin/rat_exclude_files.txt +++ b/bin/rat_exclude_files.txt @@ -133,6 +133,7 @@ fe/src/test/resources/adusers.ldif fe/src/test/resources/hbase-jaas-client.conf.template fe/src/test/resources/hbase-jaas-server.conf.template fe/src/test/resources/users.ldif +java/toolchains.xml.tmpl testdata/AllTypesError/*.txt testdata/AllTypesErrorNoNulls/*.txt *.avsc diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh index 051a89efd..8f419c777 100755 --- a/bin/run-all-tests.sh +++ b/bin/run-all-tests.sh @@ -243,9 +243,14 @@ do # Requires a running impalad cluster because some tests (such as DataErrorTest and # JdbcTest) queries against an impala cluster. pushd "${IMPALA_FE_DIR}" + + # Add Jamm as javaagent for CatalogdMetaProviderTest.testWeights + JAMM_JAR=$(compgen -G ${IMPALA_HOME}/fe/target/dependency/jamm-*.jar) + export JAVA_TOOL_OPTIONS="${JAVA_TOOL_OPTIONS-} -javaagent:${JAMM_JAR}" + if $JAVA -version 2>&1 | grep -q -E ' version "(9|[1-9][0-9])\.'; then # If running with Java 9+, add-opens to JAVA_TOOL_OPTIONS for - # CatalogdMetaProviderTest.testWeights + # CatalogdMetaProviderTest.testWeights with ehcache.sizeof. JAVA_OPTIONS=" --add-opens=java.base/java.io=ALL-UNNAMED" JAVA_OPTIONS+=" --add-opens=java.base/java.lang.module=ALL-UNNAMED" JAVA_OPTIONS+=" --add-opens=java.base/java.lang.ref=ALL-UNNAMED" diff --git a/bin/run-jvm-binary.sh b/bin/run-jvm-binary.sh index 0aaf1eda4..1b7d34537 100755 --- a/bin/run-jvm-binary.sh +++ b/bin/run-jvm-binary.sh @@ -26,6 +26,8 @@ set -euo pipefail . "${IMPALA_HOME}/bin/set-classpath.sh" . "${IMPALA_HOME}/bin/set-ld-library-path.sh" +export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(dirname ${LIB_JVM})" + # LLVM must be on path to symbolise sanitiser stack traces. export PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_VERSION}/bin:${PATH}" exec "$@" diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py index 4b0883dc2..0f0c0e216 100755 --- a/bin/start-impala-cluster.py +++ b/bin/start-impala-cluster.py @@ -759,8 +759,8 @@ class DockerMiniClusterOperations(object): image_tag = daemon + "_debug" else: image_tag = daemon - if os.getenv('IMPALA_DOCKER_USE_JAVA11', '') == "true": - image_tag += "_java11" + java_versions = {"8": "", "11": "_java11", "17": "_java17"} + image_tag += java_versions[os.getenv('IMPALA_DOCKER_JAVA', '8')] host_name = self.__gen_host_name__(daemon, instance) container_name = self.__gen_container_name__(daemon, instance) # Mount configuration into container so that we don't need to rebuild container diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift index 77e7420c0..5e4868082 100644 --- a/common/thrift/BackendGflags.thrift +++ b/common/thrift/BackendGflags.thrift @@ -256,4 +256,6 @@ struct TBackendGflags { 112: required string file_metadata_reload_properties 113: required double scan_range_cost_factor + + 114: required bool use_jamm_weigher } diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index 89c46dd3d..45035389f 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -113,6 +113,8 @@ struct TGetCatalogMetricsResult { 12: optional double cache_hit_rate 13: optional double cache_load_exception_rate 14: optional double cache_miss_rate + 15: optional double cache_entry_median_size + 16: optional double cache_entry_99th_size } // Arguments to getDbs, which returns a list of dbs that match an optional pattern diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json index 7c144b9a6..8db554f1f 100644 --- a/common/thrift/metrics.json +++ b/common/thrift/metrics.json @@ -2917,6 +2917,26 @@ "kind": "COUNTER", "key": "catalog.cache.total-load-time" }, + { + "description": "Median size of entries added to the Impalad Catalog cache.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache entry median size", + "units": "BYTES", + "kind": "GAUGE", + "key": "catalog.cache.entry-median-size" + }, + { + "description": "99th percentile size of entries added to the Impalad Catalog.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache entry 99th percentile size", + "units": "BYTES", + "kind": "GAUGE", + "key": "catalog.cache.entry-99th-size" + }, { "description": "RPC queue length for partial object fetches.", "contexts": [ diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt index fb620fa31..90fa34cc4 100644 --- a/docker/CMakeLists.txt +++ b/docker/CMakeLists.txt @@ -103,16 +103,20 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED") endfunction() add_base_image(release "" "") add_base_image(release_java11 "" "--java 11") + add_base_image(release_java17 "" "--java 17") # Debug images include debug tools add_base_image(debug "--debug-build" "--install-debug-tools") add_base_image(debug_java11 "--debug-build" "--install-debug-tools --java 11") + add_base_image(debug_java17 "--debug-build" "--install-debug-tools --java 17") # Target to build all docker images. Dependencies are added for each docker image # instantiated below. add_custom_target(docker_images) add_custom_target(docker_java11_images) + add_custom_target(docker_java17_images) add_custom_target(docker_debug_images) add_custom_target(docker_debug_java11_images) + add_custom_target(docker_debug_java17_images) add_custom_target(quickstart_docker_images) set(exported_image_names "") @@ -145,18 +149,26 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED") set(release_target ${daemon_name}_image) set(release_java11_image ${daemon_name}_java11) set(release_java11_target ${daemon_name}_java11_image) + set(release_java17_image ${daemon_name}_java17) + set(release_java17_target ${daemon_name}_java17_image) set(debug_image ${daemon_name}_debug) set(debug_target ${daemon_name}_debug_image) set(debug_java11_image ${daemon_name}_debug_java11) set(debug_java11_target ${daemon_name}_debug_java11_image) + set(debug_java17_image ${daemon_name}_debug_java17) + set(debug_java17_target ${daemon_name}_debug_java17_image) add_daemon_docker_image(${release_target} ${daemon_name} ${release_image} release) add_daemon_docker_image(${release_java11_target} ${daemon_name} ${release_java11_image} release_java11) + add_daemon_docker_image(${release_java17_target} ${daemon_name} ${release_java17_image} release_java17) add_daemon_docker_image(${debug_target} ${daemon_name} ${debug_image} debug) add_daemon_docker_image(${debug_java11_target} ${daemon_name} ${debug_java11_image} debug_java11) + add_daemon_docker_image(${debug_java17_target} ${daemon_name} ${debug_java17_image} debug_java17) ADD_DEPENDENCIES(docker_images ${release_target}) ADD_DEPENDENCIES(docker_java11_images ${release_java11_target}) + ADD_DEPENDENCIES(docker_java17_images ${release_java17_target}) ADD_DEPENDENCIES(docker_debug_images ${debug_target}) ADD_DEPENDENCIES(docker_debug_java11_images ${debug_java11_target}) + ADD_DEPENDENCIES(docker_debug_java17_images ${debug_java17_target}) endfunction() # Stamp out image targets for all of the Impala daemons. diff --git a/docker/daemon_entrypoint.sh b/docker/daemon_entrypoint.sh index fda7a65b9..e8d77a2c4 100755 --- a/docker/daemon_entrypoint.sh +++ b/docker/daemon_entrypoint.sh @@ -52,28 +52,28 @@ fi # Need libjvm.so and libjsig.so on LD_LIBRARY_PATH # Ubuntu and Redhat use different locations for JAVA_HOME. -# This detection logic handles both Java 8 and Java 11. -# If both are present (which shouldn't happen), Java 11 is preferred. +# This detection logic handles Java 8, 11, and 17. Prefers the newest version. JAVA_HOME=Unknown if [[ $DISTRIBUTION == Ubuntu ]]; then # Since the Java location includes the CPU architecture, use a glob # to find Java home - if compgen -G "/usr/lib/jvm/java-11-openjdk*" ; then + if compgen -G "/usr/lib/jvm/java-17-openjdk*" ; then + echo "Detected Java 17" + JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-17-openjdk*") + elif compgen -G "/usr/lib/jvm/java-11-openjdk*" ; then + echo "Detected Java 11" JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-11-openjdk*") - if compgen -G "/usr/lib/jvm/java-8-openjdk*" ; then - echo "WARNING: Java 8 is also present" - fi elif compgen -G "/usr/lib/jvm/java-8-openjdk*" ; then echo "Detected Java 8" JAVA_HOME=$(compgen -G "/usr/lib/jvm/java-8-openjdk*") fi elif [[ $DISTRIBUTION == Redhat ]]; then - if [[ -d /usr/lib/jvm/jre-11 ]]; then + if [[ -d /usr/lib/jvm/jre-17 ]]; then + echo "Detected Java 17" + JAVA_HOME=/usr/lib/jvm/jre-17 + elif [[ -d /usr/lib/jvm/jre-11 ]]; then echo "Detected Java 11" JAVA_HOME=/usr/lib/jvm/jre-11 - if [[ -d /usr/lib/jvm/jre-1.8.0 ]]; then - echo "WARNING: Java 8 is also present" - fi elif [[ -d /usr/lib/jvm/jre-1.8.0 ]]; then echo "Detected Java 8" JAVA_HOME=/usr/lib/jvm/jre-1.8.0 @@ -82,7 +82,7 @@ fi if [[ $JAVA_HOME == Unknown ]]; then echo "ERROR: Did not find Java in any expected location." - echo "Only Java 8 and Java 11 are supported." + echo "Only Java 8, 11, and 17 are supported." exit 1 fi diff --git a/fe/pom.xml b/fe/pom.xml index 549abf499..bfa588176 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -358,9 +358,15 @@ under the License. </dependency> <dependency> - <groupId>org.ehcache</groupId> - <artifactId>sizeof</artifactId> - <version>0.4.0</version> + <groupId>org.ehcache</groupId> + <artifactId>sizeof</artifactId> + <version>0.4.0</version> + </dependency> + + <dependency> + <groupId>com.github.jbellis</groupId> + <artifactId>jamm</artifactId> + <version>0.4.0-IMPALA</version> </dependency> <dependency> diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java index a238ff427..245a6ff12 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java @@ -51,6 +51,7 @@ import org.apache.impala.util.MetaStoreUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.Snapshot; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.cache.CacheStats; @@ -418,6 +419,10 @@ public abstract class FeCatalogUtils { metrics.setCache_hit_rate(stats.hitRate()); metrics.setCache_load_exception_rate(stats.loadExceptionRate()); metrics.setCache_miss_rate(stats.missRate()); + + Snapshot cacheEntrySize = ((CatalogdMetaProvider) provider).getCacheEntrySize(); + metrics.setCache_entry_median_size(cacheEntrySize.getMedian()); + metrics.setCache_entry_99th_size(cacheEntrySize.get99thPercentile()); } diff --git a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java index de0f7d653..f35832c26 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java @@ -35,6 +35,9 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.UniformReservoir; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableMap; @@ -107,6 +110,8 @@ import org.apache.thrift.TException; import org.apache.thrift.TSerializer; import org.apache.thrift.protocol.TBinaryProtocol; import org.ehcache.sizeof.SizeOf; +import org.github.jamm.CannotAccessFieldException; +import org.github.jamm.MemoryMeter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -285,6 +290,8 @@ public class CatalogdMetaProvider implements MetaProvider { */ final Cache<Object,Object> cache_; + private final Histogram cacheEntrySize_ = new Histogram(new UniformReservoir()); + /** * The last catalog version seen in an update from the catalogd. * @@ -364,7 +371,8 @@ public class CatalogdMetaProvider implements MetaProvider { cache_ = CacheBuilder.newBuilder() .maximumWeight(cacheSizeBytes) .expireAfterAccess(expirationSecs, TimeUnit.SECONDS) - .weigher(new SizeOfWeigher()) + .weigher(new SizeOfWeigher( + BackendConfig.INSTANCE.useJammWeigher(), cacheEntrySize_)) .recordStats() .build(); } @@ -373,6 +381,10 @@ public class CatalogdMetaProvider implements MetaProvider { return cache_.stats(); } + public Snapshot getCacheEntrySize() { + return cacheEntrySize_.getSnapshot(); + } + @Override public Iterable<HdfsCachePool> getHdfsCachePools() { return hdfsCachePools_; @@ -2004,8 +2016,6 @@ public class CatalogdMetaProvider implements MetaProvider { // Cache the reflected sizes of classes seen. private static final boolean CACHE_SIZES = true; - private static SizeOf SIZEOF = SizeOf.newInstance(BYPASS_FLYWEIGHT, CACHE_SIZES); - private static final int BYTES_PER_WORD = 8; // Assume 64-bit VM. // Guava cache overhead based on: // http://code-o-matic.blogspot.com/2012/02/updated-memory-cost-per-javaguava.html @@ -2013,9 +2023,51 @@ public class CatalogdMetaProvider implements MetaProvider { 12 * BYTES_PER_WORD + // base cost per entry 4 * BYTES_PER_WORD; // for use of 'maximumSize()' + // Retain Ehcache while Jamm is thoroughly tested. We only expect to make one + // CatalogdMetaProvider and thus one SizeOfWeigher. + private final SizeOf SIZEOF; + private final MemoryMeter METER; + + private final boolean useJamm_; + private final Histogram entrySize_; + + public SizeOfWeigher(boolean useJamm, Histogram entrySize) { + if (useJamm) { + METER = MemoryMeter.builder().build(); + SIZEOF = null; + } else { + SIZEOF = SizeOf.newInstance(BYPASS_FLYWEIGHT, CACHE_SIZES); + METER = null; + } + useJamm_ = useJamm; + entrySize_ = entrySize; + } + + public SizeOfWeigher() { + this(true, null); + } + @Override public int weigh(Object key, Object value) { - long size = SIZEOF.deepSizeOf(key, value) + OVERHEAD_PER_ENTRY; + long size = OVERHEAD_PER_ENTRY; + try { + if (useJamm_) { + size += METER.measureDeep(key); + size += METER.measureDeep(value); + } else { + size += SIZEOF.deepSizeOf(key, value); + } + } catch (CannotAccessFieldException e) { + // This may happen if we miss an add-opens call for lambdas in Java 17. + LOG.warn("Unable to weigh cache entry, additional add-opens needed", e); + } catch (UnsupportedOperationException e) { + // Thrown by ehcache for lambdas in Java 17. + LOG.warn("Unable to weigh cache entry", e); + } + + if (entrySize_ != null) { + entrySize_.update(size); + } if (size > Integer.MAX_VALUE) { return Integer.MAX_VALUE; } diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java index 607910568..ac7f62715 100644 --- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java +++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java @@ -411,4 +411,8 @@ public class BackendConfig { public float getScanRangeCostFactor() { return (float) backendCfg_.scan_range_cost_factor; } + + public boolean useJammWeigher() { + return backendCfg_.use_jamm_weigher; + } } diff --git a/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java b/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java index dbfb59639..d0cc54deb 100644 --- a/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java +++ b/fe/src/test/java/org/apache/impala/catalog/local/CatalogdMetaProviderTest.java @@ -274,6 +274,12 @@ public class CatalogdMetaProviderTest { int weigh = weigher.weigh(refs, null); assertTrue("Actual weigh: " + weigh, weigh > 4000); assertTrue("Actual weigh: " + weigh, weigh < 5000); + + // Also continue to test ehcache. + weigher = new SizeOfWeigher(false, null); + weigh = weigher.weigh(refs, null); + assertTrue("Actual weigh: " + weigh, weigh > 4000); + assertTrue("Actual weigh: " + weigh, weigh < 5000); } @Test diff --git a/java/.gitignore b/java/.gitignore new file mode 100644 index 000000000..173495683 --- /dev/null +++ b/java/.gitignore @@ -0,0 +1 @@ +jamm-prefix/ diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index d26c4614e..89313e1f4 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -19,6 +19,18 @@ add_custom_target(validate_java_pom_versions ALL COMMAND $ENV{IMPALA_HOME}/bin/validate-java-pom-versions.sh ) -add_custom_target(java ALL DEPENDS gen-deps function-registry geospatial-udf-wrappers validate_java_pom_versions +include(ExternalProject) +ExternalProject_Add(jamm + GIT_REPOSITORY https://github.com/jbellis/jamm + # 0.4.0 RC. A release is expected soon: https://github.com/jbellis/jamm/issues/44 + GIT_TAG 49c3e691c43a39ddaccaf19839ef316807009377 + PATCH_COMMAND sed -i.bak s/0.4.0-SNAPSHOT/0.4.0-IMPALA/ pom.xml + CONFIGURE_COMMAND sed s:JAVA_HOME:$ENV{JAVA_HOME}:g ${CMAKE_CURRENT_SOURCE_DIR}/toolchains.xml.tmpl > toolchains.xml + BUILD_IN_SOURCE true + BUILD_COMMAND mvn --toolchains toolchains.xml install -Dmaven.test.skip=true -DskipTests + INSTALL_COMMAND "" +) + +add_custom_target(java ALL DEPENDS gen-deps function-registry geospatial-udf-wrappers validate_java_pom_versions jamm COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests ) diff --git a/java/toolchains.xml.tmpl b/java/toolchains.xml.tmpl new file mode 100644 index 000000000..c5e0215ef --- /dev/null +++ b/java/toolchains.xml.tmpl @@ -0,0 +1,35 @@ +<?xml version="1.0" encoding="UTF-8"?> +<toolchains> + <!-- Provide toolchain entries for all versions Jamm expects. We don't actually use + 11 or 17 to build or run tests however, so what they point to doesn't matter. --> + <toolchain> + <type>jdk</type> + <provides> + <version>1.8</version> + <vendor>Oracle Corporation</vendor> + </provides> + <configuration> + <jdkHome>JAVA_HOME</jdkHome> + </configuration> + </toolchain> + <toolchain> + <type>jdk</type> + <provides> + <version>11</version> + <vendor>Oracle Corporation</vendor> + </provides> + <configuration> + <jdkHome>JAVA_HOME</jdkHome> + </configuration> + </toolchain> + <toolchain> + <type>jdk</type> + <provides> + <version>17</version> + <vendor>Oracle Corporation</vendor> + </provides> + <configuration> + <jdkHome>JAVA_HOME</jdkHome> + </configuration> + </toolchain> +</toolchains> diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py index 89f19e075..32c65f872 100644 --- a/tests/custom_cluster/test_local_catalog.py +++ b/tests/custom_cluster/test_local_catalog.py @@ -486,6 +486,8 @@ class TestObservability(CustomClusterTestSuite): cache_miss_rate_metric_key = "catalog.cache.miss-rate" cache_hit_count_metric_key = "catalog.cache.hit-count" cache_request_count_metric_key = "catalog.cache.request-count" + cache_entry_median_size_key = "catalog.cache.entry-median-size" + cache_entry_99th_size_key = "catalog.cache.entry-99th-size" cache_request_count_prev_run = 0 cache_hit_count_prev_run = 0 test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database @@ -512,6 +514,12 @@ class TestObservability(CustomClusterTestSuite): assert cache_request_count > cache_request_count_prev_run,\ "%s not updated betweeen two query runs, query - %s"\ % (cache_request_count_metric_key, query) + + cache_entry_median_size = cache_metrics[cache_entry_median_size_key] + cache_entry_99th_size = cache_metrics[cache_entry_99th_size_key] + assert cache_entry_median_size > 300 and cache_entry_median_size < 1000 + assert cache_entry_99th_size > 12500 and cache_entry_99th_size < 18000 + cache_hit_count_prev_run = cache_hit_count cache_request_count_prev_run = cache_request_count finally: diff --git a/tests/verifiers/test_banned_log_messages.py b/tests/verifiers/test_banned_log_messages.py index ac5d1e3e0..1f4d52162 100644 --- a/tests/verifiers/test_banned_log_messages.py +++ b/tests/verifiers/test_banned_log_messages.py @@ -22,7 +22,6 @@ import os import subprocess from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfDockerizedCluster class TestBannedLogMessages(ImpalaTestSuite): @@ -31,13 +30,17 @@ class TestBannedLogMessages(ImpalaTestSuite): This test suite should be run after all the tests have been run. """ - @SkipIfDockerizedCluster.daemon_logs_not_exposed - def test_no_inaccessible_objects(self): - """Test that cluster logs do not contain InaccessibleObjectException""" - log_dir = os.environ["IMPALA_LOGS_DIR"] - message = 'InaccessibleObjectException' + def assert_message_absent(self, message, log_dir=os.environ["IMPALA_LOGS_DIR"]): for root, _, files in os.walk(log_dir): for file in files: log_file_path = os.path.join(root, file) returncode = subprocess.call(['grep', message, log_file_path]) assert returncode == 1, "%s contains '%s'" % (log_file_path, message) + + def test_no_inaccessible_objects(self): + """Test that cluster logs do not contain InaccessibleObjectException""" + self.assert_message_absent('InaccessibleObjectException') + + def test_no_unsupported_operations(self): + """Test that cluster logs do not contain jamm.CannotAccessFieldException""" + self.assert_message_absent('CannotAccessFieldException')
