This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 7c02ba952b1dc7b5e43c302f3bf76a18316ef417 Author: jasonmfehr <[email protected]> AuthorDate: Wed Feb 26 13:17:28 2025 -0800 IMPALA-13803: Fix hiveserver2_protocol_version Values in Workload Management The workload management tables sys.impala_query_live and sys.impala_query_log contain different values for the hiveserver2_protocol_version column. The live table contains a short string such as "V6", while the log table contains a much longer string with an unnecessary prefix such as "HIVE_CLI_SERVICE_PROTOCOL_V6". This patch modifies the value stored in the hiveserver2_protocol_version column in the sys.impala_query_log table to match the value stored in the sys.impala_query_live table and the query profile. Testing was accomplished by running the following test suites locally. These tests all call the 'assert_query' function of workload_management.py, which checks the value of the hiveserver2_protocol_version column. A new test that uses HS2 as the client protocol was added to test_query_live.py. 
* tests/custom_cluster/test_query_live.py * tests/custom_cluster/test_query_log.py * tests/custom_cluster/test_workload_mgmt_init.py Change-Id: Idd8121d4fbf7abe12d313f3314377db6f1ec017a Reviewed-on: http://gerrit.cloudera.org:8080/22553 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/system-table-scanner.cc | 2 +- be/src/service/query-state-record.cc | 7 ++++++- be/src/service/query-state-record.h | 5 +++++ be/src/service/workload-management-worker.cc | 2 +- tests/custom_cluster/test_query_live.py | 17 ++++++++++++++++- tests/util/workload_management.py | 2 +- 6 files changed, 30 insertions(+), 5 deletions(-) diff --git a/be/src/exec/system-table-scanner.cc b/be/src/exec/system-table-scanner.cc index 141aa77c6..e286c8353 100644 --- a/be/src/exec/system-table-scanner.cc +++ b/be/src/exec/system-table-scanner.cc @@ -200,7 +200,7 @@ Status QueryScanner::MaterializeNextTuple( case TQueryTableColumn::HIVESERVER2_PROTOCOL_VERSION: if (query.session_type == TSessionType::HIVESERVER2) { RETURN_IF_ERROR(WriteStringSlot( - Substitute("V$0", 1 + query.hiveserver2_protocol_version), pool, slot)); + query.hiveserver2_protocol_version_formatted(), pool, slot)); } break; case TQueryTableColumn::DB_USER: diff --git a/be/src/service/query-state-record.cc b/be/src/service/query-state-record.cc index 09d9b5720..1b91ed41d 100644 --- a/be/src/service/query-state-record.cc +++ b/be/src/service/query-state-record.cc @@ -26,9 +26,10 @@ #include <vector> #include <boost/algorithm/string.hpp> - #include <gutil/strings/numbers.h> #include <gutil/strings/strcat.h> +#include <gutil/strings/substitute.h> + #include "runtime/coordinator.h" #include "scheduling/admission-controller.h" #include "scheduling/scheduler.h" @@ -414,6 +415,10 @@ bool QueryStateExpanded::events_timeline_empty() const { base_state->event_sequence.timestamps.empty(); } +string QueryStateExpanded::hiveserver2_protocol_version_formatted() const { + 
return strings::Substitute("V$0", 1 + hiveserver2_protocol_version); +} + bool PerHostPeakMemoryComparator(const pair<TNetworkAddress, PerHostState>& a, const pair<TNetworkAddress, PerHostState>& b) { return a.second.peak_memory_usage < b.second.peak_memory_usage; diff --git a/be/src/service/query-state-record.h b/be/src/service/query-state-record.h index 0dd02ca5b..5c3e5b46f 100644 --- a/be/src/service/query-state-record.h +++ b/be/src/service/query-state-record.h @@ -355,6 +355,11 @@ struct QueryStateExpanded { /// Events Timeline Iterator EventsTimelineIterator EventsTimeline() const; + /// Builds a shortened string containing the hiveserver2 protocol version of the Impala + /// client (if connected with the HS2 protocol). The returned string will have the + /// format 'V' followed by the version number, for example 'V7'. + std::string hiveserver2_protocol_version_formatted() const; + // Source tables accessed by this query. std::vector<TTableName> tables; diff --git a/be/src/service/workload-management-worker.cc b/be/src/service/workload-management-worker.cc index 62d4126b8..870688db3 100644 --- a/be/src/service/workload-management-worker.cc +++ b/be/src/service/workload-management-worker.cc @@ -135,7 +135,7 @@ const std::array<FieldParser, NumQueryTableColumns> FIELD_PARSERS = {{ {[](FieldParserContext& ctx) { ctx.sql << "'"; if (ctx.record->session_type == TSessionType::HIVESERVER2) { - ctx.sql << ctx.record->hiveserver2_protocol_version; + ctx.sql << ctx.record->hiveserver2_protocol_version_formatted(); } ctx.sql << "'"; }}, diff --git a/tests/custom_cluster/test_query_live.py b/tests/custom_cluster/test_query_live.py index 83e222ec9..a0868d997 100644 --- a/tests/custom_cluster/test_query_live.py +++ b/tests/custom_cluster/test_query_live.py @@ -83,12 +83,27 @@ class TestQueryLive(CustomClusterTestSuite): assert False, "did not find host {}".format(host) assert len(actual_hosts) == 0, "did not find all expected hosts" + 
@CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt " + "--cluster_id=test_query_live", + catalogd_args="--enable_workload_mgmt", + disable_log_buffering=True) + def test_query_live_hs2(self): + """Asserts the query live table shows and allows filtering queries. Uses the hs2 + client to connect to Impala.""" + # Use a query that reads data from disk for the 1st one, as more representative and a + # better fit for assert_query. + result1 = self.hs2_client.execute("select * from functional.alltypes", + fetch_profile_after_close=True) + assert_query('sys.impala_query_live', self.hs2_client, 'test_query_live', + result1.runtime_profile) + @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt " "--cluster_id=test_query_live", catalogd_args="--enable_workload_mgmt", disable_log_buffering=True) def test_query_live(self): - """Asserts the query live table shows and allows filtering queries.""" + """Asserts the query live table shows and allows filtering queries. Uses the default + client to connect to Impala.""" # Use a query that reads data from disk for the 1st one, as more representative and a # better fit for assert_query. result1 = self.client.execute("select * from functional.alltypes", diff --git a/tests/util/workload_management.py b/tests/util/workload_management.py index a98c89fad..9b2f567db 100644 --- a/tests/util/workload_management.py +++ b/tests/util/workload_management.py @@ -144,7 +144,7 @@ def assert_query(query_tbl, client, expected_cluster_id="", raw_profile=None, if session_type.group(1) == "HIVESERVER2": hs2_ver = re.search(r'\n\s+HiveServer2 Protocol Version:\s+(.*)', profile_text) assert hs2_ver is not None - assert value == "HIVE_CLI_SERVICE_PROTOCOL_{0}".format(hs2_ver.group(1)) + assert value == hs2_ver.group(1) else: assert value == ""
