This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 21ef3e6ff IMPALA-13638: Translate apostrophe to underscore in
Prometheus metric names.
21ef3e6ff is described below
commit 21ef3e6ffe997c3dbb71e1d7a92844fb31387a92
Author: Andrew Sherman <[email protected]>
AuthorDate: Thu Jan 2 13:22:22 2025 -0800
IMPALA-13638: Translate apostrophe to underscore in Prometheus metric names.
Impala has some metrics that reflect the state of the JVM. Some of these
metrics have names that are partly composed of the names of the
MemoryPoolMXBean objects in the Java virtual machine. In Jdk 8 these
are names like "Code Cache" and "PS Eden Space". In Jdk 11 these names
include apostrophe characters, for example "CodeHeap 'profiled
nmethods'". The derived metric names work OK for Impala in both the
webui and in json output. However the apostrophe character is illegal
in Prometheus metric names per
https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
and these metrics cannot be consumed by Prometheus. Fix this by adding
the apostrophe to the list of characters that are mapped to underscores
when we translate the metric names for Prometheus metrics.
TESTING:
Extended the test_prometheus_metrics test to parse all generated
Prometheus metrics. Ran the test with Jdk 11 where it failed without
the server fix
Change-Id: I557b123c075dff0b14ac527de08bc6177bd2a3f6
IMPALA-13596: first cut at tidied code
Reviewed-on: http://gerrit.cloudera.org:8080/22295
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/util/metrics-test.cc | 8 ++++----
be/src/util/metrics.cc | 2 +-
infra/python/deps/requirements.txt | 1 +
tests/webserver/test_web_pages.py | 20 ++++++++++++++------
4 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc
index 770ae2c99..be06c2386 100644
--- a/be/src/util/metrics-test.cc
+++ b/be/src/util/metrics-test.cc
@@ -501,12 +501,12 @@ TEST_F(MetricsTest, PrometheusMetricNames) {
EXPECT_EQ("impala_server_metric",
MetricGroup::ImpalaToPrometheusName("impala-server-metric"));
- // Test that . and - get transformed to _.
- EXPECT_EQ("impala_metric_name_with_punctuation_",
- MetricGroup::ImpalaToPrometheusName("metric-name.with-punctuation_"));
+ // Test that ".", "'" and "-" get transformed to _.
+ EXPECT_EQ("impala_metric__name__with_punctuation_",
+ MetricGroup::ImpalaToPrometheusName("metric-'name'.with-punctuation_"));
// Other special characters are unmodified
- EXPECT_EQ("impala_!@#$%*", MetricGroup::ImpalaToPrometheusName("!@#$%*"));
+ EXPECT_EQ("impala_!@#$%*:", MetricGroup::ImpalaToPrometheusName("!@#$%*:"));
}
void AssertPrometheus(const std::stringstream& val, const string& name,
diff --git a/be/src/util/metrics.cc b/be/src/util/metrics.cc
index b1c22085d..13e836d82 100644
--- a/be/src/util/metrics.cc
+++ b/be/src/util/metrics.cc
@@ -324,7 +324,7 @@ string MetricGroup::ImpalaToPrometheusName(const string&
impala_metric_name) {
// Substitute characters as needed to match prometheus conventions. The
string is
// already the right size so we can do this in place.
for (size_t i = 0; i < result.size(); ++i) {
- if (result[i] == '.' || result[i] == '-') result[i] = '_';
+ if (result[i] == '.' || result[i] == '-' || result[i] == '\'') result[i] =
'_';
}
if (result.compare(0, 7, "impala_") != 0) {
result.insert(0, "impala_");
diff --git a/infra/python/deps/requirements.txt
b/infra/python/deps/requirements.txt
index 242d31467..37ee20af8 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -37,6 +37,7 @@ kerberos == 1.3.1
pexpect == 3.3
pg8000 == 1.10.2
prettytable == 0.7.2
+prometheus-client == 0.12.0
psutil == 5.6.3
pyparsing == 2.0.3
pytest == 2.9.2
diff --git a/tests/webserver/test_web_pages.py
b/tests/webserver/test_web_pages.py
index 6c5800761..a97d8b42b 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -24,6 +24,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.util.parse_util import parse_duration_string_ms
from datetime import datetime
from multiprocessing import Process, Queue
+from prometheus_client.parser import text_string_to_metric_families
from time import sleep, time
import itertools
import json
@@ -506,7 +507,7 @@ class TestWebPage(ImpalaTestSuite):
def test_query_stmt(self):
"""Create a long select query then check if it is truncated in the
response json."""
- # The imput query is a select + 450 "x " long, which is long enough to get
truncated.
+ # The input query is a select + 450 "x " long, which is long enough to get
truncated.
query = "select \"{0}\"".format("x " * 450)
# The expected result query should be 253 long and contains the first 250
# chars + "..."
@@ -515,7 +516,7 @@ class TestWebPage(ImpalaTestSuite):
response_json = self.__run_query_and_get_debug_page(
query, self.QUERIES_URL,
expected_state=self.client.QUERY_STATES["FINISHED"])
# Search the json for the expected value.
- # The query can be in in_filght_queries even though it is in FINISHED
state.
+ # The query can be in in_flight_queries even though it is in FINISHED
state.
for json_part in itertools.chain(
response_json['completed_queries'], response_json['in_flight_queries']):
if expected_result in json_part['stmt']:
@@ -842,7 +843,7 @@ class TestWebPage(ImpalaTestSuite):
# the query is in the file.
responses = self.get_and_check_status(
self.ROOT_URL + download_link, query, self.IMPALAD_TEST_PORT)
- # Check the query id is in the content of the reponse.
+ # Check the query id is in the content of the response.
assert len(responses) == 1
assert query_id in responses[0].text
elif profile_format == 'Json':
@@ -870,6 +871,13 @@ class TestWebPage(ImpalaTestSuite):
# check if metric shows up
assert 'impala_statestore_subscriber_heartbeat_interval_time_min' in
resp[0].text
+ for response in resp:
+ # Parse Prometheus text format using prometheus_client library.
+ for _ in text_string_to_metric_families(response.text):
+ # We have to loop through the result to force the lazy parsing function
+ # to parse every line.
+ continue
+
def test_healthz_endpoint(self):
"""Test to check that the /healthz endpoint returns 200 OK."""
for port in self.TEST_PORTS_WITH_SS:
@@ -882,11 +890,11 @@ class TestWebPage(ImpalaTestSuite):
"""Checks that the template files conform to the requirements for
compatibility with
the Apache Knox service definition."""
# Matches all 'a' links with an 'href' that doesn't start with either '#'
(which stays
- # on the same page an so doesn't need to the hostname) or '{{
__common__.host-url }}'
+ # on the same page and so doesn't need to the hostname) or '{{
__common__.host-url }}'
# Note that if we ever need to add a link that doesn't conform to this, we
will
# probably also have to change the Knox service definition.
href_regex = "<(a|link) .*? href=['\"](?!({{ __common__.host-url }})|#)"
- # Matches all 'script' tags that aren't absoluve urls.
+ # Matches all 'script' tags that aren't absolute urls.
script_regex = "<script .*?src=['\"](?!({{ __common__.host-url }})|http)"
# Matches all 'form' tags that are not followed by including the hidden
inputs.
form_regex = "<form [^{]*?>(?!{{>www/form-hidden-inputs.tmpl}})"
@@ -1128,7 +1136,7 @@ class TestWebPage(ImpalaTestSuite):
@pytest.mark.execute_serially
def test_hadoop_varz_page(self):
- """test for /hadoop-var to check availablity of haqoop configuration like
+ """test for /hadoop-var to check availability of hadoop configuration like
hive warehouse dir, fs.defaultFS"""
responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
"hive.metastore.warehouse.dir",
ports_to_test=self.TEST_PORTS_WITHOUT_SS)