This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 21ef3e6ff IMPALA-13638: Translate apostrophe to underscore in 
Prometheus metric names.
21ef3e6ff is described below

commit 21ef3e6ffe997c3dbb71e1d7a92844fb31387a92
Author: Andrew Sherman <[email protected]>
AuthorDate: Thu Jan 2 13:22:22 2025 -0800

    IMPALA-13638: Translate apostrophe to underscore in Prometheus metric names.
    
    Impala has some metrics that reflect the state of the JVM. Some of these
    metrics have names that are partly composed of the names of the
    MemoryPoolMXBean objects in the Java virtual machine. In Jdk 8 these
    are names like "Code Cache" and "PS Eden Space". In Jdk 11 these names
    include apostrophe characters, for example "CodeHeap 'profiled
    nmethods'". The derived metric names work OK for Impala in both the
    webui and in json output. However the apostrophe character is illegal
    in Prometheus metric names per
    https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
    and these metrics cannot be consumed by Prometheus. Fix this by adding
    the apostrophe to the list of characters that are mapped to underscores
    when we translate the metric names for Prometheus metrics.
    
    TESTING:
    
    Extended the test_prometheus_metrics test to parse all generated
    Prometheus metrics. Ran the test with Jdk 11 where it failed without
    the server fix
    
    Change-Id: I557b123c075dff0b14ac527de08bc6177bd2a3f6
    IMPALA-13596: first cut at tidied code
    Reviewed-on: http://gerrit.cloudera.org:8080/22295
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/util/metrics-test.cc        |  8 ++++----
 be/src/util/metrics.cc             |  2 +-
 infra/python/deps/requirements.txt |  1 +
 tests/webserver/test_web_pages.py  | 20 ++++++++++++++------
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc
index 770ae2c99..be06c2386 100644
--- a/be/src/util/metrics-test.cc
+++ b/be/src/util/metrics-test.cc
@@ -501,12 +501,12 @@ TEST_F(MetricsTest, PrometheusMetricNames) {
   EXPECT_EQ("impala_server_metric",
       MetricGroup::ImpalaToPrometheusName("impala-server-metric"));
 
-  // Test that . and - get transformed to _.
-  EXPECT_EQ("impala_metric_name_with_punctuation_",
-      MetricGroup::ImpalaToPrometheusName("metric-name.with-punctuation_"));
+  // Test that ".", "'" and "-" get transformed to _.
+  EXPECT_EQ("impala_metric__name__with_punctuation_",
+      MetricGroup::ImpalaToPrometheusName("metric-'name'.with-punctuation_"));
 
   // Other special characters are unmodified
-  EXPECT_EQ("impala_!@#$%*", MetricGroup::ImpalaToPrometheusName("!@#$%*"));
+  EXPECT_EQ("impala_!@#$%*:", MetricGroup::ImpalaToPrometheusName("!@#$%*:"));
 }
 
 void AssertPrometheus(const std::stringstream& val, const string& name,
diff --git a/be/src/util/metrics.cc b/be/src/util/metrics.cc
index b1c22085d..13e836d82 100644
--- a/be/src/util/metrics.cc
+++ b/be/src/util/metrics.cc
@@ -324,7 +324,7 @@ string MetricGroup::ImpalaToPrometheusName(const string& 
impala_metric_name) {
   // Substitute characters as needed to match prometheus conventions. The 
string is
   // already the right size so we can do this in place.
   for (size_t i = 0; i < result.size(); ++i) {
-    if (result[i] == '.' || result[i] == '-') result[i] = '_';
+    if (result[i] == '.' || result[i] == '-' || result[i] == '\'') result[i] = 
'_';
   }
   if (result.compare(0, 7, "impala_") != 0) {
     result.insert(0, "impala_");
diff --git a/infra/python/deps/requirements.txt 
b/infra/python/deps/requirements.txt
index 242d31467..37ee20af8 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -37,6 +37,7 @@ kerberos == 1.3.1
 pexpect == 3.3
 pg8000 == 1.10.2
 prettytable == 0.7.2
+prometheus-client == 0.12.0
 psutil == 5.6.3
 pyparsing == 2.0.3
 pytest == 2.9.2
diff --git a/tests/webserver/test_web_pages.py 
b/tests/webserver/test_web_pages.py
index 6c5800761..a97d8b42b 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -24,6 +24,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.util.parse_util import parse_duration_string_ms
 from datetime import datetime
 from multiprocessing import Process, Queue
+from prometheus_client.parser import text_string_to_metric_families
 from time import sleep, time
 import itertools
 import json
@@ -506,7 +507,7 @@ class TestWebPage(ImpalaTestSuite):
 
   def test_query_stmt(self):
     """Create a long select query then check if it is truncated in the 
response json."""
-    # The imput query is a select + 450 "x " long, which is long enough to get 
truncated.
+    # The input query is a select + 450 "x " long, which is long enough to get 
truncated.
     query = "select \"{0}\"".format("x " * 450)
     # The expected result query should be 253 long and contains the first 250
     # chars + "..."
@@ -515,7 +516,7 @@ class TestWebPage(ImpalaTestSuite):
     response_json = self.__run_query_and_get_debug_page(
       query, self.QUERIES_URL, 
expected_state=self.client.QUERY_STATES["FINISHED"])
     # Search the json for the expected value.
-    # The query can be in in_filght_queries even though it is in FINISHED 
state.
+    # The query can be in in_flight_queries even though it is in FINISHED 
state.
     for json_part in itertools.chain(
       response_json['completed_queries'], response_json['in_flight_queries']):
       if expected_result in json_part['stmt']:
@@ -842,7 +843,7 @@ class TestWebPage(ImpalaTestSuite):
         # the query is in the file.
         responses = self.get_and_check_status(
             self.ROOT_URL + download_link, query, self.IMPALAD_TEST_PORT)
-        # Check the query id is in the content of the reponse.
+        # Check the query id is in the content of the response.
         assert len(responses) == 1
         assert query_id in responses[0].text
       elif profile_format == 'Json':
@@ -870,6 +871,13 @@ class TestWebPage(ImpalaTestSuite):
     # check if metric shows up
     assert 'impala_statestore_subscriber_heartbeat_interval_time_min' in 
resp[0].text
 
+    for response in resp:
+      # Parse Prometheus text format using prometheus_client library.
+      for _ in text_string_to_metric_families(response.text):
+        # We have to loop through the result to force the lazy parsing function
+        # to parse every line.
+        continue
+
   def test_healthz_endpoint(self):
     """Test to check that the /healthz endpoint returns 200 OK."""
     for port in self.TEST_PORTS_WITH_SS:
@@ -882,11 +890,11 @@ class TestWebPage(ImpalaTestSuite):
     """Checks that the template files conform to the requirements for 
compatibility with
     the Apache Knox service definition."""
     # Matches all 'a' links with an 'href' that doesn't start with either '#' 
(which stays
-    # on the same page an so doesn't need to the hostname) or '{{ 
__common__.host-url }}'
+    # on the same page and so doesn't need to the hostname) or '{{ 
__common__.host-url }}'
     # Note that if we ever need to add a link that doesn't conform to this, we 
will
     # probably also have to change the Knox service definition.
     href_regex = "<(a|link) .*? href=['\"](?!({{ __common__.host-url }})|#)"
-    # Matches all 'script' tags that aren't absoluve urls.
+    # Matches all 'script' tags that aren't absolute urls.
     script_regex = "<script .*?src=['\"](?!({{ __common__.host-url }})|http)"
     # Matches all 'form' tags that are not followed by including the hidden 
inputs.
     form_regex = "<form [^{]*?>(?!{{>www/form-hidden-inputs.tmpl}})"
@@ -1128,7 +1136,7 @@ class TestWebPage(ImpalaTestSuite):
 
   @pytest.mark.execute_serially
   def test_hadoop_varz_page(self):
-    """test for /hadoop-var to check availablity of haqoop configuration like
+    """test for /hadoop-var to check availability of hadoop configuration like
     hive warehouse dir, fs.defaultFS"""
     responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
         "hive.metastore.warehouse.dir", 
ports_to_test=self.TEST_PORTS_WITHOUT_SS)

Reply via email to