This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 52ad12bc0 IMPALA-12544: Add additional query progress reporting for
the shell
52ad12bc0 is described below
commit 52ad12bc0c847154263797e14e38a254e34760a2
Author: Eyizoha <[email protected]>
AuthorDate: Tue Nov 7 10:32:25 2023 +0800
IMPALA-12544: Add additional query progress reporting for the shell
This patch modifies the dynamic query progress reporting in impala-shell
by adding an extra query progress bar below the scan progress bar.
The query progress is calculated using the number of completed fragment
instances divided by the total number of fragment instances. Compared to
the scan progress, which is calculated based on completed scan ranges
divided by the total scan ranges, the query progress provides a more
accurate reflection of the actual completion progress of the query.
This matters most for computationally intensive queries involving complex
aggregations or sorting, such as TPC-DS query 78, which often require
additional computation time after the scanning is complete. In such cases,
showing only a scan progress of 100% would misrepresent how far the query
has actually progressed.
Change-Id: I11a704885505442b7499a026fcee3b86696cd064
Reviewed-on: http://gerrit.cloudera.org:8080/20672
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Michael Smith <[email protected]>
---
shell/impala_shell.py | 22 ++++++++++++++++------
tests/custom_cluster/test_web_pages.py | 2 +-
tests/shell/test_shell_commandline.py | 2 +-
tests/shell/util.py | 4 ++--
4 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 919aea24b..cadce7a89 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1304,15 +1304,25 @@ class ImpalaShell(cmd.Cmd, object):
progress = summary.progress
# If the data is not complete return and wait for a good result.
- if not progress.total_scan_ranges and not
progress.num_completed_scan_ranges:
+ if not progress.total_scan_ranges and not
progress.num_completed_scan_ranges and \
+ not progress.total_fragment_instances and \
+ not progress.num_completed_fragment_instances:
self.last_summary = time.time()
return
if self.live_progress and progress.total_scan_ranges > 0:
- val = ((summary.progress.num_completed_scan_ranges * 100) //
- summary.progress.total_scan_ranges)
- fragment_text = "[%s%s] %s%%\n" % ("#" * val, " " * (100 - val), val)
- data += fragment_text
+ val = ((summary.progress.num_completed_scan_ranges * 100)
+ // summary.progress.total_scan_ranges)
+ scan_progress_text =\
+ " Scan Progress:[%s%s] %s%%\n" % ("#" * val, " " * (100 - val),
val)
+ data += scan_progress_text
+
+ if self.live_progress and progress.total_fragment_instances > 0:
+ val = ((progress.num_completed_fragment_instances * 100)
+ // progress.total_fragment_instances)
+ query_progress_text =\
+ "Query Progress:[%s%s] %s%%\n" % ("#" * val, " " * (100 - val),
val)
+ data += query_progress_text
if self.live_summary:
table = self._default_summary_table()
@@ -1357,7 +1367,7 @@ class ImpalaShell(cmd.Cmd, object):
self.last_summary = time.time()
if print_web_link:
self._print_if_verbose(
- "Query progress can be monitored at: %s" %
self.imp_client.get_query_link(
+ "Query state can be monitored at: %s" %
self.imp_client.get_query_link(
self.imp_client.get_query_id_str(self.last_query_handle)))
wait_to_finish = self.imp_client.wait_to_finish(self.last_query_handle,
diff --git a/tests/custom_cluster/test_web_pages.py
b/tests/custom_cluster/test_web_pages.py
index 495aa70a9..d491452c4 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -161,7 +161,7 @@ class TestWebPage(CustomClusterTestSuite):
pytest.skip('runs only for text table_format')
# If webserver url is not exposed, debug web urls shouldn't be printed out.
shell_messages = ["Query submitted at: ", "(Coordinator: ",
- "Query progress can be monitored at: "]
+ "Query state can be monitored at: "]
query_shell_arg = '--query=select * from functional.alltypes'
# hs2
results = run_impala_shell_cmd(vector, [query_shell_arg])
diff --git a/tests/shell/test_shell_commandline.py
b/tests/shell/test_shell_commandline.py
index b0cf436fd..380441fd7 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -866,7 +866,7 @@ class TestImpalaShell(ImpalaTestSuite):
def test_query_time_and_link_message(self, vector, unique_database):
shell_messages = ["Query submitted at: ", "(Coordinator: ",
- "Query progress can be monitored at: "]
+ "Query state can be monitored at: "]
# CREATE statements should not print query time and webserver address.
results = run_impala_shell_cmd(
vector, ['--query=create table %s.shell_msg_test (id int)' %
unique_database])
diff --git a/tests/shell/util.py b/tests/shell/util.py
index 892ef22f1..889c075c8 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -293,10 +293,10 @@ class ImpalaShell(object):
# Server version: impalad version...
# Query: select sleep(10)
# Query submitted at:...
- # Query progress can be monitored at:...
+ # Query state can be monitored at:...
# We stop at 10 iterations to prevent an infinite loop if something goes
wrong.
iters = 0
- while "Query progress" not in self.shell_process.stderr.readline() and
iters < 10:
+ while "Query state" not in self.shell_process.stderr.readline() and iters
< 10:
iters += 1
def get_result(self, stdin_input=None):