This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new eb8c8bd4c IMPALA-14674: Implement connect_timeout_ms for HS2-HTTP
eb8c8bd4c is described below
commit eb8c8bd4ceb233b7cc17222ff431fa8aab7694b3
Author: Michael Smith <[email protected]>
AuthorDate: Mon Oct 6 15:52:27 2025 -0700
IMPALA-14674: Implement connect_timeout_ms for HS2-HTTP
Implements connect_timeout_ms support for HS2-HTTP. This only applies
the timeout while establishing the connection, so later requests do not
timeout. This is preferrable to configuring http_socket_timeout_s, which
can result in timing out operations that just take awhile to execute.
Removed ImpalaHttpClient.setTimeout as it's unused and not part of the
specification for TTransportBase.
Testing:
- updates tests now that HS2-HTTP supports connect_timeout_ms
- test_impala_shell_timeout for HS2-HTTP without http_socket_timeout_s
- marks test_http_socket_timeout to run serially because it relies on
short timeouts; inconsistent behavior can leave a dangling session
Change-Id: I9012066fb0d16497f309532021d7b323404b9fb2
Reviewed-on: http://gerrit.cloudera.org:8080/23499
Reviewed-by: Michael Smith <[email protected]>
Tested-by: Michael Smith <[email protected]>
---
shell/impala_shell/ImpalaHttpClient.py | 20 ++++++++--------
shell/impala_shell/impala_client.py | 23 ++++++++-----------
tests/shell/test_shell_commandline.py | 42 ++++++++++++----------------------
3 files changed, 33 insertions(+), 52 deletions(-)
diff --git a/shell/impala_shell/ImpalaHttpClient.py
b/shell/impala_shell/ImpalaHttpClient.py
index ae9eda609..5a6390705 100644
--- a/shell/impala_shell/ImpalaHttpClient.py
+++ b/shell/impala_shell/ImpalaHttpClient.py
@@ -56,7 +56,8 @@ class ImpalaHttpClient(TTransportBase):
MIN_REQUEST_SIZE_FOR_EXPECT = 1024
def __init__(self, uri_or_host, ssl_context=None, http_cookie_names=None,
- socket_timeout_s=None, verbose=False, reuse_connection=True):
+ socket_timeout_s=None, verbose=False, reuse_connection=True,
+ connect_timeout_ms=None):
"""To properly authenticate against an HTTPS server, provide an
ssl_context created
with ssl.create_default_context() to validate the server certificate.
@@ -121,7 +122,8 @@ class ImpalaHttpClient(TTransportBase):
self.__wbuf = BytesIO()
self.__http = None
self.__http_response = None
- self.__timeout = socket_timeout_s
+ self.__connect_timeout_s = connect_timeout_ms / 1000.0 if
connect_timeout_ms else None
+ self.__socket_timeout_s = socket_timeout_s
# __custom_headers is used to store HTTP headers which are generated in
runtime for
# new request.
self.__custom_headers = None
@@ -147,14 +149,18 @@ class ImpalaHttpClient(TTransportBase):
def open(self):
if self.scheme == 'http':
self.__http = http_client.HTTPConnection(self.host, self.port,
- timeout=self.__timeout)
+
timeout=self.__connect_timeout_s)
elif self.scheme == 'https':
self.__http = http_client.HTTPSConnection(self.host, self.port,
- timeout=self.__timeout,
+
timeout=self.__connect_timeout_s,
context=self.context)
if self.using_proxy():
self.__http.set_tunnel(self.realhost, self.realport,
{"Proxy-Authorization": self.proxy_auth})
+ # Initiate connection with configured timeout.
+ self.__http.connect()
+ # Set socket_timeout_s for the remainder of the session.
+ self.__http.sock.settimeout(self.__socket_timeout_s)
def close(self):
self.__http.close()
@@ -164,12 +170,6 @@ class ImpalaHttpClient(TTransportBase):
def isOpen(self):
return self.__http is not None
- def setTimeout(self, ms):
- if ms is None:
- self.__timeout = None
- else:
- self.__timeout = ms / 1000.0
-
def getCustomHeadersWithBasicAuth(self, cookie_header, has_auth_cookie):
custom_headers = {}
if cookie_header:
diff --git a/shell/impala_shell/impala_client.py
b/shell/impala_shell/impala_client.py
index ea6de5045..61a3f227c 100644
--- a/shell/impala_shell/impala_client.py
+++ b/shell/impala_shell/impala_client.py
@@ -415,21 +415,14 @@ class ImpalaClient(object):
if not hasattr(ssl, "create_default_context"):
print("Python version too old. SSLContext not supported.",
file=sys.stderr)
raise NotImplementedError()
- # Current implementation of ImpalaHttpClient does a close() and open() of
the
- # underlying http connection on every flush() (THRIFT-4600). Due to this,
setting a
- # connect timeout does not achieve the desirable result as the subsequent
open() could
- # block similary in case of problematic remote end points.
- # TODO: Investigate connection reuse in ImpalaHttpClient and revisit this.
- if connect_timeout_ms > 0 and self.verbose:
- print("Warning: --connect_timeout_ms is currently ignored with HTTP
transport.",
- file=sys.stderr)
-
- # Notes on http socket timeout:
+
+ # Notes on http socket timeout
# https://docs.python.org/3/library/socket.html#socket-timeouts
- # Having a default timeout of 'None' (blocking mode) could result in hang
like
- # symptoms in case of a problematic remote endpoint. It's better to have a
finite
- # timeout so that in case of any connection errors, the client retries
have a better
- # chance of succeeding.
+ # We set connect_timeout_ms while establishing a connection to quickly
detect problem
+ # servers, similar to other transports. After the connection is
established, we set
+ # the http_socket_timeout_s (if provided) for the remainder of the
session. Setting
+ # this can be more problematic because Impala HS2-HTTP can have
long-running requests
+ # that may exceed a timeout, particularly during slow planning or large
result sets.
host_and_port = self._to_host_port(self.impalad_host, self.impalad_port)
assert self.http_path
# ImpalaHttpClient relies on the URI scheme (http vs https) to open an
appropriate
@@ -444,12 +437,14 @@ class ImpalaClient(object):
url = "https://{0}/{1}".format(host_and_port, self.http_path)
transport = ImpalaHttpClient(url, ssl_context=ssl_ctx,
http_cookie_names=self.http_cookie_names,
+ connect_timeout_ms=connect_timeout_ms,
socket_timeout_s=self.http_socket_timeout_s,
verbose=self.verbose,
reuse_connection=self.reuse_http_connection)
else:
url = "http://{0}/{1}".format(host_and_port, self.http_path)
transport = ImpalaHttpClient(url,
http_cookie_names=self.http_cookie_names,
+ connect_timeout_ms=connect_timeout_ms,
socket_timeout_s=self.http_socket_timeout_s,
verbose=self.verbose,
reuse_connection=self.reuse_http_connection)
diff --git a/tests/shell/test_shell_commandline.py
b/tests/shell/test_shell_commandline.py
index 095ec4ed5..f67239c46 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1175,20 +1175,13 @@ class TestImpalaShell(ImpalaTestSuite):
indefinitely while connecting
"""
- # --connect_timeout_ms not supported with HTTP transport. Refer to the
comment
- # in ImpalaClient::_get_http_transport() for details.
- # --http_socket_timeout_s not supported for strict_hs2_protocol.
- if (vector.get_value('protocol') == 'hs2-http'
- and vector.get_value('strict_hs2_protocol')):
- pytest.skip("THRIFT-4600")
-
with closing(socket.socket()) as s:
s.bind(("", 0))
# maximum number of queued connections on this socket is 1.
s.listen(1)
test_port = s.getsockname()[1]
- args = ['-q', 'select foo; select bar;', '--ssl', '-t', '2000',
- '--http_socket_timeout_s', '2', '-i', 'localhost:%d' %
(test_port)]
+ args = ['-q', 'select 1', '--ssl', '-t', '1000',
+ '-i', 'localhost:%d' % (test_port)]
run_impala_shell_cmd(vector, args, expect_success=False)
def test_client_identifier(self, vector):
@@ -1409,34 +1402,27 @@ class TestImpalaShell(ImpalaTestSuite):
rows_from_file = [line.rstrip() for line in f]
assert rows_from_stdout == rows_from_file
+ @pytest.mark.execute_serially
def test_http_socket_timeout(self, vector):
"""Test setting different http_socket_timeout_s values."""
if (vector.get_value('strict_hs2_protocol')
or vector.get_value('protocol') != 'hs2-http'):
pytest.skip("http socket timeout not supported in strict hs2 mode."
" Only supported with hs2-http protocol.")
- # Test http_socket_timeout_s=0, expect errors
- args = ['--quiet', '-B', '--query', 'select 0;']
- result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=0'],
+ # Test very short http_socket_timeout_s, expect errors. After the
connection is
+ # established - which now uses connect_timeout_ms - RPCs in the
minicluster appear to
+ # respond immediately, so non-blocking mode (timeout=0) does not result in
an error.
+ # Instead, use a very small timeout that the RPC cannot meet.
+ args = ['--quiet', '-B', '--query', 'select 0']
+ result = run_impala_shell_cmd(vector, args +
['--http_socket_timeout_s=0.001'],
expect_success=False)
- # Outside the docker-based tests, Python 2 and Python 3 produce "Operating
- # now in progress" with slightly different error classes. When running with
- # docker-based tests, it results in a different error code and "Cannot
- # assign requested address" for both Python 2 and Python 3.
- # Tolerate all three of these variants.
- error_template = (
- "[Exception] type=<class '{0}'> in OpenSession. Num remaining tries: 3 "
- "[Errno {1}] {2}")
- expected_err_py2 = \
- error_template.format("socket.error", 115, "Operation now in progress")
- expected_err_py3 = \
- error_template.format("BlockingIOError", 115, "Operation now in
progress")
- expected_err_docker = \
- error_template.format("OSError", 99, "Cannot assign requested address")
+ # Python 2 throws socket.timeout and Python 3 throws TimeoutError.
+ error_template = "[Exception] type=<class '{0}'> in ExecuteStatement.
timed out"
+ pyver_exceptions = ["socket.timeout", "TimeoutError"]
actual_err = result.stderr.splitlines()[0]
- assert actual_err[TS_LEN:] in [expected_err_py2, expected_err_py3,
- expected_err_docker]
+ assert actual_err[TS_LEN:] in [error_template.format(pever_exception)
+ for pever_exception in pyver_exceptions]
# Test http_socket_timeout_s=-1, expect errors
result = run_impala_shell_cmd(vector, args +
['--http_socket_timeout_s=-1'],