This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new d5da6ee2a IMPALA-14634: Organize custom cluster logs into
subdirectories by test name
d5da6ee2a is described below
commit d5da6ee2a5b570fdb99d9da5d74885c48ec5d539
Author: Joe McDonnell <[email protected]>
AuthorDate: Tue Dec 16 10:29:51 2025 -0800
IMPALA-14634: Organize custom cluster logs into subdirectories by test name
This changes the logic for setting the log directory for
custom cluster tests. It uses the test class name and (optionally)
the test method name to put log files into subdirectories.
For a test that restarts the cluster for each test method, this
would have a directory structure of
base_dir / test class / test method name /
For a test that starts the cluster at the class level and reuses
it across test methods, this would be:
base_dir / test class /
This directory structure can still have logs from multiple tests
going to a single directory. For example, some tests are parameterized
so that a test method runs multiple times with different parameter
values. Those will share a directory. This should be rare and most
directories will have logs from a single test.
Some test methods restart the cluster within the test itself rather
than just at the beginning. This stores the test method so these
restarts continue to use the same directory. The code is often in
@classmethod functions, so this stores the test method name at the
class level and manipulates it through @classmethod functions.
This is awkward, but it works because the custom cluster tests are
single-threaded.
This also adjusts the logic for determining the base directory
for custom cluster logs. tests/run-custom-cluster-tests.sh sets
LOG_DIR to $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR (i.e.
logs/custom_cluster_tests) and this continues to respect LOG_DIR.
However, in the dev environment using impala-py.test, LOG_DIR
is not set and the code currently defaults to /tmp. This changes it
to use $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR so that logs go to
logs/custom_cluster_tests rather than /tmp.
Testing:
- Ran locally and verified the output goes into directories
- Ran exhaustive custom cluster tests
Change-Id: I8a8402fed1584a99f91451a3976e7026d0deb834
Reviewed-on: http://gerrit.cloudera.org:8080/23796
Reviewed-by: Michael Smith <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Quanlong Huang <[email protected]>
---
tests/common/custom_cluster_test_suite.py | 54 +++++++++++++++++++++++++++++--
tests/custom_cluster/test_breakpad.py | 2 +-
2 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/tests/common/custom_cluster_test_suite.py
b/tests/common/custom_cluster_test_suite.py
index 98b3b3e2c..a5a1198e6 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py
@@ -116,6 +116,15 @@ class CustomClusterTestSuite(ImpalaTestSuite):
# Args for cluster startup/teardown when sharing a single cluster for the
entire class.
SHARED_CLUSTER_ARGS = {}
+ # The currently executing test method. setup_method() populates this and
teardown_method()
+ # clears it. This is used to set the log directory location when a test
manually
+ # restarts the cluster during the test. This is left unset for tests that
use a single
+ # cluster for multiple tests (i.e. with SHARED_CLUSTER_ARGS), as the logs
will be
+ # shared across multiple tests. Since this is used from @classmethod
functions, this is
+ # set and accessed via @classmethod functions set/get_current_test_method_name().
This is
+ # awkward, but it should work because custom cluster tests are single
threaded.
+ CURRENT_TEST_METHOD_NAME = None
+
@classmethod
def add_test_dimensions(cls):
super(CustomClusterTestSuite, cls).add_test_dimensions()
@@ -268,6 +277,14 @@ class CustomClusterTestSuite(ImpalaTestSuite):
cleanup_tmp_test_dir(self.TMP_DIRS[name])
del self.TMP_DIRS[name]
+ @classmethod
+ def set_current_test_method_name(cls, test_method_name):
+ cls.CURRENT_TEST_METHOD_NAME = test_method_name
+
+ @classmethod
+ def get_current_test_method_name(self):
+ return self.CURRENT_TEST_METHOD_NAME
+
@classmethod
def cluster_setup(cls, args):
cluster_args = list()
@@ -393,6 +410,10 @@ class CustomClusterTestSuite(ImpalaTestSuite):
def setup_method(self, method):
if not self.SHARED_CLUSTER_ARGS:
+ # Store the test method name so that we can put logs in different
directories for
+ # different tests. This only applies if the cluster is being restarted
per test
+ # method. If this cluster is used for multiple test methods, leave this
unset.
+ self.set_current_test_method_name(method.__name__)
self.cluster_setup(method.__dict__)
elif method.__dict__.get(WITH_ARGS_METHOD):
pytest.fail("Cannot specify with_args on both class and methods")
@@ -426,6 +447,7 @@ class CustomClusterTestSuite(ImpalaTestSuite):
def teardown_method(self, method):
if not self.SHARED_CLUSTER_ARGS:
self.cluster_teardown(method.__name__, method.__dict__)
+ self.set_current_test_method_name(None)
def wait_for_wm_init_complete(self, timeout_s=180):
"""
@@ -554,7 +576,7 @@ class CustomClusterTestSuite(ImpalaTestSuite):
@classmethod
def _start_impala_cluster(cls,
options,
- impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
+ impala_log_dir=None,
cluster_size=DEFAULT_CLUSTER_SIZE,
num_coordinators=NUM_COORDINATORS,
use_exclusive_coordinators=False,
@@ -570,7 +592,33 @@ class CustomClusterTestSuite(ImpalaTestSuite):
wait_for_backends=True,
log_symlinks=False,
force_restart=True):
- cls.impala_log_dir = impala_log_dir
+ if impala_log_dir:
+ # If the test gave a specific location, use it, as the test may be
parsing the logs
+ # to find certain output.
+ cls.impala_log_dir = impala_log_dir
+ else:
+ # The test didn't customize the log dir, so calculate a reasonable base
directory.
+ # To find the log directory, we proceed in this order:
+ # 1. LOG_DIR environment variable (used in test scripts for Jenkins
jobs, etc)
+ # 2. IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR - set in impala-config.sh (used in
devenvs)
+ # 3. /tmp/ - This probably shouldn't happen, but at least the logs can
go somewhere
+ impala_base_log_dir = os.getenv("LOG_DIR",
+ os.getenv("IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR", "/tmp/"))
+
+ # To make it easier to find logs across multiple custom cluster tests,
organize
+ # them into subdirectories based on their test class and their test
method name
+ # (where applicable).
+ impala_log_dir_per_test = os.path.join(impala_base_log_dir, cls.__name__)
+ # The CURRENT_TEST_METHOD_NAME will be None when using
SHARED_CLUSTER_ARGS as the
+ # cluster is not restarted for each test method
+ if cls.CURRENT_TEST_METHOD_NAME:
+ impala_log_dir_per_test = os.path.join(impala_log_dir_per_test,
+ cls.CURRENT_TEST_METHOD_NAME)
+
+ if not os.path.isdir(impala_log_dir_per_test):
+ os.makedirs(impala_log_dir_per_test)
+ cls.impala_log_dir = impala_log_dir_per_test
+
# We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests
specifically test that
# certain custom startup arguments work and we want to keep them
independent of dev
# environments.
@@ -578,7 +626,7 @@ class CustomClusterTestSuite(ImpalaTestSuite):
'--state_store_args=%s' % DEFAULT_STATESTORE_ARGS,
'--cluster_size=%d' % cluster_size,
'--num_coordinators=%d' % num_coordinators,
- '--log_dir=%s' % impala_log_dir,
+ '--log_dir=%s' % cls.impala_log_dir,
'--log_level=%s' % log_level]
if ignore_pid_on_log_rotation:
diff --git a/tests/custom_cluster/test_breakpad.py
b/tests/custom_cluster/test_breakpad.py
index db3f2cc89..666f384bb 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -264,7 +264,7 @@ class TestBreakpadExhaustive(TestBreakpadBase):
"""Check that setting 'minidump_path' to a relative value results in
minidump files
written to 'log_dir'.
"""
- minidump_base_dir = os.path.join(os.environ.get('LOG_DIR', '/tmp'),
'minidumps')
+ minidump_base_dir = os.path.join(self.impala_log_dir, 'minidumps')
shutil.rmtree(minidump_base_dir, ignore_errors=True)
# Omitting minidump_path as a parameter to the cluster will choose the
default
# configuration, which is a FLAGS_log_dir/minidumps.