This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 9fc941b61 IMPALA-14327: Update load-data.py and run-workload.py to use
HS2
9fc941b61 is described below
commit 9fc941b6113d2f4282c00ae208f1e26d630aba90
Author: Riza Suminto <[email protected]>
AuthorDate: Tue Aug 12 14:36:31 2025 -0700
IMPALA-14327: Update load-data.py and run-workload.py to use HS2
load-data.py is used for data loading, while run-workload.py is used for
running perf-AB-test. This patch changes both scripts from using the
beeswax protocol to using the HS2 protocol.
Testing:
Ran data loading and perf-AB-test-ub2004 with this patch applied.
Change-Id: I1c3727871b8b2e75c3f10ceabfbe9cb96e36ead3
Reviewed-on: http://gerrit.cloudera.org:8080/23309
Reviewed-by: Riza Suminto <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
bin/load-data.py | 10 +++++++---
bin/run-workload.py | 2 +-
bin/single_node_perf_run.py | 2 +-
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/bin/load-data.py b/bin/load-data.py
index d729c024f..e86a3d698 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -33,7 +33,8 @@ import time
import traceback
from optparse import OptionParser
-from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient
+from tests.common.impala_connection import ImpylaHS2Connection
+from tests.common.test_vector import HS2
from multiprocessing.pool import ThreadPool
LOG = logging.getLogger('load-data.py')
@@ -112,6 +113,8 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s"
--verbose=true'\
HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop')
+HS2_HOST_PORT = "{}:{}".format(options.impalad, 21050)
+
def available_workloads(workload_dir):
return [subdir for subdir in os.listdir(workload_dir)
if os.path.isdir(os.path.join(workload_dir, subdir))]
@@ -181,7 +184,8 @@ def exec_impala_query_from_file(file_name):
LOG.info("Beginning execution of impala SQL on {0}: {1}".format(
options.impalad, file_name))
is_success = True
- impala_client = ImpalaBeeswaxClient(options.impalad,
use_kerberos=options.use_kerberos)
+ impala_client = ImpylaHS2Connection(HS2_HOST_PORT,
+ use_kerberos=options.use_kerberos)
output_file = file_name + ".log"
query = None
with open(output_file, 'w') as out_file:
@@ -234,7 +238,7 @@ def generate_schema_statements(workload):
generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir
if options.hdfs_namenode is not None:
generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode
- generate_cmd += " --backend=%s" % options.impalad
+ generate_cmd += " --backend=%s" % HS2_HOST_PORT
LOG.info('Executing Generate Schema Command: ' + generate_cmd)
schema_cmd = os.path.join(TESTDATA_BIN_DIR, generate_cmd)
error_msg = 'Error generating schema statements for workload: ' + workload
diff --git a/bin/run-workload.py b/bin/run-workload.py
index 98af05ad7..b40070ef8 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -95,7 +95,7 @@ parser.add_option("--use_kerberos", dest="use_kerberos",
action="store_true",
parser.add_option("--continue_on_query_error", dest="continue_on_query_error",
action="store_true", default=False,
help="If set, continue execution on each query error.")
-parser.add_option("-c", "--client_type", dest="client_type", default='beeswax',
+parser.add_option("-c", "--client_type", dest="client_type", default='hs2',
choices=['beeswax', 'jdbc', 'hs2'],
help="Client type. Valid options are 'beeswax' or 'jdbc' or
'hs2'")
parser.add_option("--plugin_names", dest="plugin_names", default=None,
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
index 4f77977df..88f6fb34d 100755
--- a/bin/single_node_perf_run.py
+++ b/bin/single_node_perf_run.py
@@ -147,7 +147,7 @@ def run_workload(base_dir, workloads, options):
run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
- impalads = ",".join(["localhost:{0}".format(21000 + i)
+ impalads = ",".join(["localhost:{0}".format(21050 + i)
for i in range(0, int(options.num_impalads))])
run_workload += ["--workloads={0}".format(workloads),