This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 9fc941b61 IMPALA-14327: Update load-data.py and run-workload.py to use HS2
9fc941b61 is described below

commit 9fc941b6113d2f4282c00ae208f1e26d630aba90
Author: Riza Suminto <[email protected]>
AuthorDate: Tue Aug 12 14:36:31 2025 -0700

    IMPALA-14327: Update load-data.py and run-workload.py to use HS2
    
    load-data.py is used for data loading, while run-workload.py is used for
    running perf-AB-test. This patch switches both scripts from the Beeswax
    protocol to the HS2 protocol.
    
    Testing:
    Run data loading and perf-AB-test-ub2004 based on this patch.
    
    Change-Id: I1c3727871b8b2e75c3f10ceabfbe9cb96e36ead3
    Reviewed-on: http://gerrit.cloudera.org:8080/23309
    Reviewed-by: Riza Suminto <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 bin/load-data.py            | 10 +++++++---
 bin/run-workload.py         |  2 +-
 bin/single_node_perf_run.py |  2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/bin/load-data.py b/bin/load-data.py
index d729c024f..e86a3d698 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -33,7 +33,8 @@ import time
 import traceback
 
 from optparse import OptionParser
-from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient
+from tests.common.impala_connection import ImpylaHS2Connection
+from tests.common.test_vector import HS2
 from multiprocessing.pool import ThreadPool
 
 LOG = logging.getLogger('load-data.py')
@@ -112,6 +113,8 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\
 
 HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop')
 
+HS2_HOST_PORT = "{}:{}".format(options.impalad, 21050)
+
 def available_workloads(workload_dir):
   return [subdir for subdir in os.listdir(workload_dir)
             if os.path.isdir(os.path.join(workload_dir, subdir))]
@@ -181,7 +184,8 @@ def exec_impala_query_from_file(file_name):
   LOG.info("Beginning execution of impala SQL on {0}: {1}".format(
            options.impalad, file_name))
   is_success = True
-  impala_client = ImpalaBeeswaxClient(options.impalad, use_kerberos=options.use_kerberos)
+  impala_client = ImpylaHS2Connection(HS2_HOST_PORT,
+                                      use_kerberos=options.use_kerberos)
   output_file = file_name + ".log"
   query = None
   with open(output_file, 'w') as out_file:
@@ -234,7 +238,7 @@ def generate_schema_statements(workload):
     generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir
   if options.hdfs_namenode is not None:
     generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode
-  generate_cmd += " --backend=%s" % options.impalad
+  generate_cmd += " --backend=%s" % HS2_HOST_PORT
   LOG.info('Executing Generate Schema Command: ' + generate_cmd)
   schema_cmd = os.path.join(TESTDATA_BIN_DIR, generate_cmd)
   error_msg = 'Error generating schema statements for workload: ' + workload
diff --git a/bin/run-workload.py b/bin/run-workload.py
index 98af05ad7..b40070ef8 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -95,7 +95,7 @@ parser.add_option("--use_kerberos", dest="use_kerberos", action="store_true",
 parser.add_option("--continue_on_query_error", dest="continue_on_query_error",
                   action="store_true", default=False,
                   help="If set, continue execution on each query error.")
-parser.add_option("-c", "--client_type", dest="client_type", default='beeswax',
+parser.add_option("-c", "--client_type", dest="client_type", default='hs2',
                   choices=['beeswax', 'jdbc', 'hs2'],
                   help="Client type. Valid options are 'beeswax' or 'jdbc' or 'hs2'")
 parser.add_option("--plugin_names", dest="plugin_names", default=None,
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
index 4f77977df..88f6fb34d 100755
--- a/bin/single_node_perf_run.py
+++ b/bin/single_node_perf_run.py
@@ -147,7 +147,7 @@ def run_workload(base_dir, workloads, options):
 
   run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
 
-  impalads = ",".join(["localhost:{0}".format(21000 + i)
+  impalads = ",".join(["localhost:{0}".format(21050 + i)
                        for i in range(0, int(options.num_impalads))])
 
   run_workload += ["--workloads={0}".format(workloads),

Reply via email to