This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 24a8a29120606cdc7205b09a5bd53d52d357539a
Author: Michael Smith <[email protected]>
AuthorDate: Wed May 3 14:51:03 2023 -0700

    IMPALA-12115: Put each filesystem in a different directory
    
    Uses different node directories and databases for each filesystem so we
    don't need to recreate them from scratch when switching.
    
    Preserves current defaults so developers with a default HDFS minicluster
    don't need to recreate it after this patch. Any other cluster (Ozone,
    erasure-coded, S3) will need to be recreated.
    
    Starting with a new filesystem requires running:
    0. ./testdata/bin/kill-all.sh, if an old cluster is running
    1. create-test-configuration.sh (add -create_metastore and
       -create_ranger_policy_db on the first run)
    2. ./testdata/bin/run-all.sh
    3. start-impala-cluster.py
    
    or "buildall.sh -noclean -start_minicluster -start_impala_cluster". Add
    "-format" for the first run to create HMS and Ranger DBs.
    
    IMPALA_CLUSTER_LOGS_DIR is shared for all clusters. Symlinks to the
    minicluster are recreated by create-test-configuration.sh.
    
    Change-Id: I8c89156fd1cefbb752fee3070e10bb08fbf80e07
    Reviewed-on: http://gerrit.cloudera.org:8080/19841
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 bin/impala-config.sh        | 20 +++++++++++++++-----
 testdata/cluster/.gitignore |  1 +
 testdata/cluster/admin      |  9 +++++----
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 06b1caa3f..88ac95d8d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -596,15 +596,25 @@ export EXTERNAL_LISTEN_HOST="${EXTERNAL_LISTEN_HOST-0.0.0.0}"
 export DEFAULT_FS="${DEFAULT_FS-hdfs://${INTERNAL_LISTEN_HOST}:20500}"
 export WAREHOUSE_LOCATION_PREFIX="${WAREHOUSE_LOCATION_PREFIX-}"
 export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}"
-export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION}"
+# Use different node directories for each filesystem so we don't need to recreate them
+# from scratch when switching.
+UNIQUE_FS_LABEL=
+if [[ "${TARGET_FILESYSTEM}" != "hdfs" ]]; then
+  UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-${TARGET_FILESYSTEM}"
+fi
+if [[ "${ERASURE_CODING}" = true ]]; then
+  UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-ec"
+fi
+DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS_LABEL"
+export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}"
 
-ESCAPED_IMPALA_HOME=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$IMPALA_HOME")
+ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME")
 if $USE_APACHE_HIVE; then
   export HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"
   export HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src"
   # if apache hive is being used change the metastore db name, so we don't have to
   # format the metastore db everytime we switch between hive versions
-  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_apache"}
+  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_apache"}
 else
   export HIVE_HOME=${HIVE_HOME_OVERRIDE:-\
 "$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"}
@@ -612,7 +622,7 @@ else
 "${CDP_COMPONENTS_HOME}/hive-${IMPALA_HIVE_VERSION}"}
   # Previously, there were multiple configurations and the "_cdp" included below
   # allowed the two to be distinct. We keep this "_cdp" for historical reasons.
-  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_cdp"}
+  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_cdp"}
 fi
 # Set the path to the hive_metastore.thrift which is used to build thrift code
 export HIVE_METASTORE_THRIFT_DIR=${HIVE_METASTORE_THRIFT_DIR_OVERRIDE:-\
@@ -627,7 +637,7 @@ fi
 # Set the Hive binaries in the path
 export PATH="$HIVE_HOME/bin:$HBASE_HOME/bin:$OZONE_HOME/bin:$PATH"
 
-RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_IMPALA_HOME)}
+RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_DB_UID)}
 # The DB script in Ranger expects the database name to be in lower case.
 export RANGER_POLICY_DB=$(echo ${RANGER_POLICY_DB} | tr '[:upper:]' '[:lower:]')
 
diff --git a/testdata/cluster/.gitignore b/testdata/cluster/.gitignore
index 0c35f61e8..af83e70da 100644
--- a/testdata/cluster/.gitignore
+++ b/testdata/cluster/.gitignore
@@ -2,6 +2,7 @@
 /cdh5
 /cdh6
 /cdh7
+/cdh7-*
 /ranger/setup/impala_user.json
 /ranger/setup/impala_group.json
 /ranger/setup/impala_group_owner.json
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index af44ed565..8766a89b1 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -231,11 +231,12 @@ function create_cluster {
 
     # Add some easy access links closer to IMPALA_HOME
     EASY_ACCESS_LOG_LINK="$EASY_ACCESS_LOG_DIR/cdh$CDH_MAJOR_VERSION-$NODE"
-    if [[ ! -e "$EASY_ACCESS_LOG_LINK" ]]; then
-      mkdir -p "$EASY_ACCESS_LOG_DIR"
-      ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR"
-      mv "$IMPALA_CLUSTER_LOGS_DIR/log" "$EASY_ACCESS_LOG_LINK"
+    if [[ -e "$EASY_ACCESS_LOG_LINK" ]]; then
+      rm "${EASY_ACCESS_LOG_LINK}"
     fi
+    mkdir -p "$EASY_ACCESS_LOG_DIR"
+    ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR"
+    mv "$EASY_ACCESS_LOG_DIR/log" "$EASY_ACCESS_LOG_LINK"
 
     # Template population
     DATANODE_PORT=$((DATANODE_FREE_PORT_START++))

Reply via email to