This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5d1f1e0180e3ded0fd03b78f75106378c2a1e9ce
Author: ttttttz <[email protected]>
AuthorDate: Sat Aug 17 20:55:55 2024 +0800

    IMPALA-14183: Rename the environment variable USE_APACHE_HIVE to 
USE_APACHE_HIVE_3
    
    When the environment variable USE_APACHE_HIVE is set to true, build
    Impala for adapting to Apache Hive 3.x. In order to better distinguish it
    from Apache Hive 2.x later, rename USE_APACHE_HIVE to USE_APACHE_HIVE_3.
    Additionally, to facilitate referencing different versions of the Hive
    MetastoreShim, the major version of Hive has been added to the environment
    variable IMPALA_HIVE_DIST_TYPE.
    
    Change-Id: I11b5fe1604b6fc34469fb357c98784b7ad88574d
    Reviewed-on: http://gerrit.cloudera.org:8080/21724
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 README-build.md                            |  2 +-
 bin/bootstrap_toolchain.py                 |  2 +-
 bin/impala-config.sh                       | 38 +++++++++++++++---------------
 bin/jenkins/build-all-flag-combinations.sh |  2 +-
 buildall.sh                                |  2 +-
 common/function-registry/CMakeLists.txt    |  2 +-
 fe/pom.xml                                 |  4 ++--
 fe/src/test/resources/hive-site.xml.py     |  4 ++--
 testdata/bin/create-load-data.sh           |  2 +-
 testdata/bin/generate-schema-statements.py |  4 ++--
 testdata/bin/load_nested.py                |  2 +-
 testdata/bin/patch_hive.sh                 |  4 ++--
 tests/common/environ.py                    |  2 +-
 tests/util/test_file_parser.py             |  2 +-
 14 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/README-build.md b/README-build.md
index 63d7cc299..93d2dc181 100644
--- a/README-build.md
+++ b/README-build.md
@@ -39,7 +39,7 @@ can do so through the environment variables and scripts 
listed below.
 | USE_APACHE_COMPONENTS | false | Use Apache components for Hadoop, HBase, 
Hive, Tez, Ranger. It will set USE_APACHE_{HADOOP,HBASE,HIVE,TEZ,RANGER} 
variable as true if not set false. |
 | USE_APACHE_HADOOP | false | Use Apache Hadoop |
 | USE_APACHE_HBASE | false | Use Apache HBase |
-| USE_APACHE_HIVE | false | Use Apache Hive |
+| USE_APACHE_HIVE_3 | false | Use Apache Hive-3 |
 | USE_APACHE_TEZ | false | Use Apache Tez |
 | USE_APACHE_RANGER | false | Use Apache Ranger |
 | DOWNLOAD_CDH_COMPONENTS | true | Download CDH components |
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 59984fe05..ecc0354f9 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -506,7 +506,7 @@ def get_hadoop_downloads():
   cluster_components = []
   use_apache_hadoop = os.environ["USE_APACHE_HADOOP"] == "true"
   use_apache_hbase = os.environ["USE_APACHE_HBASE"] == "true"
-  use_apache_hive = os.environ["USE_APACHE_HIVE"] == "true"
+  use_apache_hive = os.environ["USE_APACHE_HIVE_3"] == "true"
   use_apache_tez = os.environ["USE_APACHE_TEZ"] == "true"
   use_apache_ranger = os.environ["USE_APACHE_RANGER"] == "true"
   use_apache_ozone = os.environ["USE_APACHE_OZONE"] == "true"
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index a5b758d2f..848732b0d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -267,8 +267,8 @@ export APACHE_ORC_JAVA_VERSION=1.8.3
 export APACHE_PARQUET_VERSION=1.12.3
 export APACHE_RANGER_VERSION=2.4.0
 export APACHE_TEZ_VERSION=0.10.2
-export APACHE_HIVE_VERSION=3.1.3
-export APACHE_HIVE_STORAGE_API_VERSION=2.7.0
+export APACHE_HIVE_3_VERSION=3.1.3
+export APACHE_HIVE_3_STORAGE_API_VERSION=2.7.0
 export APACHE_OZONE_VERSION=1.4.0
 
 # Java dependencies that are not also runtime components. Declaring versions 
here allows
@@ -377,8 +377,8 @@ export CDP_ICEBERG_URL=${CDP_ICEBERG_URL-}
 export CDP_RANGER_URL=${CDP_RANGER_URL-}
 export CDP_TEZ_URL=${CDP_TEZ_URL-}
 
-export APACHE_HIVE_URL=${APACHE_HIVE_URL-}
-export APACHE_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-}
+export APACHE_HIVE_3_URL=${APACHE_HIVE_3_URL-}
+export APACHE_HIVE_3_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-}
 export APACHE_OZONE_URL=${APACHE_OZONE_URL-}
 
 export CDP_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdp_components-$CDP_BUILD_NUMBER"
@@ -396,7 +396,7 @@ if ${USE_APACHE_COMPONENTS:=false}; then
   export IMPALA_TEZ_VERSION=${APACHE_TEZ_VERSION}
   export USE_APACHE_HADOOP=true
   export USE_APACHE_HBASE=true
-  export USE_APACHE_HIVE=true
+  export USE_APACHE_HIVE_3=true
   export USE_APACHE_TEZ=true
   export USE_APACHE_RANGER=true
   export USE_APACHE_OZONE=true
@@ -418,7 +418,7 @@ else
   export IMPALA_TEZ_URL=${CDP_TEZ_URL-}
   export USE_APACHE_HADOOP=${USE_APACHE_HADOOP:=false}
   export USE_APACHE_HBASE=${USE_APACHE_HBASE:=false}
-  export USE_APACHE_HIVE=${USE_APACHE_HIVE:=false}
+  export USE_APACHE_HIVE_3=${USE_APACHE_HIVE_3:=false}
   export USE_APACHE_TEZ=${USE_APACHE_TEZ:=false}
   export USE_APACHE_RANGER=${USE_APACHE_RANGER:=false}
   export USE_APACHE_OZONE=${USE_APACHE_OZONE:=false}
@@ -432,18 +432,18 @@ else
   export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}"
 fi
 
-if $USE_APACHE_HIVE; then
-  # When USE_APACHE_HIVE is set we use the apache hive version to build as 
well as deploy
-  # in the minicluster
-  export IMPALA_HIVE_DIST_TYPE="apache-hive"
-  export IMPALA_HIVE_VERSION=${APACHE_HIVE_VERSION}
-  export IMPALA_HIVE_URL=${APACHE_HIVE_URL-}
-  export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-}
-  export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_STORAGE_API_VERSION}
+if $USE_APACHE_HIVE_3; then
+  # When USE_APACHE_HIVE_3 is set we use the apache hive version to build as 
well as
+  # deploy in the minicluster
+  export IMPALA_HIVE_DIST_TYPE="apache-hive-3"
+  export IMPALA_HIVE_VERSION=${APACHE_HIVE_3_VERSION}
+  export IMPALA_HIVE_URL=${APACHE_HIVE_3_URL-}
+  export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-}
+  export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_3_STORAGE_API_VERSION}
 else
-  # CDP hive version is used to build and deploy in minicluster when 
USE_APACHE_HIVE is
+  # CDP hive version is used to build and deploy in minicluster when 
USE_APACHE_HIVE_* is
   # false
-  export IMPALA_HIVE_DIST_TYPE="hive"
+  export IMPALA_HIVE_DIST_TYPE="hive-3"
   export IMPALA_HIVE_VERSION=${HIVE_VERSION_OVERRIDE:-"$CDP_HIVE_VERSION"}
   export IMPALA_HIVE_URL=${CDP_HIVE_URL-}
   export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-}
@@ -461,7 +461,7 @@ fi
 # infra/python/deps/requirements.txt.
 export IMPALA_THRIFT_CPP_VERSION=0.16.0-p7
 unset IMPALA_THRIFT_CPP_URL
-if $USE_APACHE_HIVE; then
+if $USE_APACHE_HIVE_3; then
   # Apache Hive 3 clients can't run on thrift versions >= 0.14 (IMPALA-11801)
   export IMPALA_THRIFT_POM_VERSION=0.11.0
   export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p5
@@ -707,7 +707,7 @@ 
DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS
 export 
IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}"
 
 ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME")
-if $USE_APACHE_HIVE; then
+if $USE_APACHE_HIVE_3; then
   export 
HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"
   export 
HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src"
   # if apache hive is being used change the metastore db name, so we don't 
have to
@@ -1080,7 +1080,7 @@ export HIVE_AUX_JARS_PATH="$POSTGRES_JDBC_DRIVER"
 # Add the jar of iceberg-hive-runtime to have HiveIcebergStorageHandler.
 # Only needed by Apache Hive3 since CDP Hive3 has the jar of 
hive-iceberg-handler in its
 # lib folder.
-if $USE_APACHE_HIVE; then
+if $USE_APACHE_HIVE_3; then
   export HIVE_AUX_JARS_PATH="$HIVE_AUX_JARS_PATH:\
 
$IMPALA_HOME/fe/target/dependency/iceberg-hive-runtime-${IMPALA_ICEBERG_VERSION}.jar"
 fi
diff --git a/bin/jenkins/build-all-flag-combinations.sh 
b/bin/jenkins/build-all-flag-combinations.sh
index 2f914b5ab..8f7361342 100755
--- a/bin/jenkins/build-all-flag-combinations.sh
+++ b/bin/jenkins/build-all-flag-combinations.sh
@@ -48,7 +48,7 @@ CONFIGS=(
   "-skiptests -noclean -asan"
   "-skiptests -noclean -tsan"
   "-skiptests -noclean -ubsan -so -ninja"
-  # USE_APACHE_HIVE=true build:
+  # USE_APACHE_HIVE_3=true build:
   "-skiptests -noclean -use_apache_components"
   "-notests -noclean -use_apache_components -package"
 )
diff --git a/buildall.sh b/buildall.sh
index 211b93de7..f95b291ec 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -457,7 +457,7 @@ bootstrap_dependencies() {
     cp 
$IMPALA_TOOLCHAIN_PACKAGES_HOME/hadoop-client-$IMPALA_HADOOP_CLIENT_VERSION/lib/*
 \
         $HADOOP_HOME/lib/native/
   fi
-  if [[ "${USE_APACHE_HIVE}" = true ]]; then
+  if [[ "${USE_APACHE_HIVE_3}" = true ]]; then
     "$IMPALA_HOME/testdata/bin/patch_hive.sh"
   fi
 }
diff --git a/common/function-registry/CMakeLists.txt 
b/common/function-registry/CMakeLists.txt
index 0c9274f1c..55faeb59d 100644
--- a/common/function-registry/CMakeLists.txt
+++ b/common/function-registry/CMakeLists.txt
@@ -61,7 +61,7 @@ add_custom_command(
 
 add_custom_target(function-registry ALL DEPENDS ${CODE_GEN_OUTPUT})
 
-if( $ENV{USE_APACHE_HIVE} STREQUAL "false")
+if($ENV{USE_APACHE_HIVE_3} STREQUAL "false")
   add_custom_target(geospatial-udf-wrappers ALL DEPENDS 
${GEOSPATIAL_CODE_GEN_OUTPUT})
   add_dependencies(geospatial-udf-wrappers function-registry)
 else()
diff --git a/fe/pom.xml b/fe/pom.xml
index d73a07ad5..18cd27e4c 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -730,7 +730,7 @@ under the License.
                         -->
                 <source>${project.basedir}/generated-sources/gen-java</source>
                 
<source>${project.build.directory}/generated-sources/cup</source>
-                
<source>${project.basedir}/src/compat-${hive.dist.type}-${hive.major.version}/java</source>
+                
<source>${project.basedir}/src/compat-${hive.dist.type}/java</source>
                </sources>
             </configuration>
           </execution>
@@ -1049,7 +1049,7 @@ under the License.
       <activation>
         <property>
           <name>env.IMPALA_HIVE_DIST_TYPE</name>
-          <value>apache-hive</value>
+          <value>apache-hive-3</value>
         </property>
       </activation>
       <build>
diff --git a/fe/src/test/resources/hive-site.xml.py 
b/fe/src/test/resources/hive-site.xml.py
index aa55d8682..d1ad1daff 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -21,7 +21,7 @@ from __future__ import absolute_import, division, 
print_function
 import os
 
 HIVE_MAJOR_VERSION = int(os.environ['IMPALA_HIVE_VERSION'][0])
-USE_APACHE_HIVE = os.environ['USE_APACHE_HIVE']
+USE_APACHE_HIVE_3 = os.environ['USE_APACHE_HIVE_3']
 KERBERIZE = os.environ.get('IMPALA_KERBERIZE') == 'true'
 VARIANT = os.environ.get('HIVE_VARIANT')
 IMPALA_JAVA_TOOL_OPTIONS=os.environ.get("IMPALA_JAVA_TOOL_OPTIONS")
@@ -250,7 +250,7 @@ CONFIG.update({
 })
 # Before HIVE-19486 (in Apache Hive 4 and CDP Hive versions), Hikari CP 
configs are prefixed with "hikari.*".
 # After HIVE-19486, the prefix is "hikaricp.*".
-if USE_APACHE_HIVE and HIVE_MAJOR_VERSION == 3:
+if USE_APACHE_HIVE_3 and HIVE_MAJOR_VERSION == 3:
   CONFIG.update({
     'hikari.connectionTimeout': 60000,
   })
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 83088248d..debe1c8b8 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -386,7 +386,7 @@ function copy-and-load-dependent-tables {
  # For tables that rely on loading data from local fs test-warehouse
   # TODO: Find a good way to integrate this with the normal data loading 
scripts
   SQL_FILE=${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql
-  if $USE_APACHE_HIVE; then
+  if $USE_APACHE_HIVE_3; then
     # Apache Hive 3.1 doesn't support "STORED AS JSONFILE" (HIVE-19899)
     NEW_SQL_FILE=${IMPALA_HOME}/testdata/bin/load-dependent-tables-hive3.sql
     sed "s/STORED AS JSONFILE/ROW FORMAT SERDE 
'org.apache.hadoop.hive.serde2.JsonSerDe'"\
diff --git a/testdata/bin/generate-schema-statements.py 
b/testdata/bin/generate-schema-statements.py
index 84e550d01..9203cdab3 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -303,7 +303,7 @@ def build_create_statement(table_template, table_name, 
db_name, db_suffix,
   stmt = table_template.format(**params)
   # Apache Hive 3.1 doesn't support "STORED BY ICEBERG STORED AS AVRO" and
   # "STORED AS JSONFILE" (HIVE-25162, HIVE-19899)
-  if is_hive_stmt and os.environ['USE_APACHE_HIVE'] == "true":
+  if is_hive_stmt and os.environ['USE_APACHE_HIVE_3'] == "true":
     if "STORED AS JSONFILE" in stmt:
       stmt = stmt.replace("STORED AS JSONFILE",
                           "ROW FORMAT SERDE 
'org.apache.hadoop.hive.serde2.JsonSerDe'")
@@ -630,7 +630,7 @@ def build_hbase_insert(db_name, db_suffix, table_name):
   hbase_insert = SET_HIVE_HBASE_BULK_LOAD + ';\n'
   # For Apache Hive, "hive.hbase.bulk does not exist" exception will be thrown 
and there
   # is only warning in cdp
-  if os.environ['USE_APACHE_HIVE'] == "true":
+  if os.environ['USE_APACHE_HIVE_3'] == "true":
     hbase_insert = ""
   params = build_replacement_params(db_name, db_suffix, table_name)
   hbase_insert += ("INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name}"
diff --git a/testdata/bin/load_nested.py b/testdata/bin/load_nested.py
index 31ba7295b..ac4d9e5f6 100755
--- a/testdata/bin/load_nested.py
+++ b/testdata/bin/load_nested.py
@@ -93,7 +93,7 @@ def load():
     # directory. Use external.table.purge=true so that it is equivalent to a 
Hive 2
     # managed table.
     # For Apache Hive, HIVE-20085 (Hive 4) Allow CTAS.
-    if HIVE_MAJOR_VERSION >= 3 and os.environ["USE_APACHE_HIVE"] != "true":
+    if HIVE_MAJOR_VERSION >= 3 and os.environ["USE_APACHE_HIVE_3"] != "true":
       external = "EXTERNAL"
       tblproperties += ",'external.table.purge'='TRUE'"
     sql_params = {
diff --git a/testdata/bin/patch_hive.sh b/testdata/bin/patch_hive.sh
index c974b9eee..1953b8926 100755
--- a/testdata/bin/patch_hive.sh
+++ b/testdata/bin/patch_hive.sh
@@ -28,7 +28,7 @@ set -euo pipefail
 . $IMPALA_HOME/bin/report_build_error.sh
 setup_report_build_error
 
-if [[ "${USE_APACHE_HIVE}" != true ]]; then
+if [[ "${USE_APACHE_HIVE_3}" != true ]]; then
   exit 0
 fi
 
@@ -77,6 +77,6 @@ if [[ "${HIVE_REBUILD}" = "true" ]]; then
   echo "Repackage the hive-exec module"
   ${IMPALA_HOME}/bin/mvn-quiet.sh -pl ql,standalone-metastore clean package \
       -Dmaven.test.skip
-  cp $HIVE_SRC_DIR/ql/target/hive-exec-${APACHE_HIVE_VERSION}.jar 
$HIVE_HOME/lib/
+  cp $HIVE_SRC_DIR/ql/target/hive-exec-${APACHE_HIVE_3_VERSION}.jar 
$HIVE_HOME/lib/
 fi
 popd
diff --git a/tests/common/environ.py b/tests/common/environ.py
index a754001e2..c96571b7c 100644
--- a/tests/common/environ.py
+++ b/tests/common/environ.py
@@ -95,7 +95,7 @@ if impala_iceberg_version:
       # If we can't parse the version, default to format version 1
       pass
 
-IS_APACHE_HIVE = os.environ.get("USE_APACHE_HIVE", False) == 'true'
+IS_APACHE_HIVE = os.environ.get("USE_APACHE_HIVE_3", False) == 'true'
 
 # Resolve any symlinks in the path.
 impalad_basedir = \
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index e0d31ca21..b6e3601f8 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -168,7 +168,7 @@ def parse_test_file(test_file_name, valid_section_names, 
skip_unknown_sections=T
       file_data = file_data.decode(encoding)
     else:
       file_data = file_data.decode('utf-8')
-    if os.environ["USE_APACHE_HIVE"] == "true":
+    if os.environ["USE_APACHE_HIVE_3"] == "true":
       # Remove Hive 4.0 feature for tpcds_schema_template.sql
       if "tpcds_schema_template" in test_file_name:
         # HIVE-20703

Reply via email to