This is an automated email from the ASF dual-hosted git repository. laszlog pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5d1f1e0180e3ded0fd03b78f75106378c2a1e9ce Author: ttttttz <[email protected]> AuthorDate: Sat Aug 17 20:55:55 2024 +0800 IMPALA-14183: Rename the environment variable USE_APACHE_HIVE to USE_APACHE_HIVE_3 When the environment variable USE_APACHE_HIVE is set to true, build Impala for adapting to Apache Hive 3.x. In order to better distinguish it from Apache Hive 2.x later, rename USE_APACHE_HIVE to USE_APACHE_HIVE_3. Additionally, to facilitate referencing different versions of the Hive MetastoreShim, the major version of Hive has been added to the environment variable IMPALA_HIVE_DIST_TYPE. Change-Id: I11b5fe1604b6fc34469fb357c98784b7ad88574d Reviewed-on: http://gerrit.cloudera.org:8080/21724 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- README-build.md | 2 +- bin/bootstrap_toolchain.py | 2 +- bin/impala-config.sh | 38 +++++++++++++++--------------- bin/jenkins/build-all-flag-combinations.sh | 2 +- buildall.sh | 2 +- common/function-registry/CMakeLists.txt | 2 +- fe/pom.xml | 4 ++-- fe/src/test/resources/hive-site.xml.py | 4 ++-- testdata/bin/create-load-data.sh | 2 +- testdata/bin/generate-schema-statements.py | 4 ++-- testdata/bin/load_nested.py | 2 +- testdata/bin/patch_hive.sh | 4 ++-- tests/common/environ.py | 2 +- tests/util/test_file_parser.py | 2 +- 14 files changed, 36 insertions(+), 36 deletions(-) diff --git a/README-build.md b/README-build.md index 63d7cc299..93d2dc181 100644 --- a/README-build.md +++ b/README-build.md @@ -39,7 +39,7 @@ can do so through the environment variables and scripts listed below. | USE_APACHE_COMPONENTS | false | Use Apache components for Hadoop, HBase, Hive, Tez, Ranger. It will set USE_APACHE_{HADOOP,HBASE,HIVE,TEZ,RANGER} variable as true if not set false. 
| | USE_APACHE_HADOOP | false | Use Apache Hadoop | | USE_APACHE_HBASE | false | Use Apache HBase | -| USE_APACHE_HIVE | false | Use Apache Hive | +| USE_APACHE_HIVE_3 | false | Use Apache Hive-3 | | USE_APACHE_TEZ | false | Use Apache Tez | | USE_APACHE_RANGER | false | Use Apache Ranger | | DOWNLOAD_CDH_COMPONENTS | true | Download CDH components | diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py index 59984fe05..ecc0354f9 100755 --- a/bin/bootstrap_toolchain.py +++ b/bin/bootstrap_toolchain.py @@ -506,7 +506,7 @@ def get_hadoop_downloads(): cluster_components = [] use_apache_hadoop = os.environ["USE_APACHE_HADOOP"] == "true" use_apache_hbase = os.environ["USE_APACHE_HBASE"] == "true" - use_apache_hive = os.environ["USE_APACHE_HIVE"] == "true" + use_apache_hive = os.environ["USE_APACHE_HIVE_3"] == "true" use_apache_tez = os.environ["USE_APACHE_TEZ"] == "true" use_apache_ranger = os.environ["USE_APACHE_RANGER"] == "true" use_apache_ozone = os.environ["USE_APACHE_OZONE"] == "true" diff --git a/bin/impala-config.sh b/bin/impala-config.sh index a5b758d2f..848732b0d 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -267,8 +267,8 @@ export APACHE_ORC_JAVA_VERSION=1.8.3 export APACHE_PARQUET_VERSION=1.12.3 export APACHE_RANGER_VERSION=2.4.0 export APACHE_TEZ_VERSION=0.10.2 -export APACHE_HIVE_VERSION=3.1.3 -export APACHE_HIVE_STORAGE_API_VERSION=2.7.0 +export APACHE_HIVE_3_VERSION=3.1.3 +export APACHE_HIVE_3_STORAGE_API_VERSION=2.7.0 export APACHE_OZONE_VERSION=1.4.0 # Java dependencies that are not also runtime components. 
Declaring versions here allows @@ -377,8 +377,8 @@ export CDP_ICEBERG_URL=${CDP_ICEBERG_URL-} export CDP_RANGER_URL=${CDP_RANGER_URL-} export CDP_TEZ_URL=${CDP_TEZ_URL-} -export APACHE_HIVE_URL=${APACHE_HIVE_URL-} -export APACHE_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-} +export APACHE_HIVE_3_URL=${APACHE_HIVE_3_URL-} +export APACHE_HIVE_3_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-} export APACHE_OZONE_URL=${APACHE_OZONE_URL-} export CDP_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdp_components-$CDP_BUILD_NUMBER" @@ -396,7 +396,7 @@ if ${USE_APACHE_COMPONENTS:=false}; then export IMPALA_TEZ_VERSION=${APACHE_TEZ_VERSION} export USE_APACHE_HADOOP=true export USE_APACHE_HBASE=true - export USE_APACHE_HIVE=true + export USE_APACHE_HIVE_3=true export USE_APACHE_TEZ=true export USE_APACHE_RANGER=true export USE_APACHE_OZONE=true @@ -418,7 +418,7 @@ else export IMPALA_TEZ_URL=${CDP_TEZ_URL-} export USE_APACHE_HADOOP=${USE_APACHE_HADOOP:=false} export USE_APACHE_HBASE=${USE_APACHE_HBASE:=false} - export USE_APACHE_HIVE=${USE_APACHE_HIVE:=false} + export USE_APACHE_HIVE_3=${USE_APACHE_HIVE_3:=false} export USE_APACHE_TEZ=${USE_APACHE_TEZ:=false} export USE_APACHE_RANGER=${USE_APACHE_RANGER:=false} export USE_APACHE_OZONE=${USE_APACHE_OZONE:=false} @@ -432,18 +432,18 @@ else export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}" fi -if $USE_APACHE_HIVE; then - # When USE_APACHE_HIVE is set we use the apache hive version to build as well as deploy - # in the minicluster - export IMPALA_HIVE_DIST_TYPE="apache-hive" - export IMPALA_HIVE_VERSION=${APACHE_HIVE_VERSION} - export IMPALA_HIVE_URL=${APACHE_HIVE_URL-} - export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-} - export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_STORAGE_API_VERSION} +if $USE_APACHE_HIVE_3; then + # When USE_APACHE_HIVE_3 is set we use the apache hive version to build as well as + # deploy in the minicluster + export IMPALA_HIVE_DIST_TYPE="apache-hive-3" + export 
IMPALA_HIVE_VERSION=${APACHE_HIVE_3_VERSION} + export IMPALA_HIVE_URL=${APACHE_HIVE_3_URL-} + export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-} + export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_3_STORAGE_API_VERSION} else - # CDP hive version is used to build and deploy in minicluster when USE_APACHE_HIVE is + # CDP hive version is used to build and deploy in minicluster when USE_APACHE_HIVE_* is # false - export IMPALA_HIVE_DIST_TYPE="hive" + export IMPALA_HIVE_DIST_TYPE="hive-3" export IMPALA_HIVE_VERSION=${HIVE_VERSION_OVERRIDE:-"$CDP_HIVE_VERSION"} export IMPALA_HIVE_URL=${CDP_HIVE_URL-} export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-} @@ -461,7 +461,7 @@ fi # infra/python/deps/requirements.txt. export IMPALA_THRIFT_CPP_VERSION=0.16.0-p7 unset IMPALA_THRIFT_CPP_URL -if $USE_APACHE_HIVE; then +if $USE_APACHE_HIVE_3; then # Apache Hive 3 clients can't run on thrift versions >= 0.14 (IMPALA-11801) export IMPALA_THRIFT_POM_VERSION=0.11.0 export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p5 @@ -707,7 +707,7 @@ DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}" ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME") -if $USE_APACHE_HIVE; then +if $USE_APACHE_HIVE_3; then export HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin" export HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src" # if apache hive is being used change the metastore db name, so we don't have to @@ -1080,7 +1080,7 @@ export HIVE_AUX_JARS_PATH="$POSTGRES_JDBC_DRIVER" # Add the jar of iceberg-hive-runtime to have HiveIcebergStorageHandler. # Only needed by Apache Hive3 since CDP Hive3 has the jar of hive-iceberg-handler in its # lib folder. 
-if $USE_APACHE_HIVE; then +if $USE_APACHE_HIVE_3; then export HIVE_AUX_JARS_PATH="$HIVE_AUX_JARS_PATH:\ $IMPALA_HOME/fe/target/dependency/iceberg-hive-runtime-${IMPALA_ICEBERG_VERSION}.jar" fi diff --git a/bin/jenkins/build-all-flag-combinations.sh b/bin/jenkins/build-all-flag-combinations.sh index 2f914b5ab..8f7361342 100755 --- a/bin/jenkins/build-all-flag-combinations.sh +++ b/bin/jenkins/build-all-flag-combinations.sh @@ -48,7 +48,7 @@ CONFIGS=( "-skiptests -noclean -asan" "-skiptests -noclean -tsan" "-skiptests -noclean -ubsan -so -ninja" - # USE_APACHE_HIVE=true build: + # USE_APACHE_HIVE_3=true build: "-skiptests -noclean -use_apache_components" "-notests -noclean -use_apache_components -package" ) diff --git a/buildall.sh b/buildall.sh index 211b93de7..f95b291ec 100755 --- a/buildall.sh +++ b/buildall.sh @@ -457,7 +457,7 @@ bootstrap_dependencies() { cp $IMPALA_TOOLCHAIN_PACKAGES_HOME/hadoop-client-$IMPALA_HADOOP_CLIENT_VERSION/lib/* \ $HADOOP_HOME/lib/native/ fi - if [[ "${USE_APACHE_HIVE}" = true ]]; then + if [[ "${USE_APACHE_HIVE_3}" = true ]]; then "$IMPALA_HOME/testdata/bin/patch_hive.sh" fi } diff --git a/common/function-registry/CMakeLists.txt b/common/function-registry/CMakeLists.txt index 0c9274f1c..55faeb59d 100644 --- a/common/function-registry/CMakeLists.txt +++ b/common/function-registry/CMakeLists.txt @@ -61,7 +61,7 @@ add_custom_command( add_custom_target(function-registry ALL DEPENDS ${CODE_GEN_OUTPUT}) -if( $ENV{USE_APACHE_HIVE} STREQUAL "false") +if($ENV{USE_APACHE_HIVE_3} STREQUAL "false") add_custom_target(geospatial-udf-wrappers ALL DEPENDS ${GEOSPATIAL_CODE_GEN_OUTPUT}) add_dependencies(geospatial-udf-wrappers function-registry) else() diff --git a/fe/pom.xml b/fe/pom.xml index d73a07ad5..18cd27e4c 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -730,7 +730,7 @@ under the License. 
--> <source>${project.basedir}/generated-sources/gen-java</source> <source>${project.build.directory}/generated-sources/cup</source> - <source>${project.basedir}/src/compat-${hive.dist.type}-${hive.major.version}/java</source> + <source>${project.basedir}/src/compat-${hive.dist.type}/java</source> </sources> </configuration> </execution> @@ -1049,7 +1049,7 @@ under the License. <activation> <property> <name>env.IMPALA_HIVE_DIST_TYPE</name> - <value>apache-hive</value> + <value>apache-hive-3</value> </property> </activation> <build> diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py index aa55d8682..d1ad1daff 100644 --- a/fe/src/test/resources/hive-site.xml.py +++ b/fe/src/test/resources/hive-site.xml.py @@ -21,7 +21,7 @@ from __future__ import absolute_import, division, print_function import os HIVE_MAJOR_VERSION = int(os.environ['IMPALA_HIVE_VERSION'][0]) -USE_APACHE_HIVE = os.environ['USE_APACHE_HIVE'] +USE_APACHE_HIVE_3 = os.environ['USE_APACHE_HIVE_3'] KERBERIZE = os.environ.get('IMPALA_KERBERIZE') == 'true' VARIANT = os.environ.get('HIVE_VARIANT') IMPALA_JAVA_TOOL_OPTIONS=os.environ.get("IMPALA_JAVA_TOOL_OPTIONS") @@ -250,7 +250,7 @@ CONFIG.update({ }) # Before HIVE-19486 (in Apache Hive 4 and CDP Hive versions), Hikari CP configs are prefixed with "hikari.*". # After HIVE-19486, the prefix is "hikaricp.*". 
-if USE_APACHE_HIVE and HIVE_MAJOR_VERSION == 3: +if USE_APACHE_HIVE_3 and HIVE_MAJOR_VERSION == 3: CONFIG.update({ 'hikari.connectionTimeout': 60000, }) diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index 83088248d..debe1c8b8 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -386,7 +386,7 @@ function copy-and-load-dependent-tables { # For tables that rely on loading data from local fs test-warehouse # TODO: Find a good way to integrate this with the normal data loading scripts SQL_FILE=${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql - if $USE_APACHE_HIVE; then + if $USE_APACHE_HIVE_3; then # Apache Hive 3.1 doesn't support "STORED AS JSONFILE" (HIVE-19899) NEW_SQL_FILE=${IMPALA_HOME}/testdata/bin/load-dependent-tables-hive3.sql sed "s/STORED AS JSONFILE/ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'"\ diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py index 84e550d01..9203cdab3 100755 --- a/testdata/bin/generate-schema-statements.py +++ b/testdata/bin/generate-schema-statements.py @@ -303,7 +303,7 @@ def build_create_statement(table_template, table_name, db_name, db_suffix, stmt = table_template.format(**params) # Apache Hive 3.1 doesn't support "STORED BY ICEBERG STORED AS AVRO" and # "STORED AS JSONFILE" (HIVE-25162, HIVE-19899) - if is_hive_stmt and os.environ['USE_APACHE_HIVE'] == "true": + if is_hive_stmt and os.environ['USE_APACHE_HIVE_3'] == "true": if "STORED AS JSONFILE" in stmt: stmt = stmt.replace("STORED AS JSONFILE", "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'") @@ -630,7 +630,7 @@ def build_hbase_insert(db_name, db_suffix, table_name): hbase_insert = SET_HIVE_HBASE_BULK_LOAD + ';\n' # For Apache Hive, "hive.hbase.bulk does not exist" exception will be thrown and there # is only warning in cdp - if os.environ['USE_APACHE_HIVE'] == "true": + if os.environ['USE_APACHE_HIVE_3'] == "true": 
hbase_insert = "" params = build_replacement_params(db_name, db_suffix, table_name) hbase_insert += ("INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name}" diff --git a/testdata/bin/load_nested.py b/testdata/bin/load_nested.py index 31ba7295b..ac4d9e5f6 100755 --- a/testdata/bin/load_nested.py +++ b/testdata/bin/load_nested.py @@ -93,7 +93,7 @@ def load(): # directory. Use external.table.purge=true so that it is equivalent to a Hive 2 # managed table. # For Apache Hive, HIVE-20085 (Hive 4) Allow CTAS. - if HIVE_MAJOR_VERSION >= 3 and os.environ["USE_APACHE_HIVE"] != "true": + if HIVE_MAJOR_VERSION >= 3 and os.environ["USE_APACHE_HIVE_3"] != "true": external = "EXTERNAL" tblproperties += ",'external.table.purge'='TRUE'" sql_params = { diff --git a/testdata/bin/patch_hive.sh b/testdata/bin/patch_hive.sh index c974b9eee..1953b8926 100755 --- a/testdata/bin/patch_hive.sh +++ b/testdata/bin/patch_hive.sh @@ -28,7 +28,7 @@ set -euo pipefail . $IMPALA_HOME/bin/report_build_error.sh setup_report_build_error -if [[ "${USE_APACHE_HIVE}" != true ]]; then +if [[ "${USE_APACHE_HIVE_3}" != true ]]; then exit 0 fi @@ -77,6 +77,6 @@ if [[ "${HIVE_REBUILD}" = "true" ]]; then echo "Repackage the hive-exec module" ${IMPALA_HOME}/bin/mvn-quiet.sh -pl ql,standalone-metastore clean package \ -Dmaven.test.skip - cp $HIVE_SRC_DIR/ql/target/hive-exec-${APACHE_HIVE_VERSION}.jar $HIVE_HOME/lib/ + cp $HIVE_SRC_DIR/ql/target/hive-exec-${APACHE_HIVE_3_VERSION}.jar $HIVE_HOME/lib/ fi popd diff --git a/tests/common/environ.py b/tests/common/environ.py index a754001e2..c96571b7c 100644 --- a/tests/common/environ.py +++ b/tests/common/environ.py @@ -95,7 +95,7 @@ if impala_iceberg_version: # If we can't parse the version, default to format version 1 pass -IS_APACHE_HIVE = os.environ.get("USE_APACHE_HIVE", False) == 'true' +IS_APACHE_HIVE = os.environ.get("USE_APACHE_HIVE_3", False) == 'true' # Resolve any symlinks in the path. 
impalad_basedir = \ diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py index e0d31ca21..b6e3601f8 100644 --- a/tests/util/test_file_parser.py +++ b/tests/util/test_file_parser.py @@ -168,7 +168,7 @@ def parse_test_file(test_file_name, valid_section_names, skip_unknown_sections=T file_data = file_data.decode(encoding) else: file_data = file_data.decode('utf-8') - if os.environ["USE_APACHE_HIVE"] == "true": + if os.environ["USE_APACHE_HIVE_3"] == "true": # Remove Hive 4.0 feature for tpcds_schema_template.sql if "tpcds_schema_template" in test_file_name: # HIVE-20703
