This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3577030df67273748308d8d165809d12abbb6ff3 Author: Michael Smith <[email protected]> AuthorDate: Tue Sep 13 15:38:13 2022 -0700 IMPALA-11562: Revert support for o3fs as default filesystem Reverts support for o3fs as a default filesystem added in IMPALA-9442. Updates test setup to use ofs instead. Munges absolute paths in Iceberg metadata to match the new location required for ofs. Ozone has strict requirements on volume and bucket names, so all tables must be created within a bucket (e.g. inside /impala/test-warehouse/). Change-Id: I45e90d30b2e68876dec0db3c43ac15ee510b17bd Reviewed-on: http://gerrit.cloudera.org:8080/19001 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 5 +- bin/run-all-tests.sh | 6 +- .../org/apache/impala/common/FileSystemUtil.java | 1 - testdata/bin/create-tpcds-testcase-files.sh | 2 +- testdata/bin/load-metastore-snapshot.sh | 9 ++ testdata/bin/load-test-warehouse-snapshot.sh | 6 + testdata/bin/rewrite-iceberg-metadata.py | 97 +++++++++++++++ testdata/cluster/admin | 4 +- .../queries/QueryTest/iceberg-alter.test | 2 +- .../QueryTest/iceberg-create-table-like-table.test | 8 +- .../queries/QueryTest/iceberg-create.test | 30 ++--- .../queries/QueryTest/iceberg-ctas.test | 4 +- .../queries/QueryTest/iceberg-insert.test | 10 +- .../queries/QueryTest/iceberg-negative.test | 28 ++--- .../queries/QueryTest/iceberg-old-fileformat.test | 4 +- .../queries/QueryTest/iceberg-query.test | 6 +- .../queries/QueryTest/multiple-filesystems.test | 10 +- .../queries/QueryTest/partition-col-types.test | 6 +- .../queries/QueryTest/show-create-table.test | 12 +- tests/authorization/test_ranger.py | 5 +- tests/common/file_utils.py | 14 ++- tests/common/impala_test_suite.py | 3 +- .../test_startup_filesystem_checks.py | 18 ++- tests/metadata/test_ddl.py | 134 +++++++++------------ tests/metadata/test_explain.py | 8 +- tests/metadata/test_load.py | 12 +- tests/metadata/test_recover_partitions.py | 2 +- tests/metadata/test_recursive_listing.py | 31 ++--- tests/metadata/test_show_create_table.py | 10 +- tests/metadata/test_testcase_builder.py | 4 +- tests/query_test/test_compressed_formats.py | 8 +- tests/query_test/test_insert_behaviour.py | 29 ++--- tests/query_test/test_insert_parquet.py | 6 +- tests/query_test/test_observability.py | 6 +- tests/query_test/test_runtime_filters.py | 9 +- tests/util/filesystem_utils.py | 1 + tests/util/hdfs_util.py | 46 ++++--- 37 files changed, 355 insertions(+), 241 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 7d32d770e..92b915c6c 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -682,8 +682,9 @@ elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then export USE_OZONE_ENCRYPTION=${USE_OZONE_ENCRYPTION-true} export OZONE_VOLUME="impala" - export OZONE_BUCKET="base" - export DEFAULT_FS="o3fs://${OZONE_BUCKET}.${OZONE_VOLUME}.${INTERNAL_LISTEN_HOST}:9862" + export DEFAULT_FS="ofs://${INTERNAL_LISTEN_HOST}:9862" + export FILESYSTEM_PREFIX="${DEFAULT_FS}/${OZONE_VOLUME}" + export WAREHOUSE_LOCATION_PREFIX="/${OZONE_VOLUME}" else echo "Unsupported filesystem '$TARGET_FILESYSTEM'" echo "Valid values are: hdfs, isilon, s3, abfs, adls, gs, local, ozone" diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh index cb82ad693..ba0d40dbe 100755 --- a/bin/run-all-tests.sh +++ b/bin/run-all-tests.sh @@ -197,7 +197,8 @@ do TEST_RET_CODE=0 # Store a list of the files at the beginning of each iteration. - hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1 + hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \ + > ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1 # Try not restarting the cluster to save time. BE, FE, JDBC and EE tests require # running on a cluster with default flags. We just need to restart the cluster when @@ -334,7 +335,8 @@ do # to the file-list-begin*.log from the beginning of the iteration to see if files # are not being cleaned up. This is most useful on the first iteration, when # the list of files is from dataload. - hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1 + hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \ + > ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1 if [[ $TEST_RET_CODE == 1 ]]; then break diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java index 4b2589419..1c2b75175 100644 --- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java +++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java @@ -113,7 +113,6 @@ public class FileSystemUtil { .add(SCHEME_ADL) .add(SCHEME_HDFS) .add(SCHEME_S3A) - .add(SCHEME_O3FS) .add(SCHEME_OFS) .add(SCHEME_GCS) .add(SCHEME_COS) diff --git a/testdata/bin/create-tpcds-testcase-files.sh b/testdata/bin/create-tpcds-testcase-files.sh index fd688e805..caaf85086 100755 --- a/testdata/bin/create-tpcds-testcase-files.sh +++ b/testdata/bin/create-tpcds-testcase-files.sh @@ -31,7 +31,7 @@ IMPALAD=${IMPALAD:-localhost} TPCDS_QUERY_HOME=$IMPALA_HOME/testdata/workloads/tpcds/queries/raw # Target directory containing the testcase data. -TESTCASE_DATA_DIR=/test-warehouse/tpcds-testcase-data +TESTCASE_DATA_DIR=${FILESYSTEM_PREFIX}/test-warehouse/tpcds-testcase-data COPY_TEST_CASE_PREFIX="COPY TESTCASE TO '$TESTCASE_DATA_DIR'" diff --git a/testdata/bin/load-metastore-snapshot.sh b/testdata/bin/load-metastore-snapshot.sh index dd4e136de..5d7d0ab26 100755 --- a/testdata/bin/load-metastore-snapshot.sh +++ b/testdata/bin/load-metastore-snapshot.sh @@ -32,6 +32,8 @@ if [[ $# -ne 1 ]]; then exit 1 fi +: ${TEST_WAREHOUSE_DIR=/test-warehouse} + SNAPSHOT_FILE=$1 if [ ! -f ${SNAPSHOT_FILE} ]; then echo "Metastore Snapshot file '${SNAPSHOT_FILE}' not found" @@ -69,6 +71,13 @@ elif [[ "${DEFAULT_FS}" != "hdfs://localhost:20500" ]]; then sed -i "s|hdfs://localhost:20500|${DEFAULT_FS}|g" ${TMP_SNAPSHOT_FILE} fi +if [[ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]]; then + echo "Adding prefix ${WAREHOUSE_LOCATION_PREFIX} to iceberg.catalog_location" + cloc='iceberg\.catalog_location\t' + sed -i "s|\(${cloc}\)\(${TEST_WAREHOUSE_DIR}\)|\1${WAREHOUSE_LOCATION_PREFIX}\2|g" \ + ${TMP_SNAPSHOT_FILE} +fi + # Drop and re-create the hive metastore database dropdb -U hiveuser ${METASTORE_DB} 2> /dev/null || true createdb -U hiveuser ${METASTORE_DB} diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh index a5597cf18..cef8dc6ca 100755 --- a/testdata/bin/load-test-warehouse-snapshot.sh +++ b/testdata/bin/load-test-warehouse-snapshot.sh @@ -110,6 +110,12 @@ if [ ! -f ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/githash.txt ]; then exit 1 fi +if [ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]; then + echo "Updating Iceberg locations with warehouse prefix ${WAREHOUSE_LOCATION_PREFIX}" + ${IMPALA_HOME}/testdata/bin/rewrite-iceberg-metadata.py ${WAREHOUSE_LOCATION_PREFIX} \ + $(find ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/iceberg_test -name "metadata") +fi + echo "Copying data to ${TARGET_FILESYSTEM}" if [ "${TARGET_FILESYSTEM}" = "s3" ]; then # hive does not yet work well with s3, so we won't need hive builtins. diff --git a/testdata/bin/rewrite-iceberg-metadata.py b/testdata/bin/rewrite-iceberg-metadata.py new file mode 100755 index 000000000..ec165f1d7 --- /dev/null +++ b/testdata/bin/rewrite-iceberg-metadata.py @@ -0,0 +1,97 @@ +#!/usr/bin/env impala-python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import glob +import json +import os +import sys + +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +args = sys.argv[1:] +if len(args) < 2: + print("Usage: rewrite-iceberg-metadata.py PREFIX METADATA-dirs...") + exit(1) + +prefix = args[0] + + +def add_prefix_to_snapshot(snapshot): + if 'manifest-list' in snapshot: + snapshot['manifest-list'] = prefix + snapshot['manifest-list'] + if 'manifests' in snapshot: + snapshot['manifests'] = map(lambda m: prefix + m, snapshot['manifests']) + return snapshot + + +def add_prefix_to_mlog(metadata_log): + metadata_log['metadata-file'] = prefix + metadata_log['metadata-file'] + return metadata_log + + +def add_prefix_to_snapshot_entry(entry): + if 'manifest_path' in entry: + entry['manifest_path'] = prefix + entry['manifest_path'] + if 'data_file' in entry: + entry['data_file']['file_path'] = prefix + entry['data_file']['file_path'] + return entry + + +for arg in args[1:]: + # Update metadata.json + for mfile in glob.glob(os.path.join(arg, 'v*.metadata.json')): + with open(mfile, 'r') as f: + metadata = json.load(f) + + if 'format-version' not in metadata: + print("WARN: skipping {}, missing format-version".format(f)) + continue + + version = metadata['format-version'] + if version < 1 or version > 2: + print("WARN: skipping {}, unknown version {}".format(f, version)) + continue + + # metadata: required + metadata['location'] = prefix + metadata['location'] + + # snapshots: optional + if 'snapshots' in metadata: + metadata['snapshots'] = map(add_prefix_to_snapshot, metadata['snapshots']) + + # metadata-log: optional + if 'metadata-log' in metadata: + metadata['metadata-log'] = map(add_prefix_to_mlog, metadata['metadata-log']) + + with open(mfile + '.tmp', 'w') as f: + json.dump(metadata, f) + os.rename(mfile + '.tmp', mfile) + + for afile in glob.glob(os.path.join(arg, '*.avro')): + with open(afile, 'rb') as f: + with DataFileReader(f, DatumReader()) as reader: + schema = reader.datum_reader.writers_schema + lines = map(add_prefix_to_snapshot_entry, reader) + + with open(afile + '.tmp', 'wb') as f: + with DataFileWriter(f, DatumWriter(), schema) as writer: + for line in lines: + writer.append(line) + os.rename(afile + '.tmp', afile) diff --git a/testdata/cluster/admin b/testdata/cluster/admin index a8ed6ef16..0fa78d1f4 100755 --- a/testdata/cluster/admin +++ b/testdata/cluster/admin @@ -329,7 +329,7 @@ function start_cluster { if [[ "${TARGET_FILESYSTEM}" = "ozone" ]]; then local bucketkey='' if $USE_OZONE_ENCRYPTION; then - echo "Ozone encryption enabled for ${OZONE_VOLUME}/${OZONE_BUCKET}" + echo "Ozone encryption enabled for ${OZONE_VOLUME}/test-warehouse" # Encryption is done at the bucket level, so ensure the keys are available first. ${IMPALA_HOME}/testdata/bin/setup-dfs-keys.sh testkey @@ -337,7 +337,7 @@ function start_cluster { fi ozone sh volume create ${OZONE_VOLUME} || true - ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/${OZONE_BUCKET} || true + ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/test-warehouse || true fi return $? diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-alter.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-alter.test index 2438f5602..12518af2b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-alter.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-alter.test @@ -74,7 +74,7 @@ CREATE TABLE iceberg_hadoop_catalog( ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'); ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(event_time TIMESTAMP, register_time DATE); ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(message STRING, price DECIMAL(8,1)); ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(map_test MAP <STRING, array <STRING>>, struct_test STRUCT <f1: BIGINT, f2: BIGINT>); diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create-table-like-table.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create-table-like-table.test index 94ff9828c..c8e8c6641 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create-table-like-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create-table-like-table.test @@ -139,7 +139,7 @@ create table ice_hadoop_catalog_no_part( array < STRING > > ) stored as iceberg tblproperties( 'iceberg.catalog' = 'hadoop.catalog', - 'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc', + 'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc', 'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog_no_part' ); ---- RESULTS @@ -172,7 +172,7 @@ describe formatted ice_hadoop_catalog_no_part_clone; 'col_map','map<string,array<string>>','NULL' 'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_no_part_clone','NULL' '','iceberg.catalog ','hadoop.catalog ' -'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc' '','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' '','write.format.default','parquet ' 'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL' @@ -202,7 +202,7 @@ create table ice_hadoop_catalog( array < STRING > > ) partitioned by spec (bucket(3, id)) stored as iceberg tblproperties( 'iceberg.catalog' = 'hadoop.catalog', - 'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc', + 'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc', 'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog' ); ---- RESULTS @@ -238,7 +238,7 @@ describe formatted ice_hadoop_catalog_clone; 'id','BUCKET[3]','NULL' 'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_clone','NULL' '','iceberg.catalog ','hadoop.catalog ' -'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc' '','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' '','write.format.default','parquet ' 'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL' diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test index 1f44026e7..dc0c86ac0 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test @@ -70,13 +70,13 @@ PARTITIONED BY SPEC TRUNCATE(15, level) ) STORED AS ICEBERG -LOCATION '/$DATABASE.iceberg_test_with_location' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location' TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external( level STRING ) STORED AS ICEBERG -LOCATION '/$DATABASE.iceberg_test_with_location' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location' TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); ---- RESULTS 'Table has been created.' @@ -97,7 +97,7 @@ STRING,BIGINT,BIGINT ---- QUERY CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col STORED AS ICEBERG -LOCATION '/$DATABASE.iceberg_test_with_location' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location' TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); ---- RESULTS 'Table has been created.' @@ -137,7 +137,7 @@ PARTITIONED BY SPEC ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'); ---- RESULTS 'Table has been created.' ==== @@ -171,13 +171,13 @@ PARTITIONED BY SPEC ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'); CREATE EXTERNAL TABLE iceberg_hadoop_cat_external( level STRING ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog'); ---- RESULTS 'Table has been created.' ==== @@ -200,13 +200,13 @@ CREATE TABLE iceberg_hadoop_cat_drop( ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); CREATE TABLE iceberg_hadoop_cat_query( level STRING ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); DROP TABLE iceberg_hadoop_cat_drop; SELECT * FROM iceberg_hadoop_cat_query; ---- TYPES @@ -217,7 +217,7 @@ string DESCRIBE FORMATTED iceberg_hadoop_cat_query; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL' -'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' '','write.format.default','parquet ' '','iceberg.catalog ','hadoop.catalog ' ---- TYPES @@ -229,12 +229,12 @@ CREATE TABLE iceberg_hadoop_cat_with_ident( ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test', +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test', 'iceberg.table_identifier'='org.db.tbl'); DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL' -'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' '','write.format.default','parquet ' '','iceberg.catalog ','hadoop.catalog ' ---- TYPES @@ -247,7 +247,7 @@ INSERT INTO iceberg_hadoop_cat_with_ident values ("ice"); CREATE EXTERNAL TABLE iceberg_hadoop_cat_with_ident_ext STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test', +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test', 'iceberg.table_identifier'='org.db.tbl'); ---- RESULTS 'Table has been created.' @@ -256,7 +256,7 @@ TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident_ext; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL' -'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' '','write.format.default','parquet ' '','iceberg.catalog ','hadoop.catalog ' ---- TYPES @@ -288,7 +288,7 @@ PARTITIONED BY SPEC ) STORED AS ICEBERG TBLPROPERTIES('write.format.default'='orc','iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'); ---- RESULTS 'Table has been created.' ==== @@ -475,7 +475,7 @@ create table ice_part_hadoop_catalog ( col_identity ) stored as iceberg TBLPROPERTIES( 'iceberg.catalog' = 'hadoop.catalog', - 'iceberg.catalog_location' = '/$DATABASE/hadoop_catalog_test' + 'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test' ); ---- RESULTS 'Table has been created.' diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test index 541fd4a39..776f86357 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test @@ -75,7 +75,7 @@ STRING,BIGINT,BIGINT # Use PARTITIONED BY SPEC CREATE TABLE ice_ctas_hadoop_tables_part PARTITIONED BY SPEC (month(d)) STORED AS ICEBERG -LOCATION '/test-warehouse/$DATABASE.db/loc_test' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/loc_test' TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT s, ts, d FROM value_tbl; SELECT * FROM ice_ctas_hadoop_tables_part where d='2021-02-26'; ---- RESULTS @@ -134,7 +134,7 @@ STRING,BIGINT,BIGINT CREATE TABLE ice_ctas_hadoop_catalog_part PARTITIONED BY SPEC (truncate(3, s)) STORED AS ICEBERG TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/cat_loc', + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc', 'iceberg.table_identifier'='ns1.ns2.ctas') AS SELECT cast(t as INT), s, d FROM value_tbl; INSERT INTO ice_ctas_hadoop_catalog_part VALUES (1, 'lion', '2021-02-27'); diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test index e0b7f6ff7..c72d6087a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test @@ -102,7 +102,7 @@ select * from ts_iceberg; create table iceberg_hadoop_cat (i int) stored as iceberg tblproperties('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test'); + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test'); insert into iceberg_hadoop_cat values (1), (2), (3); ---- RESULTS : 3 @@ -128,7 +128,7 @@ STRING, STRING, STRING create table iceberg_hadoop_cat_ti (i int) stored as iceberg tblproperties('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test', + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test', 'iceberg.table_identifier'='test.custom_db.int_table'); insert into iceberg_hadoop_cat_ti values (1), (2), (3); ---- RESULTS @@ -170,7 +170,7 @@ INT # Query external Iceberg table create external table iceberg_hive_cat_ext (i int) stored as iceberg -location '/test-warehouse/$DATABASE.db/iceberg_hive_cat' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat' tblproperties('iceberg.catalog'='hive.catalog', 'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat'); ---- RESULTS @@ -223,7 +223,7 @@ INT # Create another external Iceberg table create external table iceberg_hive_cat_ext_2 (i int) stored as iceberg -location '/test-warehouse/$DATABASE.db/iceberg_hive_cat' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat' tblproperties('iceberg.catalog'='hive.catalog', 'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat'); select * from iceberg_hive_cat_ext_2 @@ -248,7 +248,7 @@ Table does not exist # Insert into hive catalog with custom location. create table iceberg_hive_cat_custom_loc (i int) stored as iceberg -location '/test-warehouse/$DATABASE.db/custom_hive_cat' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat' tblproperties('iceberg.catalog'='hive.catalog'); insert into iceberg_hive_cat_custom_loc values (1), (2), (3); ---- RESULTS diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test index db8f0c57b..2231c8e15 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test @@ -37,7 +37,7 @@ AnalysisException: Table is not partitioned: functional_parquet.iceberg_non_part CREATE TABLE iceberg_table_hadoop_tables_cat_loc(i INT) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.tables', - 'iceberg.catalog_location'='/test-warehouse/cat_loc') + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/cat_loc') ---- CATCH iceberg.catalog_location cannot be set for Iceberg table stored in hadoop.tables ==== @@ -46,7 +46,7 @@ CREATE TABLE iceberg_table_hadoop_catalog( level STRING ) STORED AS ICEBERG -LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' TBLPROPERTIES('iceberg.catalog'='hadoop.catalog'); ---- CATCH AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'. @@ -63,9 +63,9 @@ AnalysisException: Table property 'iceberg.catalog_location' is necessary for Ic ---- QUERY CREATE EXTERNAL TABLE iceberg_external_table_hadoop_catalog STORED AS ICEBERG -LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test' TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/fake_table', + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table', 'iceberg.table_identifier'='fake_db.fake_table'); ---- CATCH AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'. @@ -99,7 +99,7 @@ Table property 'iceberg.table_identifier' is necessary for external Iceberg tabl CREATE EXTERNAL TABLE fake_iceberg_table_hadoop_catalog STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/fake_table', + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table', 'iceberg.table_identifier'='fake_db.fake_table'); SHOW CREATE TABLE fake_iceberg_table_hadoop_catalog; ---- CATCH @@ -144,7 +144,7 @@ AnalysisException: The Iceberg table has multiple partition specs. CREATE TABLE iceberg_hive_cat_with_cat_locaction (i int) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hive.catalog', - 'iceberg.catalog_location'='/test-warehouse/catalog_loc') + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog_loc') ---- CATCH iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog ==== @@ -152,7 +152,7 @@ iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog CREATE TABLE iceberg_hadoop_tables_with_metdata_locaction (i int) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.tables', - 'metadata_location'='/test-warehouse/catalog/metadata_loc') + 'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc') ---- CATCH metadata_location cannot be set for Iceberg tables ==== @@ -160,8 +160,8 @@ metadata_location cannot be set for Iceberg tables CREATE TABLE iceberg_hadoop_cat_with_metadata_locaction (i int) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/catalog', - 'metadata_location'='/test-warehouse/catalog/metadata_loc') + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog', + 'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc') ---- CATCH metadata_location cannot be set for Iceberg tables ==== @@ -169,7 +169,7 @@ metadata_location cannot be set for Iceberg tables CREATE TABLE iceberg_hive_cat_with_metadata_locaction (i int) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hive.catalog', - 'metadata_location'='/test-warehouse/catalog/metadata_loc') + 'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc') ---- CATCH metadata_location cannot be set for Iceberg tables ==== @@ -218,7 +218,7 @@ CREATE TABLE iceberg_table_hadoop_catalog( ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test'); +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'); ALTER TABLE iceberg_table_hadoop_catalog RENAME TO iceberg_table_hadoop_catalog_new; ---- CATCH UnsupportedOperationException: Cannot rename Iceberg tables that use 'hadoop.catalog' as catalog. @@ -254,7 +254,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('iceberg.table_iden AnalysisException: Unsetting the 'iceberg.table_identifier' table property is not supported for Iceberg table. ==== ---- QUERY -ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc'); +ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc'); ---- CATCH AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table. ==== @@ -264,7 +264,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('metadata_location' AnalysisException: Unsetting the 'metadata_location' table property is not supported for Iceberg table. ==== ---- QUERY -ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc'); +ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc'); ---- CATCH AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table. ==== @@ -314,7 +314,7 @@ Iceberg tables cannot have Hive ACID table properties. CREATE TABLE iceberg_insert_only(i int) STORED AS ICEBERG tblproperties('iceberg.catalog'='hadoop.catalog', -'iceberg.catalog_location'='/$EXTERNAL_WAREHOUSE_DIR/specified_location', +'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/$EXTERNAL_WAREHOUSE_DIR/specified_location', 'transactional'='true', 'transactional_properties'='insert_only'); ---- CATCH Iceberg tables cannot have Hive ACID table properties. diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test index 72219bf06..641faae5f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test @@ -5,7 +5,7 @@ STORED AS ICEBERG TBLPROPERTIES( 'iceberg.file_format'='orc', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc', + 'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc', 'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc' ); ALTER TABLE iceberg_partitioned_orc_external_old_fileformat @@ -13,7 +13,7 @@ UNSET TBLPROPERTIES IF EXISTS ('write.format.default'); DESCRIBE FORMATTED iceberg_partitioned_orc_external_old_fileformat; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL' -'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc' '','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc' '','iceberg.file_format','orc ' '','iceberg.catalog ','hadoop.catalog ' diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test index 9a5bfc473..29e57b702 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test @@ -370,7 +370,7 @@ string, string, string describe formatted hadoop_catalog_test_external; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test/functional_parquet/hadoop_catalog_test','NULL' -'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test' '','iceberg.table_identifier','functional_parquet.hadoop_catalog_test' '','write.format.default','parquet ' '','iceberg.catalog ','hadoop.catalog ' @@ -381,7 +381,7 @@ string, string, string describe formatted iceberg_partitioned_orc_external; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL' -'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc' '','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc' '','write.format.default','orc ' '','iceberg.catalog ','hadoop.catalog ' @@ -451,7 +451,7 @@ bigint,bigint describe formatted iceberg_resolution_test_external; ---- RESULTS: VERIFY_IS_SUBSET 'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test/functional_parquet/iceberg_resolution_test','NULL' -'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test' +'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test' '','iceberg.table_identifier','functional_parquet.iceberg_resolution_test' '','write.format.default','parquet ' '','iceberg.catalog ','hadoop.catalog ' diff --git a/testdata/workloads/functional-query/queries/QueryTest/multiple-filesystems.test b/testdata/workloads/functional-query/queries/QueryTest/multiple-filesystems.test index 75195052f..5e9425acc 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/multiple-filesystems.test +++ b/testdata/workloads/functional-query/queries/QueryTest/multiple-filesystems.test @@ -120,14 +120,14 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM # Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points # to the default filesystem. alter table alltypes add partition(year=2010, month=3) -location '/test-warehouse/alltypes_parquet/year=2010/month=3' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=3' ---- RESULTS ==== ---- QUERY # Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points # to the default filesystem. alter table alltypes add partition(year=2010, month=4) -location '/test-warehouse/alltypes_parquet/year=2010/month=4' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=4' ---- RESULTS ==== ---- QUERY @@ -187,7 +187,7 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM # Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points # to the default filesystem. alter table alltypes add partition(year=2009, month=5) -location '/test-warehouse/alltypes_parquet/year=2009/month=5' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2009/month=5' ---- RESULTS ==== ---- QUERY @@ -199,7 +199,7 @@ location '$SECONDARY_FILESYSTEM/multi_fs_tests/$DATABASE.db/alltypes_parquet/yea # Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points # to the default filesystem. alter table alltypes add partition(year=2010, month=5) -location '/test-warehouse/alltypes_parquet/year=2010/month=5' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=5' ---- RESULTS ==== ---- QUERY @@ -306,7 +306,7 @@ year=2009/month=1: 310 # ADD PARTITION on the default filesystem. # Point to unique database so we don't overwrite someone else's data. alter table alltypes_multipart_insert add partition(year=2009, month=2) -location '/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2' +location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2' ---- RESULTS ==== ---- QUERY diff --git a/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test b/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test index 136d4be89..9d1610e47 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test +++ b/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test @@ -12,7 +12,7 @@ PARTITIONED BY ( float_col FLOAT, double_col DOUBLE, string_col STRING -) LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types' +) LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types' ==== ---- QUERY DESCRIBE all_insert_partition_col_types @@ -193,13 +193,13 @@ STRING, STRING, STRING ALTER TABLE all_partition_col_types ADD PARTITION (bool_col=FALSE, tinyint_col=1, smallint_col=1, int_col=1, bigint_col=10, float_col=0, double_col=1.1, string_col='1') -LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/' ==== ---- QUERY ALTER TABLE all_partition_col_types ADD PARTITION (bool_col=TRUE, tinyint_col=2, smallint_col=2, int_col=2, bigint_col=20, float_col=0, double_col=1.1, string_col='2') -LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/' +LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/' ==== ---- QUERY EXPLAIN diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test index 81c67d378..ed1809229 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test @@ -652,7 +652,7 @@ TBLPROPERTIES('write.format.default'='parquet', 'write.parquet.page-size-bytes'='65536', 'write.parquet.dict-size-bytes'='131072', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test') + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test') ---- RESULTS-HIVE-3 CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test2 ( level STRING NULL @@ -667,7 +667,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'write.parquet.dict-size-bytes'='131072', 'engine.hive.enabled'='true', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test') + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test') ==== ---- CREATE_TABLE CREATE TABLE iceberg_test3 ( @@ -681,7 +681,7 @@ TBLPROPERTIES('write.format.default'='parquet', 'write.parquet.page-size-bytes'='65536', 'write.parquet.dict-size-bytes'='131072', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test', + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test', 'iceberg.table_identifier'='org.my_db.my_table') ---- RESULTS-HIVE-3 CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test3 ( @@ -697,7 +697,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'write.parquet.dict-size-bytes'='131072', 'engine.hive.enabled'='true', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test', + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test', 'iceberg.table_identifier'='org.my_db.my_table') ==== ---- CREATE_TABLE @@ -725,7 +725,7 @@ TBLPROPERTIES('write.format.default'='parquet', 'write.parquet.page-size-bytes'='65536', 'write.parquet.dict-size-bytes'='131072', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test') + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test') ---- RESULTS-HIVE-3 CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test1_partitioned ( level STRING NULL, @@ -753,7 +753,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'write.parquet.dict-size-bytes'='131072', 'engine.hive.enabled'='true', 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test') + 'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test') ==== ---- CREATE_TABLE CREATE TABLE iceberg_test_orc ( diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py index 23c5a1e73..41630efaf 100644 --- a/tests/authorization/test_ranger.py +++ b/tests/authorization/test_ranger.py @@ -32,6 +32,7 @@ from tests.common.test_dimensions import (create_client_protocol_dimension, create_exec_option_dimension, create_orc_dimension) from tests.util.hdfs_util import NAMENODE from tests.util.calculation_util import get_random_id +from tests.util.filesystem_utils import WAREHOUSE_PREFIX ADMIN = "admin" RANGER_AUTH = ("admin", "admin") @@ -285,7 +286,7 @@ class TestRanger(CustomClusterTestSuite): admin_client.execute("revoke {0} on server from user {1}".format(privilege, user)) def _test_show_grant_basic(self, admin_client, kw, id, unique_database, unique_table): - uri = '/tmp' + uri = WAREHOUSE_PREFIX + '/tmp' try: # Grant server privileges and verify admin_client.execute("grant all on server to {0} {1}".format(kw, id), user=ADMIN) @@ -306,7 +307,7 @@ class TestRanger(CustomClusterTestSuite): result = self.client.execute("show grant {0} {1} on uri '{2}'" .format(kw, id, uri)) TestRanger._check_privileges(result, [ - [kw, id, "", "", "", "{0}{1}".format(NAMENODE, uri), "", "all", "false"]]) + [kw, id, "", "", "", "{0}{1}".format(NAMENODE, '/tmp'), "", "all", "false"]]) # Revoke uri privileges and verify admin_client.execute("revoke all on uri '{0}' from {1} {2}" diff --git a/tests/common/file_utils.py b/tests/common/file_utils.py index de276cae7..fe10a5230 100644 --- a/tests/common/file_utils.py +++ b/tests/common/file_utils.py @@ -21,9 +21,10 @@ import os import re +import tempfile from subprocess import check_call -from tests.util.filesystem_utils import get_fs_path +from tests.util.filesystem_utils import get_fs_path, WAREHOUSE_PREFIX def create_iceberg_table_from_directory(impala_client, unique_database, table_name, @@ -38,6 +39,15 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na os.environ['IMPALA_HOME'], 'testdata/data/iceberg_test/{0}'.format(table_name)) assert os.path.isdir(local_dir) + # If using a prefix, rewrite iceberg metadata to use the prefix + if WAREHOUSE_PREFIX: + tmp_dir = tempfile.mktemp(table_name) + check_call(['cp', '-r', local_dir, tmp_dir]) + rewrite = os.path.join( + os.environ['IMPALA_HOME'], 'testdata/bin/rewrite-iceberg-metadata.py') + check_call([rewrite, WAREHOUSE_PREFIX, os.path.join(tmp_dir, 'metadata')]) + local_dir = tmp_dir + # Put the directory in the database's directory (not the table directory) hdfs_parent_dir = get_fs_path("/test-warehouse") @@ -47,7 +57,7 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na check_call(['hdfs', 'dfs', '-rm', '-f', '-r', hdfs_dir]) # Note: -d skips a staging copy - check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_parent_dir]) + check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_dir]) # Create external table qualified_table_name = '{0}.{1}'.format(unique_database, table_name) diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index b31f207b2..bfa926dff 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -491,7 +491,8 @@ class ImpalaTestSuite(BaseTestSuite): "MANAGED_WAREHOUSE_DIR", "EXTERNAL_WAREHOUSE_DIR"]) repl.update({ - '$SECONDARY_FILESYSTEM': os.environ.get("SECONDARY_FILESYSTEM", ""), + '$SECONDARY_FILESYSTEM': os.getenv("SECONDARY_FILESYSTEM", ""), + '$WAREHOUSE_LOCATION_PREFIX': os.getenv("WAREHOUSE_LOCATION_PREFIX", ""), '$USER': getuser()}) if use_db: diff --git a/tests/custom_cluster/test_startup_filesystem_checks.py b/tests/custom_cluster/test_startup_filesystem_checks.py index 87b8a954a..4d2da9dfe 100644 --- a/tests/custom_cluster/test_startup_filesystem_checks.py +++ b/tests/custom_cluster/test_startup_filesystem_checks.py @@ -25,6 +25,7 @@ import tempfile from impala_py_lib.helpers import find_all_files, is_core_dump from tests.common.file_utils import assert_file_in_dir_contains from tests.common.custom_cluster_test_suite import CustomClusterTestSuite +from tests.util.filesystem_utils import get_fs_path LOG = logging.getLogger('test_startup_filesystem_checks') @@ -38,12 +39,19 @@ class TestStartupFilesystemChecks(CustomClusterTestSuite): setup_method(). """ - NONEXISTENT_PATH = "/nonexistent_path" - NONDIRECTORY_PATH = "/test-warehouse/alltypes/year=2009/month=1/090101.txt" - VALID_SUBDIRECTORY = "/test-warehouse" + # Use get_fs_path because testdata in Ozone requires a volume prefix and does not + # accept underscore as a bucket name (the first element after volume prefix). + NONEXISTENT_PATH = get_fs_path("/nonexistent-path") + NONDIRECTORY_PATH = \ + get_fs_path("/test-warehouse/alltypes/year=2009/month=1/090101.txt") + VALID_SUBDIRECTORY = get_fs_path("/test-warehouse") # Test multiple valid directories along with an empty entry - MULTIPLE_VALID_DIRECTORIES = \ - "/,/test-warehouse/zipcode_incomes,,/test-warehouse/alltypes" + MULTIPLE_VALID_DIRECTORIES = ",".join([ + "/", + get_fs_path("/test-warehouse/zipcode_incomes"), + "", + get_fs_path("/test-warehouse/alltypes")] + ) LOG_DIR = tempfile.mkdtemp(prefix="test_startup_filesystem_checks_", dir=os.getenv("LOG_DIR")) MINIDUMP_PATH = tempfile.mkdtemp() diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index 241feebd5..e4fd873a4 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -38,6 +38,7 @@ from tests.common.test_vector import ImpalaTestDimension from tests.util.filesystem_utils import ( get_fs_path, WAREHOUSE, + WAREHOUSE_PREFIX, IS_HDFS, IS_S3, IS_ADLS, @@ -47,8 +48,11 @@ from tests.common.impala_cluster import ImpalaCluster from tests.util.filesystem_utils import FILESYSTEM_PREFIX -TRASH_PATH = ('.Trash/{0}/Current' if IS_OZONE else 'user/{0}/.Trash/Current').\ - format(getpass.getuser()) +def get_trash_path(bucket, path): + if IS_OZONE: + return get_fs_path('/{0}/.Trash/{1}/Current{2}/{0}/{3}'.format(bucket, + getpass.getuser(), WAREHOUSE_PREFIX, path)) + return '/user/{0}/.Trash/Current/{1}/{2}'.format(getpass.getuser(), bucket, path) # Validates DDL statements (create, drop) class TestDdlStatements(TestDdlBase): @@ -59,20 +63,16 @@ class TestDdlStatements(TestDdlBase): self.client.execute("create table {0}.t1(i int)".format(unique_database)) self.client.execute("create table {0}.t2(i int)".format(unique_database)) # Create sample test data files under the table directories - self.filesystem_client.create_file("test-warehouse/{0}.db/t1/t1.txt".\ - format(unique_database), file_data='t1') - self.filesystem_client.create_file("test-warehouse/{0}.db/t2/t2.txt".\ - format(unique_database), file_data='t2') + dbpath = "{0}/{1}.db".format(WAREHOUSE, unique_database) + self.filesystem_client.create_file("{}/t1/t1.txt".format(dbpath), file_data='t1') + self.filesystem_client.create_file("{}/t2/t2.txt".format(dbpath), file_data='t2') # Drop the table (without purge) and make sure it exists in trash self.client.execute("drop table {0}.t1".format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/t1.txt".\ - format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/".\ - format(unique_database)) - assert self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1/t1.txt'.format(TRASH_PATH, unique_database)) - assert self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1'.format(TRASH_PATH, unique_database)) + assert not self.filesystem_client.exists("{}/t1/t1.txt".format(dbpath)) + assert not self.filesystem_client.exists("{}/t1/".format(dbpath)) + trash = get_trash_path("test-warehouse", unique_database + ".db") + assert self.filesystem_client.exists("{}/t1/t1.txt".format(trash)) + assert self.filesystem_client.exists("{}/t1".format(trash)) # Drop the table (with purge) and make sure it doesn't exist in trash self.client.execute("drop table {0}.t2 purge".format(unique_database)) if not IS_S3 and not IS_ADLS: @@ -82,27 +82,20 @@ class TestDdlStatements(TestDdlBase): # consistent. # The ADLS Python client is not strongly consistent, so these files may still be # visible after a DROP. (Remove after IMPALA-5335 is resolved) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/".\ - format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/t2.txt".\ - format(unique_database)) - assert not self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t2/t2.txt'.format(TRASH_PATH, unique_database)) - assert not self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t2'.format(TRASH_PATH, unique_database)) + assert not self.filesystem_client.exists("{}/t2/".format(dbpath)) + assert not self.filesystem_client.exists("{}/t2/t2.txt".format(dbpath)) + assert not self.filesystem_client.exists("{}/t2/t2.txt".format(trash)) + assert not self.filesystem_client.exists("{}/t2".format(trash)) # Create an external table t3 and run the same test as above. Make # sure the data is not deleted - self.filesystem_client.make_dir( - "test-warehouse/{0}.db/data_t3/".format(unique_database), permission=777) + self.filesystem_client.make_dir("{}/data_t3/".format(dbpath), permission=777) self.filesystem_client.create_file( - "test-warehouse/{0}.db/data_t3/data.txt".format(unique_database), file_data='100') + "{}/data_t3/data.txt".format(dbpath), file_data='100') self.client.execute("create external table {0}.t3(i int) stored as " - "textfile location \'/test-warehouse/{0}.db/data_t3\'" .format(unique_database)) + "textfile location \'{1}/data_t3\'".format(unique_database, dbpath)) self.client.execute("drop table {0}.t3 purge".format(unique_database)) - assert self.filesystem_client.exists( - "test-warehouse/{0}.db/data_t3/data.txt".format(unique_database)) - self.filesystem_client.delete_file_dir( - "test-warehouse/{0}.db/data_t3".format(unique_database), recursive=True) + assert self.filesystem_client.exists("{}/data_t3/data.txt".format(dbpath)) + self.filesystem_client.delete_file_dir("{}/data_t3".format(dbpath), recursive=True) @SkipIfFS.eventually_consistent @SkipIfLocal.hdfs_client @@ -110,24 +103,24 @@ class TestDdlStatements(TestDdlBase): self.client.execute('use default') # Verify the db directory exists assert self.filesystem_client.exists( - "test-warehouse/{0}.db/".format(unique_database)) + "{1}/{0}.db/".format(unique_database, WAREHOUSE)) self.client.execute("create table {0}.t1(i int)".format(unique_database)) # Verify the table directory exists assert self.filesystem_client.exists( - "test-warehouse/{0}.db/t1/".format(unique_database)) + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE)) # Dropping the table removes the table's directory and preserves the db's directory self.client.execute("drop table {0}.t1".format(unique_database)) assert not self.filesystem_client.exists( - "test-warehouse/{0}.db/t1/".format(unique_database)) + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE)) assert self.filesystem_client.exists( - "test-warehouse/{0}.db/".format(unique_database)) + "{1}/{0}.db/".format(unique_database, WAREHOUSE)) # Dropping the db removes the db's directory self.client.execute("drop database {0}".format(unique_database)) assert not self.filesystem_client.exists( - "test-warehouse/{0}.db/".format(unique_database)) + "{1}/{0}.db/".format(unique_database, WAREHOUSE)) # Dropping the db using "cascade" removes all tables' and db's directories # but keeps the external tables' directory @@ -138,17 +131,17 @@ class TestDdlStatements(TestDdlBase): "location '{1}/{0}/t3/'".format(unique_database, WAREHOUSE)) self.client.execute("drop database {0} cascade".format(unique_database)) assert not self.filesystem_client.exists( - "test-warehouse/{0}.db/".format(unique_database)) + "{1}/{0}.db/".format(unique_database, WAREHOUSE)) assert not self.filesystem_client.exists( - "test-warehouse/{0}.db/t1/".format(unique_database)) + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE)) assert not self.filesystem_client.exists( - "test-warehouse/{0}.db/t2/".format(unique_database)) + "{1}/{0}.db/t2/".format(unique_database, WAREHOUSE)) assert self.filesystem_client.exists( - "test-warehouse/{0}/t3/".format(unique_database)) + "{1}/{0}/t3/".format(unique_database, WAREHOUSE)) self.filesystem_client.delete_file_dir( - "test-warehouse/{0}/t3/".format(unique_database), recursive=True) + "{1}/{0}/t3/".format(unique_database, WAREHOUSE), recursive=True) assert not self.filesystem_client.exists( - "test-warehouse/{0}/t3/".format(unique_database)) + "{1}/{0}/t3/".format(unique_database, WAREHOUSE)) # Re-create database to make unique_database teardown succeed. self._create_db(unique_database) @@ -157,12 +150,12 @@ class TestDdlStatements(TestDdlBase): def test_truncate_cleans_hdfs_files(self, unique_database): # Verify the db directory exists assert self.filesystem_client.exists( - "test-warehouse/{0}.db/".format(unique_database)) + "{1}/{0}.db/".format(unique_database, WAREHOUSE)) self.client.execute("create table {0}.t1(i int)".format(unique_database)) # Verify the table directory exists assert self.filesystem_client.exists( - "test-warehouse/{0}.db/t1/".format(unique_database)) + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE)) try: # If we're testing S3, we want the staging directory to be created. @@ -170,31 +163,31 @@ class TestDdlStatements(TestDdlBase): # Should have created one file in the table's dir self.client.execute("insert into {0}.t1 values (1)".format(unique_database)) assert len(self.filesystem_client.ls( - "test-warehouse/{0}.db/t1/".format(unique_database))) == 2 + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2 # Truncating the table removes the data files and preserves the table's directory self.client.execute("truncate table {0}.t1".format(unique_database)) assert len(self.filesystem_client.ls( - "test-warehouse/{0}.db/t1/".format(unique_database))) == 1 + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1 self.client.execute( "create table {0}.t2(i int) partitioned by (p int)".format(unique_database)) # Verify the table directory exists assert self.filesystem_client.exists( - "test-warehouse/{0}.db/t2/".format(unique_database)) + "{1}/{0}.db/t2/".format(unique_database, WAREHOUSE)) # Should have created the partition dir, which should contain exactly one file self.client.execute( "insert into {0}.t2 partition(p=1) values (1)".format(unique_database)) assert len(self.filesystem_client.ls( - "test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 1 + "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 1 # Truncating the table removes the data files and preserves the partition's directory self.client.execute("truncate table {0}.t2".format(unique_database)) assert self.filesystem_client.exists( - "test-warehouse/{0}.db/t2/p=1".format(unique_database)) + "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE)) assert len(self.filesystem_client.ls( - "test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 0 + "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 0 finally: # Reset to its default value. self.client.execute("set s3_skip_insert_staging=true") @@ -455,11 +448,10 @@ class TestDdlStatements(TestDdlBase): # use the (key=value) format. The directory is automatically cleanup up # by the unique_database fixture. self.client.execute("create table {0}.part_data (i int)".format(unique_database)) - assert self.filesystem_client.exists( - "test-warehouse/{0}.db/part_data".format(unique_database)) + dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE) + assert self.filesystem_client.exists("{}/part_data".format(dbpath)) self.filesystem_client.create_file( - "test-warehouse/{0}.db/part_data/data.txt".format(unique_database), - file_data='1984') + "{}/part_data/data.txt".format(dbpath), file_data='1984') self.run_test_case('QueryTest/alter-table', vector, use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector)) @@ -483,20 +475,16 @@ class TestDdlStatements(TestDdlBase): # Add two partitions (j=1) and (j=2) to table t1 self.client.execute("alter table {0}.t1 add partition(j=1)".format(unique_database)) self.client.execute("alter table {0}.t1 add partition(j=2)".format(unique_database)) - self.filesystem_client.create_file(\ - "test-warehouse/{0}.db/t1/j=1/j1.txt".format(unique_database), file_data='j1') - self.filesystem_client.create_file(\ - "test-warehouse/{0}.db/t1/j=2/j2.txt".format(unique_database), file_data='j2') + dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE) + self.filesystem_client.create_file("{}/t1/j=1/j1.txt".format(dbpath), file_data='j1') + self.filesystem_client.create_file("{}/t1/j=2/j2.txt".format(dbpath), file_data='j2') # Drop the partition (j=1) without purge and make sure it exists in trash self.client.execute("alter table {0}.t1 drop partition(j=1)".format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".\ - format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1".\ - format(unique_database)) - assert self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1/j=1/j1.txt'.format(TRASH_PATH, unique_database)) - assert self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1/j=1'.format(TRASH_PATH, unique_database)) + assert not self.filesystem_client.exists("{}/t1/j=1/j1.txt".format(dbpath)) + assert not self.filesystem_client.exists("{}/t1/j=1".format(dbpath)) + trash = get_trash_path("test-warehouse", unique_database + ".db") + assert self.filesystem_client.exists('{}/t1/j=1/j1.txt'.format(trash)) + assert self.filesystem_client.exists('{}/t1/j=1'.format(trash)) # Drop the partition (with purge) and make sure it doesn't exist in trash self.client.execute("alter table {0}.t1 drop partition(j=2) purge".\ format(unique_database)); @@ -507,14 +495,10 @@ class TestDdlStatements(TestDdlBase): # consistent. # The ADLS Python client is not strongly consistent, so these files may still be # visible after a DROP. (Remove after IMPALA-5335 is resolved) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".\ - format(unique_database)) - assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2".\ - format(unique_database)) - assert not self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1/j=2/j2.txt'.format(TRASH_PATH, unique_database)) - assert not self.filesystem_client.exists( - '{0}/test-warehouse/{1}.db/t1/j=2'.format(TRASH_PATH, unique_database)) + assert not self.filesystem_client.exists("{}/t1/j=2/j2.txt".format(dbpath)) + assert not self.filesystem_client.exists("{}/t1/j=2".format(dbpath)) + assert not self.filesystem_client.exists('{}/t1/j=2/j2.txt'.format(trash)) + assert not self.filesystem_client.exists('{}/t1/j=2'.format(trash)) @UniqueDatabase.parametrize(sync_ddl=True) def test_views_ddl(self, vector, unique_database): @@ -751,8 +735,8 @@ class TestDdlStatements(TestDdlBase): tbl_name = "test_tbl" self.execute_query_expect_success(self.client, "create table {0}.{1} (c1 string)" .format(unique_database, tbl_name)) - self.filesystem_client.create_file("test-warehouse/{0}.db/{1}/f". - format(unique_database, tbl_name), + self.filesystem_client.create_file("{2}/{0}.db/{1}/f". + format(unique_database, tbl_name, WAREHOUSE), file_data="\nfoo\n") self.execute_query_expect_success(self.client, "alter table {0}.{1} set tblproperties" diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py index bc408db68..f19d68014 100644 --- a/tests/metadata/test_explain.py +++ b/tests/metadata/test_explain.py @@ -210,8 +210,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite): "ALTER TABLE %s.empty_partition ADD PARTITION (p=NULL)" % self.TEST_DB_NAME) # Put an empty file in the partition so we have > 0 files, but 0 rows self.filesystem_client.create_file( - "test-warehouse/%s.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty" % - self.TEST_DB_NAME, "") + "{1}/{0}.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty". + format(self.TEST_DB_NAME, WAREHOUSE), "") self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME) self.client.execute("COMPUTE STATS %s.empty_partition" % self.TEST_DB_NAME) assert "NULL\t0\t1" in str( @@ -224,8 +224,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite): # that its lack of stats is correctly identified self.client.execute( "ALTER TABLE %s.empty_partition ADD PARTITION (p=1)" % self.TEST_DB_NAME) - self.filesystem_client.create_file("test-warehouse/%s.db/empty_partition/p=1/rows" % - self.TEST_DB_NAME, "1") + self.filesystem_client.create_file( + "{1}/{0}.db/empty_partition/p=1/rows".format(self.TEST_DB_NAME, WAREHOUSE), "1") self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME) explain_result = str( self.client.execute("EXPLAIN SELECT * FROM %s.empty_partition" % self.TEST_DB_NAME)) diff --git a/tests/metadata/test_load.py b/tests/metadata/test_load.py index d2ec060df..c9e537215 100644 --- a/tests/metadata/test_load.py +++ b/tests/metadata/test_load.py @@ -28,13 +28,13 @@ from tests.common.test_dimensions import ( create_uncompressed_text_dimension) from tests.common.skip import SkipIfLocal from tests.common.test_vector import ImpalaTestDimension -from tests.util.filesystem_utils import (WAREHOUSE) +from tests.util.filesystem_utils import WAREHOUSE TEST_TBL_PART = "test_load" TEST_TBL_NOPART = "test_load_nopart" -STAGING_PATH = 'test-warehouse/test_load_staging' -ALLTYPES_PATH = "test-warehouse/alltypes/year=2010/month=1/100101.txt" -MULTIAGG_PATH = 'test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1' +STAGING_PATH = '%s/test_load_staging' % WAREHOUSE +ALLTYPES_PATH = "%s/alltypes/year=2010/month=1/100101.txt" % WAREHOUSE +MULTIAGG_PATH = '%s/alltypesaggmultifiles/year=2010/month=1/day=1' % WAREHOUSE HIDDEN_FILES = ["{0}/3/.100101.txt".format(STAGING_PATH), "{0}/3/_100101.txt".format(STAGING_PATH)] @@ -155,7 +155,7 @@ class TestAsyncLoadData(ImpalaTestSuite): # Create a table with the staging path self.client.execute("create table {0} like functional.alltypesnopart \ - location \'/{1}\'".format(qualified_table_name, staging_path)) + location \'{1}\'".format(qualified_table_name, staging_path)) try: @@ -171,7 +171,7 @@ class TestAsyncLoadData(ImpalaTestSuite): enable_async_load_data delay = "CRS_DELAY_BEFORE_LOAD_DATA:SLEEP@3000" new_vector.get_value('exec_option')['debug_action'] = "{0}".format(delay) - load_stmt = "load data inpath \'/{1}\' \ + load_stmt = "load data inpath \'{1}\' \ into table {0}".format(qualified_table_name, staging_path) exec_start = time.time() handle = self.execute_query_async_using_client(client, load_stmt, new_vector) diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py index d2319f226..40b6343d0 100644 --- a/tests/metadata/test_recover_partitions.py +++ b/tests/metadata/test_recover_partitions.py @@ -55,7 +55,7 @@ class TestRecoverPartitions(ImpalaTestSuite): create_uncompressed_text_dimension(cls.get_workload())) def __get_fs_location(self, db_name, table_name): - return 'test-warehouse/%s.db/%s/' % (db_name, table_name) + return '%s/%s.db/%s/' % (WAREHOUSE, db_name, table_name) @SkipIfLocal.hdfs_client def test_recover_partitions(self, vector, unique_database): diff --git a/tests/metadata/test_recursive_listing.py b/tests/metadata/test_recursive_listing.py index 7b8b0d14b..eb4d0e86f 100644 --- a/tests/metadata/test_recursive_listing.py +++ b/tests/metadata/test_recursive_listing.py @@ -86,36 +86,33 @@ class TestRecursiveListing(ImpalaTestSuite): fq_tbl_name, part_path = self._init_test_table(unique_database, partitioned) # Add a file inside a nested directory and refresh. - self.filesystem_client.make_dir("{0}/dir1".format(part_path[1:])) - self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path[1:]), - "file1") + self.filesystem_client.make_dir("{0}/dir1".format(part_path)) + self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), "file1") self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) assert len(self._show_files(fq_tbl_name)) == 1 assert len(self._get_rows(fq_tbl_name)) == 1 # Add another file inside the same directory, make sure it shows up. - self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path[1:]), - "file2") + self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path), "file2") self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) assert len(self._show_files(fq_tbl_name)) == 2 assert len(self._get_rows(fq_tbl_name)) == 2 # Add a file at the top level, make sure it shows up. - self.filesystem_client.create_file("{0}/file3.txt".format(part_path[1:]), - "file3") + self.filesystem_client.create_file("{0}/file3.txt".format(part_path), "file3") self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) assert len(self._show_files(fq_tbl_name)) == 3 assert len(self._get_rows(fq_tbl_name)) == 3 # Create files in the nested hidden directories and refresh. Make sure it does not # show up - self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path[1:])) + self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path)) self.filesystem_client.create_file( - "{0}/.hive-staging/file3.txt".format(part_path[1:]), + "{0}/.hive-staging/file3.txt".format(part_path), "data-should-be-ignored-by-impala") - self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path[1:])) + self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path)) self.filesystem_client.create_file( - "{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path[1:]), + "{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path), "manifest-file_contents") self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) assert len(self._show_files(fq_tbl_name)) == 3 @@ -135,8 +132,7 @@ class TestRecursiveListing(ImpalaTestSuite): assert len(self._get_rows(fq_tbl_name)) == 3 # Remove the dir with two files. One should remain. - self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path[1:]), - recursive=True) + self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path), recursive=True) self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) assert len(self._show_files(fq_tbl_name)) == 1 assert len(self._get_rows(fq_tbl_name)) == 1 @@ -201,18 +197,15 @@ class TestRecursiveListing(ImpalaTestSuite): response = requests.get(self.enable_fs_tracing_url) assert response.status_code == requests.codes.ok try: - # self.filesystem_client is a DelegatingHdfsClient. It delegates delete_file_dir() - # and make_dir() to the underlying PyWebHdfsClient which expects the HDFS path - # without a leading '/'. So we use large_dir[1:] to remove the leading '/'. - self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True) - self.filesystem_client.make_dir(large_dir[1:]) + self.filesystem_client.delete_file_dir(large_dir, recursive=True) + self.filesystem_client.make_dir(large_dir) self.filesystem_client.touch(files) LOG.info("created staging files under " + large_dir) handle = self.execute_query_async(refresh_stmt) # Wait a moment to let REFRESH finish expected partial listing on the dir. time.sleep(pause_ms_before_file_cleanup / 1000.0) LOG.info("removing staging dir " + large_dir) - self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True) + self.filesystem_client.delete_file_dir(large_dir, recursive=True) LOG.info("removed staging dir " + large_dir) try: self.client.fetch(refresh_stmt, handle) diff --git a/tests/metadata/test_show_create_table.py b/tests/metadata/test_show_create_table.py index b1309c635..f7fabf155 100644 --- a/tests/metadata/test_show_create_table.py +++ b/tests/metadata/test_show_create_table.py @@ -24,6 +24,7 @@ from tests.common.skip import SkipIf, SkipIfHive2 from tests.common.test_dimensions import create_uncompressed_text_dimension from tests.util.test_file_parser import QueryTestSectionReader, remove_comments from tests.common.environ import HIVE_MAJOR_VERSION +from tests.util.filesystem_utils import WAREHOUSE # The purpose of the show create table tests are to ensure that the "SHOW CREATE TABLE" @@ -105,7 +106,7 @@ class TestShowCreateTable(ImpalaTestSuite): if not test_case.existing_table: # create table in Impala - self.__exec(test_case.create_table_sql) + self.__exec(self.__replace_warehouse(test_case.create_table_sql)) # execute "SHOW CREATE TABLE ..." result = self.__exec(test_case.show_create_table_sql) create_table_result = self.__normalize(result.data[0]) @@ -115,9 +116,9 @@ class TestShowCreateTable(ImpalaTestSuite): self.__exec(test_case.drop_table_sql) # check the result matches the expected result - expected_result = self.__normalize(self.__replace_uri( + expected_result = self.__normalize(self.__replace_warehouse(self.__replace_uri( test_case.expected_result, - self.__get_location_uri(create_table_result))) + self.__get_location_uri(create_table_result)))) self.__compare_result(expected_result, create_table_result) if test_case.existing_table: @@ -205,6 +206,9 @@ class TestShowCreateTable(ImpalaTestSuite): def __replace_uri(self, s, uri): return s if uri is None else s.replace("$$location_uri$$", uri) + def __replace_warehouse(self, s): + return s.replace("$$warehouse$$", WAREHOUSE) + # Represents one show-create-table test case. Performs validation of the test sections # and provides SQL to execute for each section. diff --git a/tests/metadata/test_testcase_builder.py b/tests/metadata/test_testcase_builder.py index 161200b66..30a271404 100644 --- a/tests/metadata/test_testcase_builder.py +++ b/tests/metadata/test_testcase_builder.py @@ -40,7 +40,7 @@ class TestTestcaseBuilder(ImpalaTestSuite): tmp_path = get_fs_path("/tmp") # Make sure /tmp dir exists if not self.filesystem_client.exists(tmp_path): - self.filesystem_client.make_dir('tmp') + self.filesystem_client.make_dir(tmp_path) # Generate Testcase Data for query without table reference testcase_generate_query = """COPY TESTCASE TO '%s' SELECT 5 * 20""" % tmp_path result = self.execute_query_expect_success(self.client, testcase_generate_query) @@ -48,7 +48,7 @@ class TestTestcaseBuilder(ImpalaTestSuite): # Check file exists testcase_path = str(result.data)[1: -1] - index = testcase_path.index('/tmp') + index = testcase_path.index(tmp_path) hdfs_path = testcase_path[index:-1] assert self.filesystem_client.exists(hdfs_path), \ "File not generated {0}".format(hdfs_path) diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py index 8ef3ecd42..5d8bb17b7 100644 --- a/tests/query_test/test_compressed_formats.py +++ b/tests/query_test/test_compressed_formats.py @@ -27,7 +27,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_result_verifier import verify_query_result_is_equal from tests.common.test_vector import ImpalaTestDimension -from tests.util.filesystem_utils import get_fs_path +from tests.util.filesystem_utils import get_fs_path, WAREHOUSE # (file extension, table suffix) pairs @@ -65,14 +65,14 @@ class TestCompressedFormatsBase(ImpalaTestSuite): new table. Unless expected_error is set, it expects the query to run successfully. """ # Calculate locations for the source table - base_dir = '/test-warehouse' + base_dir = WAREHOUSE src_table = "functional{0}.{1}".format(db_suffix, table_name) src_table_dir = join(base_dir, table_name + db_suffix) file_basename = self.filesystem_client.ls(src_table_dir)[0] src_file = join(src_table_dir, file_basename) # Calculate locations for the destination table - dest_table_dir = "/test-warehouse/{0}.db/{1}".format(unique_database, table_name) + dest_table_dir = "{2}/{0}.db/{1}".format(unique_database, table_name, WAREHOUSE) dest_table = "{0}.{1}".format(unique_database, table_name) dest_file = join(dest_table_dir, file_basename + dest_extension) @@ -240,7 +240,7 @@ class TestLargeCompressedFile(ImpalaTestSuite): # is generated from a string of 50176 bytes. payload_size = 50176 hdfs_cat = subprocess.Popen(["hadoop", "fs", "-cat", - "/test-warehouse/compressed_payload.snap"], stdout=subprocess.PIPE) + "%s/compressed_payload.snap" % WAREHOUSE], stdout=subprocess.PIPE) compressed_payload = hdfs_cat.stdout.read() compressed_size = len(compressed_payload) hdfs_cat.stdout.close() diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py index 0e6580141..5b20759c0 100644 --- a/tests/query_test/test_insert_behaviour.py +++ b/tests/query_test/test_insert_behaviour.py @@ -17,6 +17,7 @@ import getpass import grp +import os import pwd import pytest import re @@ -25,7 +26,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase from tests.common.skip import (SkipIfFS, SkipIfLocal, SkipIfDockerizedCluster, SkipIfCatalogV2) -from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3 +from tests.util.filesystem_utils import WAREHOUSE, IS_S3 @SkipIfLocal.hdfs_client @@ -52,8 +53,8 @@ class TestInsertBehaviour(ImpalaTestSuite): @pytest.mark.execute_serially def test_insert_removes_staging_files(self): TBL_NAME = "insert_overwrite_nopart" - insert_staging_dir = ("test-warehouse/functional.db/%s/" - "_impala_insert_staging" % TBL_NAME) + insert_staging_dir = ("%s/functional.db/%s/" + "_impala_insert_staging" % (WAREHOUSE, TBL_NAME)) self.filesystem_client.delete_file_dir(insert_staging_dir, recursive=True) self.client.execute("INSERT OVERWRITE functional.%s " "SELECT int_col FROM functional.tinyinttable" % TBL_NAME) @@ -64,7 +65,7 @@ class TestInsertBehaviour(ImpalaTestSuite): def test_insert_preserves_hidden_files(self): """Test that INSERT OVERWRITE preserves hidden files in the root table directory""" TBL_NAME = "insert_overwrite_nopart" - table_dir = "test-warehouse/functional.db/%s/" % TBL_NAME + table_dir = "%s/functional.db/%s/" % (WAREHOUSE, TBL_NAME) hidden_file_locations = [".hidden", "_hidden"] dir_locations = ["dir", ".hidden_dir"] @@ -103,8 +104,10 @@ class TestInsertBehaviour(ImpalaTestSuite): def test_insert_alter_partition_location(self, unique_database): """Test that inserts after changing the location of a partition work correctly, including the creation of a non-existant partition dir""" - part_dir = "tmp/{0}".format(unique_database) - qualified_part_dir = get_fs_path('/' + part_dir) + # Moved to WAREHOUSE for Ozone because it does not allow rename between buckets. + work_dir = "%s/tmp" % WAREHOUSE + self.filesystem_client.make_dir(work_dir) + part_dir = os.path.join(work_dir, unique_database) table_name = "`{0}`.`insert_alter_partition_location`".format(unique_database) self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % table_name) @@ -119,7 +122,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success( self.client, "ALTER TABLE %s PARTITION(p=1) SET LOCATION '%s'" % (table_name, - qualified_part_dir)) + part_dir)) self.execute_query_expect_success( self.client, "INSERT OVERWRITE %s PARTITION(p=1) VALUES(1)" % table_name) @@ -492,7 +495,7 @@ class TestInsertBehaviour(ImpalaTestSuite): "should {1}exist but does {2}exist.".format( path, '' if should_exist else 'not ', 'not ' if should_exist else '') - db_path = "test-warehouse/%s.db/" % self.TEST_DB_NAME + db_path = "%s/%s.db/" % (WAREHOUSE, self.TEST_DB_NAME) table_path = db_path + "test_insert_empty_result" partition_path = "{0}/year=2009/month=1".format(table_path) check_path_exists(table_path, False) @@ -600,14 +603,13 @@ class TestInsertBehaviour(ImpalaTestSuite): if self.exploration_strategy() != 'exhaustive' and IS_S3: pytest.skip("only runs in exhaustive") table = "{0}.insert_clustered".format(unique_database) - table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database) - table_location = get_fs_path("/" + table_path) + table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE) create_stmt = """create table {0} like functional.alltypes""".format(table) self.execute_query_expect_success(self.client, create_stmt) set_location_stmt = """alter table {0} set location '{1}'""".format( - table, table_location) + table, table_path) self.execute_query_expect_success(self.client, set_location_stmt) # Setting a lower batch size will result in multiple row batches being written. @@ -635,8 +637,7 @@ class TestInsertBehaviour(ImpalaTestSuite): if self.exploration_strategy() != 'exhaustive': pytest.skip("only runs in exhaustive") table = "{0}.insert_clustered".format(unique_database) - table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database) - table_location = get_fs_path("/" + table_path) + table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE) create_stmt = """create table {0} ( l_orderkey BIGINT, @@ -659,7 +660,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success(self.client, create_stmt) set_location_stmt = """alter table {0} set location '{1}'""".format( - table, table_location) + table, table_path) self.execute_query_expect_success(self.client, set_location_stmt) # Setting a lower parquet file size will result in multiple files being written. diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py index 563d090ef..b9231a43c 100644 --- a/tests/query_test/test_insert_parquet.py +++ b/tests/query_test/test_insert_parquet.py @@ -32,7 +32,7 @@ from tests.common.skip import SkipIfEC, SkipIfFS, SkipIfLocal from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_result_verifier import verify_query_result_is_equal from tests.common.test_vector import ImpalaTestDimension -from tests.util.filesystem_utils import get_fs_path +from tests.util.filesystem_utils import get_fs_path, WAREHOUSE from tests.util.get_parquet_metadata import (decode_stats_value, get_parquet_metadata_from_hdfs_folder) @@ -328,8 +328,8 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite): to have columns with different signed integer logical types. The test verifies that parquet file written by the hdfs parquet table writer using the generated file has the same column type metadata as the generated one.""" - hdfs_path = (os.environ['DEFAULT_FS'] + "/test-warehouse/{0}.db/" - "signed_integer_logical_types.parquet").format(unique_database) + hdfs_path = "{1}/{0}.db/signed_integer_logical_types.parquet".\ + format(unique_database, WAREHOUSE) self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] + '/testdata/data/signed_integer_logical_types.parquet', hdfs_path) # Create table with signed integer logical types diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py index f68f98f32..a92347f88 100644 --- a/tests/query_test/test_observability.py +++ b/tests/query_test/test_observability.py @@ -20,7 +20,7 @@ from datetime import datetime from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfFS, SkipIfLocal, SkipIfNotHdfsMinicluster -from tests.util.filesystem_utils import IS_EC +from tests.util.filesystem_utils import IS_EC, WAREHOUSE from time import sleep from RuntimeProfile.ttypes import TRuntimeProfileFormat import pytest @@ -424,7 +424,7 @@ class TestObservability(ImpalaTestSuite): def test_query_profile_contains_all_events(self, unique_database): """Test that the expected events show up in a query profile for various queries""" # make a data file to load data from - path = "test-warehouse/{0}.db/data_file".format(unique_database) + path = "{1}/{0}.db/data_file".format(unique_database, WAREHOUSE) self.filesystem_client.create_file(path, "1") use_query = "use {0}".format(unique_database) self.execute_query(use_query) @@ -441,7 +441,7 @@ class TestObservability(ImpalaTestSuite): 'explain select * from impala_6568', 'describe impala_6568', 'alter table impala_6568 set tblproperties(\'numRows\'=\'10\')', - "load data inpath '/{0}' into table impala_6568".format(path) + "load data inpath '{0}' into table impala_6568".format(path) ] # run each query... for query in queries: diff --git a/tests/query_test/test_runtime_filters.py b/tests/query_test/test_runtime_filters.py index f4f51f0bb..e48289bd0 100644 --- a/tests/query_test/test_runtime_filters.py +++ b/tests/query_test/test_runtime_filters.py @@ -29,6 +29,7 @@ from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfFS from tests.common.test_dimensions import add_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.verifiers.metric_verifier import MetricVerifier +from tests.util.filesystem_utils import WAREHOUSE # slow_build_timeout is set to 200000 to avoid failures like IMPALA-8064 where the # runtime filters don't arrive in time. @@ -320,12 +321,10 @@ class TestOverlapMinMaxFilters(ImpalaTestSuite): tbl_name = "part_col_in_data_file" self.execute_query("CREATE TABLE {0}.{1} (i INT) PARTITIONED BY (d DATE) " "STORED AS PARQUET".format(unique_database, tbl_name)) - tbl_loc = self._get_table_location("{0}.{1}".format(unique_database, tbl_name), - vector) - self.filesystem_client.make_dir(tbl_loc[tbl_loc.find('test-warehouse'):] + - '/d=2022-02-22/') + tbl_loc = "%s/%s/%s/d=2022-02-22/" % (WAREHOUSE, unique_database, tbl_name) + self.filesystem_client.make_dir(tbl_loc) self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] + - '/testdata/data/partition_col_in_parquet.parquet', tbl_loc + '/d=2022-02-22/') + '/testdata/data/partition_col_in_parquet.parquet', tbl_loc) self.execute_query("ALTER TABLE {0}.{1} RECOVER PARTITIONS".format( unique_database, tbl_name)) self.execute_query("SET PARQUET_FALLBACK_SCHEMA_RESOLUTION=NAME") diff --git a/tests/util/filesystem_utils.py b/tests/util/filesystem_utils.py index 15ce54891..a9f0fff74 100644 --- a/tests/util/filesystem_utils.py +++ b/tests/util/filesystem_utils.py @@ -82,3 +82,4 @@ def get_secondary_fs_path(path): WAREHOUSE = get_fs_path('/test-warehouse') FILESYSTEM_NAME = get_fs_name(FILESYSTEM) +WAREHOUSE_PREFIX = os.getenv("WAREHOUSE_LOCATION_PREFIX") diff --git a/tests/util/hdfs_util.py b/tests/util/hdfs_util.py index 50004d663..9afdf640c 100644 --- a/tests/util/hdfs_util.py +++ b/tests/util/hdfs_util.py @@ -67,9 +67,11 @@ class DelegatingHdfsClient(BaseFilesystem): super(DelegatingHdfsClient, self).__init__() def create_file(self, path, file_data, overwrite=True): + path = self._normalize_path(path) return self.webhdfs_client.create_file(path, file_data, overwrite=overwrite) def make_dir(self, path, permission=None): + path = self._normalize_path(path) if permission: return self.webhdfs_client.make_dir(path, permission=permission) else: @@ -82,18 +84,23 @@ class DelegatingHdfsClient(BaseFilesystem): self.hdfs_filesystem_client.copy_from_local(src, dst) def ls(self, path): + path = self._normalize_path(path) return self.webhdfs_client.ls(path) def exists(self, path): + path = self._normalize_path(path) return self.webhdfs_client.exists(path) def delete_file_dir(self, path, recursive=False): + path = self._normalize_path(path) return self.webhdfs_client.delete_file_dir(path, recursive=recursive) def get_file_dir_status(self, path): + path = self._normalize_path(path) return self.webhdfs_client.get_file_dir_status(path) def get_all_file_sizes(self, path): + path = self._normalize_path(path) return self.webhdfs_client.get_all_file_sizes(path) def chmod(self, path, permission): @@ -111,6 +118,11 @@ class DelegatingHdfsClient(BaseFilesystem): def touch(self, paths): return self.hdfs_filesystem_client.touch(paths) + def _normalize_path(self, path): + """Paths passed in may include a leading slash. Remove it as the underlying + PyWebHdfsClient expects the HDFS path without a leading '/'.""" + return path[1:] if path.startswith('/') else path + class PyWebHdfsClientWithChmod(PyWebHdfsClient): def chmod(self, path, permission): """Set the permission of 'path' to 'permission' (specified as an octal string, e.g. @@ -212,20 +224,18 @@ class HadoopFsCommandLineClient(BaseFilesystem): def create_file(self, path, file_data, overwrite=True): """Creates a temporary file with the specified file_data on the local filesystem, then puts it into the specified path.""" - fixed_path = self._normalize_path(path) - if not overwrite and self.exists(fixed_path): return False + if not overwrite and self.exists(path): return False with tempfile.NamedTemporaryFile(delete=False) as tmp_file: tmp_file.write(file_data) put_cmd_params = ['-put', '-d'] if overwrite: put_cmd_params.append('-f') - put_cmd_params.extend([tmp_file.name, fixed_path]) + put_cmd_params.extend([tmp_file.name, path]) (status, stdout, stderr) = self._hadoop_fs_shell(put_cmd_params) return status == 0 def make_dir(self, path, permission=None): """Create a directory at the specified path. Permissions are not supported.""" - fixed_path = self._normalize_path(path) - (status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', fixed_path]) + (status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', path]) return status == 0 def copy(self, src, dst, overwrite=False): @@ -233,11 +243,9 @@ class HadoopFsCommandLineClient(BaseFilesystem): 'Skip[s] creation of temporary file with the suffix ._COPYING_.' to avoid extraneous copies on S3. If overwrite is true, the destination file is overwritten, set to false by default for backwards compatibility.""" - fixed_src = self._normalize_path(src) - fixed_dst = self._normalize_path(dst) cp_cmd_params = ['-cp', '-d'] if overwrite: cp_cmd_params.append('-f') - cp_cmd_params.extend([fixed_src, fixed_dst]) + cp_cmd_params.extend([src, dst]) (status, stdout, stderr) = self._hadoop_fs_shell(cp_cmd_params) assert status == 0, \ '{0} copy failed: '.format(self.filesystem_type) + stderr + "; " + stdout @@ -259,8 +267,7 @@ class HadoopFsCommandLineClient(BaseFilesystem): def _inner_ls(self, path): """List names, lengths, and mode for files/directories under the specified path.""" - fixed_path = self._normalize_path(path) - (status, stdout, stderr) = self._hadoop_fs_shell(['-ls', fixed_path]) + (status, stdout, stderr) = self._hadoop_fs_shell(['-ls', path]) # Trim the "Found X items" line and trailing new-line entries = stdout.split("\n")[1:-1] files = [] @@ -275,9 +282,8 @@ class HadoopFsCommandLineClient(BaseFilesystem): def ls(self, path): """Returns a list of all file and directory names in 'path'""" - fixed_path = self._normalize_path(path) files = [] - for f in self._inner_ls(fixed_path): + for f in self._inner_ls(path): fname = os.path.basename(f['name']) if not fname == '': files += [fname] @@ -285,31 +291,23 @@ class HadoopFsCommandLineClient(BaseFilesystem): def exists(self, path): """Checks if a particular path exists""" - fixed_path = self._normalize_path(path) - (status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', fixed_path]) + (status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', path]) return status == 0 def delete_file_dir(self, path, recursive=False): """Delete the file or directory given by the specified path. Recursive must be true for directories.""" - fixed_path = self._normalize_path(path) - rm_command = ['-rm', fixed_path] + rm_command = ['-rm', path] if recursive: - rm_command = ['-rm', '-r', fixed_path] + rm_command = ['-rm', '-r', path] (status, stdout, stderr) = self._hadoop_fs_shell(rm_command) return status == 0 def get_all_file_sizes(self, path): """Returns a list of integers which are all the file sizes of files found under 'path'.""" - fixed_path = self._normalize_path(path) return [f['length'] for f in - self._inner_ls(fixed_path) if f['mode'][0] == "-"] - - def _normalize_path(self, path): - """Paths passed in may lack a leading slash. This adds a leading slash if it is - missing.""" - return path if path.startswith('/') else '/' + path + self._inner_ls(path) if f['mode'][0] == "-"] def touch(self, paths): """Updates the access and modification times of the files specified by 'paths' to
