This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e294be7707167ba261e5d9a58d3837a83dd0b7ff Author: stiga-huang <[email protected]> AuthorDate: Mon Jun 19 14:18:31 2023 +0800 IMPALA-12128: Bump ORC C++ version to 1.7.9-p10 This bumps the ORC C++ version from 1.7.0-p14 to 1.7.9-p10 to add the fixes of ORC-1041 and ORC-1304. Tests: - Add e2e test for ORC-1304. - It's hard to add test for ORC-1041 since it won't cause crashes when compiling with gcc-10. Change-Id: I26c39fe5b15ab0bcbe6b2af6fe7a45e48eaec6eb Reviewed-on: http://gerrit.cloudera.org:8080/20090 Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 4 +- testdata/data/README | 45 +++++++++++++++++++++ testdata/data/empty_present_stream.orc | Bin 0 -> 530 bytes .../functional/functional_schema_template.sql | 14 ++++++- .../datasets/functional/schema_constraints.csv | 3 ++ .../queries/QueryTest/orc-stats.test | 8 ++++ 6 files changed, 71 insertions(+), 3 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 644384e1f..cb4ca79a4 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false} # moving to a different build of the toolchain, e.g. when a version is bumped or a # compile option is changed. The build id can be found in the output of the toolchain # build jobs, it is constructed from the build number and toolchain git hash prefix. -export IMPALA_TOOLCHAIN_BUILD_ID=295-d43043e809 +export IMPALA_TOOLCHAIN_BUILD_ID=296-f7e1d0d78b # Versions of toolchain dependencies. 
# ----------------------------------- export IMPALA_AVRO_VERSION=1.7.4-p5 @@ -148,7 +148,7 @@ export IMPALA_ZSTD_VERSION=1.5.2 unset IMPALA_ZSTD_URL export IMPALA_OPENLDAP_VERSION=2.4.47 unset IMPALA_OPENLDAP_URL -export IMPALA_ORC_VERSION=1.7.0-p14 +export IMPALA_ORC_VERSION=1.7.9-p10 unset IMPALA_ORC_URL export IMPALA_PROTOBUF_VERSION=3.14.0 unset IMPALA_PROTOBUF_URL diff --git a/testdata/data/README b/testdata/data/README index 0d4940d10..7c1560527 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -979,3 +979,48 @@ The following command was used: mvn -f "${IMPALA_HOME}/java/datagenerator/pom.xml" exec:java -Dexec.mainClass="org.apache.impala.datagenerator.RandomNestedDataGenerator" -Dexec.args="${input_table_schema}.avsc 1500000 15 '${output_file}.parquet'"; + +empty_present_stream.orc: +Generated by ORC C++ library using the following code + + size_t num = 500; + WriterOptions options; + options.setRowIndexStride(100); + auto stream = writeLocalFile("empty_present_stream.orc"); + std::unique_ptr<Type> type(Type::buildTypeFromString( + "struct<s1:struct<id:int>,s2:struct<id:int>>")); + + std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options); + + std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num); + StructVectorBatch* structBatch = + dynamic_cast<StructVectorBatch*>(batch.get()); + StructVectorBatch* structBatch2 = + dynamic_cast<StructVectorBatch*>(structBatch->fields[0]); + LongVectorBatch* intBatch = + dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]); + + StructVectorBatch* structBatch3 = + dynamic_cast<StructVectorBatch*>(structBatch->fields[1]); + LongVectorBatch* intBatch2 = + dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]); + + structBatch->numElements = num; + structBatch2->numElements = num; + + structBatch3->numElements = num; + structBatch3->hasNulls = true; + + for (size_t i = 0; i < num; ++i) { + intBatch->data.data()[i] = i; + intBatch->notNull[i] = 1; + + intBatch2->notNull[i] = 
0; + intBatch2->hasNulls = true; + + structBatch3->notNull[i] = 0; + } + intBatch->hasNulls = false; + + writer->add(*batch); + writer->close(); diff --git a/testdata/data/empty_present_stream.orc b/testdata/data/empty_present_stream.orc new file mode 100644 index 000000000..90695c009 Binary files /dev/null and b/testdata/data/empty_present_stream.orc differ diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 0bcf18fc4..cbaf445db 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -4202,4 +4202,16 @@ id bigint delimited ---- LOAD LOAD DATA LOCAL INPATH '{impala_home}/testdata/empty_parquet_page_source_impala10186/data.csv' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; -==== \ No newline at end of file +==== +---- DATASET +functional +---- BASE_TABLE_NAME +empty_stream_tbl +---- COLUMNS +s1 struct<id:int> +s2 struct<id:int> +---- TABLE_PROPERTIES +transactional=false +---- DEPENDENT_LOAD +LOAD DATA LOCAL INPATH '{impala_home}/testdata/data/empty_present_stream.orc' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; +==== diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index f71b00de0..f567cac56 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -386,3 +386,6 @@ table_name:complextypestbl_parquet_v2_snappy, constraint:restrict_to, table_form # The table is used to test a specific parquet page layout bug table_name:empty_parquet_page_source_impala10186, constraint:restrict_to, table_format:text/none/none + +# The table is used as test coverage for ORC-1304 +table_name:empty_stream_tbl, constraint:restrict_to, table_format:orc/def/block diff --git a/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test 
b/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test index d43049c4d..14b14b00b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/orc-stats.test @@ -856,3 +856,11 @@ select count(*) from functional_orc_def.decimal_tbl where d4 is null; ---- RUNTIME_PROFILE aggregation(SUM, RowsRead): 0 ==== +---- QUERY +# Regression test for ORC-1304 +select s2.id from functional_orc_def.empty_stream_tbl where s1.id = 100; +---- RESULTS +NULL +---- TYPES +INT +====
