This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1b7b6bc6fbb258d985665f9f79bc3901e13ac2d4 Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Thu Aug 17 23:26:11 2023 +0800 [Fix](orc-reader) Fix filling partition or missing column used incorrect row count. (#23096) [Fix](orc-reader) Fix filling partition or missing column used incorrect row count. `_row_reader->nextBatch` returns the number of rows read. When ORC lazy materialization is turned on, that number includes filtered rows, so the caller must look at `numElements` in the row batch to determine how many rows were not filtered and will therefore be filled into the block. In this case, filling partition or missing columns used an incorrect row count, which causes a BE crash on `filter.size() != offsets.size()` in the filter-column step. When ORC lazy materialization is turned off, this change adds a `_convert_dict_cols_to_string_cols(block, nullptr)` call if `(block->rows() == 0)`. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 14 +++--- .../hive/test_external_catalog_hive_partition.out | 50 ++++++++++++++++++++++ .../test_external_catalog_hive_partition.groovy | 2 + 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 1dd000bd0f..a1b1aa34b5 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1413,8 +1413,10 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { } *read_rows = rr; - RETURN_IF_ERROR(_fill_partition_columns(block, rr, _lazy_read_ctx.partition_columns)); - RETURN_IF_ERROR(_fill_missing_columns(block, rr, _lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements, + _lazy_read_ctx.partition_columns)); + RETURN_IF_ERROR( + _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns)); if (block->rows() == 0) { *eof = true; @@ -1487,16 +1489,18 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) 
{ } *read_rows = rr; + RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements, + _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR( - _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); - RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); + _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns)); if (block->rows() == 0) { + _convert_dict_cols_to_string_cols(block, nullptr); *eof = true; return Status::OK(); } - _build_delete_row_filter(block, rr); + _build_delete_row_filter(block, _batch->numElements); std::vector<uint32_t> columns_to_filter; int column_to_keep = block->columns(); diff --git a/regression-test/data/external_table_p2/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_p2/hive/test_external_catalog_hive_partition.out index 5608999eb5..c823189e68 100644 --- a/regression-test/data/external_table_p2/hive/test_external_catalog_hive_partition.out +++ b/regression-test/data/external_table_p2/hive/test_external_catalog_hive_partition.out @@ -23,6 +23,31 @@ -- !q06 -- 2023-01-03T00:00 100 0.3 test3 +-- !q07 -- +1994 50063846 1820677 +1995 58220229 1820677 +1995 66859335 1820677 +1997 77350500 1820677 +1995 98899109 1820677 +1996 122310373 1820677 +1996 138664326 1820677 +1995 145803300 1820677 +1998 187514084 1820677 +1994 197627203 1820677 +1993 216217095 1820677 +1997 260737890 1820677 +1998 279581856 1820677 +1992 296560224 1820677 +1993 306190854 1820677 +1997 329189126 1820677 +1992 389043491 1820677 +1997 435247522 1820677 +1998 449388167 1820677 +1994 526241665 1820677 +1998 533034534 1820677 +1996 576018657 1820677 +1997 582732039 1820677 + -- !q01 -- 0.1 test1 2023-01-01T00:00 \N 0.2 test2 2023-01-02T00:00 \N @@ -47,6 +72,31 @@ -- !q06 -- 2023-01-03T00:00 100 0.3 test3 +-- !q07 -- +1994 50063846 1820677 +1995 58220229 1820677 +1995 66859335 1820677 +1997 77350500 1820677 +1995 98899109 1820677 +1996 122310373 
1820677 +1996 138664326 1820677 +1995 145803300 1820677 +1998 187514084 1820677 +1994 197627203 1820677 +1993 216217095 1820677 +1997 260737890 1820677 +1998 279581856 1820677 +1992 296560224 1820677 +1993 306190854 1820677 +1997 329189126 1820677 +1992 389043491 1820677 +1997 435247522 1820677 +1998 449388167 1820677 +1994 526241665 1820677 +1998 533034534 1820677 +1996 576018657 1820677 +1997 582732039 1820677 + -- !q01 -- 0.1 test1 2023-01-01T00:00 \N 0.2 test2 2023-01-02T00:00 \N diff --git a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive_partition.groovy b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive_partition.groovy index fc6e7fbc23..642121c22e 100644 --- a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive_partition.groovy +++ b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive_partition.groovy @@ -39,6 +39,7 @@ suite("test_external_catalog_hive_partition", "p2") { qt_q04 """ select * from multi_catalog.parquet_partitioned_columns order by t_float """ qt_q05 """ select * from multi_catalog.parquet_partitioned_columns where t_int is null order by t_float """ qt_q06 """ select * from multi_catalog.parquet_partitioned_columns where t_int is not null order by t_float """ + qt_q07 """ select o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_parquet where o_custkey=1820677 order by o_orderkey """ } // test orc format def q01_orc = { @@ -48,6 +49,7 @@ suite("test_external_catalog_hive_partition", "p2") { qt_q04 """ select * from multi_catalog.orc_partitioned_columns order by t_float """ qt_q05 """ select * from multi_catalog.orc_partitioned_columns where t_int is null order by t_float """ qt_q06 """ select * from multi_catalog.orc_partitioned_columns where t_int is not null order by t_float """ + qt_q07 """ select o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_orc where o_custkey=1820677 order by o_orderkey """ } // test text format 
def q01_text = { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org