This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 68cfef6d67c [fix](parquet)fix when hive_parquet_use_column_names=false
&& read partition tb cause be core. (#49966)
68cfef6d67c is described below
commit 68cfef6d67ce588c2b079c53a2204a471c9acf93
Author: daidai <[email protected]>
AuthorDate: Mon Apr 14 09:22:04 2025 +0800
[fix](parquet)fix when hive_parquet_use_column_names=false && read
partition tb cause be core. (#49966)
### What problem does this PR solve?
Related PR: #38432
Problem Summary:
When you query a Hive partitioned table stored in Parquet format after running
`set hive_parquet_use_column_names = false`, the BE may crash with:
```
*** SIGABRT unknown detail explain (@0x2f59de) received by PID 3103198 (TID
3110278 OR 0x7f51c8e63640) from PID 3103198; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421
1# 0x00007F55DFB45520 in /lib/x86_64-linux-gnu/libc.so.6
2# pthread_kill at ./nptl/pthread_kill.c:89
3# raise at ../sysdeps/posix/raise.c:27
4# abort at ./stdlib/abort.c:81
5# __gnu_cxx::__verbose_terminate_handler() [clone .cold] at
../../../../libstdc++-v3/libsupc++/vterminate.cc:75
6# __cxxabiv1::__terminate(void (*)()) at
../../../../libstdc++-v3/libsupc++/eh_terminate.cc:48
7# 0x000055C8BD4E2041 in
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
8# 0x000055C8BD4E2194 in
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
9# 0x000055C8BD4E2586 in
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
10# std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >::_M_assign(std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > const&) at
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:265
11#
doris::vectorized::ParquetReader::get_next_block(doris::vectorized::Block*,
unsigned long*, bool*) at
/home/zcp/repo_center/doris_master/doris/be/src/vec/exec/format/parquet/vparquet_reader.cpp:586
```
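The cause is an out-of-bounds access in `get_next_block`: when it replaces the
block's column names, it indexes `_column_names` by block column position,
which can run past the end of `_column_names`.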
---
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 3 ++-
.../hive/test_external_catalog_hive_partition.out | Bin 2711 -> 4455 bytes
.../hive/test_external_catalog_hive_partition.groovy | 9 +++++++++
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index b91a7c21cbd..3083fd61ab0 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -561,6 +561,7 @@ Status ParquetReader::get_next_block(Block* block, size_t*
read_rows, bool* eof)
return Status::OK();
}
+ std::vector<std::string> original_block_column_name = block->get_names();
if (!_hive_use_column_names) {
for (auto i = 0; i < block->get_names().size(); i++) {
auto& col = block->get_by_position(i);
@@ -584,7 +585,7 @@ Status ParquetReader::get_next_block(Block* block, size_t*
read_rows, bool* eof)
if (!_hive_use_column_names) {
for (auto i = 0; i < block->columns(); i++) {
- block->get_by_position(i).name = (*_column_names)[i];
+ block->get_by_position(i).name = original_block_column_name[i];
}
block->initialize_index_by_name();
}
diff --git
a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
index aa1e48a439d..0402feef40e 100644
Binary files
a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
and
b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
differ
diff --git
a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
index 32b80f5650d..d34467c4c56 100644
---
a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
@@ -65,9 +65,18 @@ suite("test_external_catalog_hive_partition",
"p0,external,hive,external_docker,
qt_q06 """ select * from multi_catalog.text_partitioned_columns
where t_int is not null order by t_float """
}
sql """ use `multi_catalog`; """
+ sql """ set hive_parquet_use_column_names = true; """
+ sql """ set hive_orc_use_column_names = true"""
+
q01_parquet()
q01_orc()
q01_text()
+
+ sql """ set hive_parquet_use_column_names = false; """
+ sql """ set hive_orc_use_column_names = false"""
+ q01_parquet()
+ q01_orc()
+
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]