This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3585c7e216f [test](parquet)append parquet reader byte_array_decimal and rle_bool case (#26751)
3585c7e216f is described below
commit 3585c7e216fdde7447b4736fa3a5454cc732fcf9
Author: daidai <[email protected]>
AuthorDate: Tue Nov 14 15:05:10 2023 +0800
[test](parquet)append parquet reader byte_array_decimal and rle_bool case (#26751)
---
.../exec/format/parquet/parquet_column_convert.h | 18 ++++-
.../hive/scripts/create_preinstalled_table.hql | 14 ++++
.../parquet_decimal_bool/null.parquet | Bin 0 -> 4298 bytes
.../parquet_decimal_bool/null2.parquet | Bin 0 -> 8572 bytes
.../parquet_decimal_bool/test.parquet | Bin 0 -> 621 bytes
.../parquet_decimal_bool/test_copy_1.parquet | Bin 0 -> 1701 bytes
.../parquet_decimal_bool/test_copy_2.parquet | Bin 0 -> 1719 bytes
.../hive/test_hive_basic_type.out | 81 +++++++++++++++++++++
.../hive/test_hive_basic_type.groovy | 23 ++++++
9 files changed, 135 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 6cf3cfb6c50..8054f9b88e6 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -619,7 +619,23 @@ inline Status get_converter(tparquet::Type::type parquet_physical_type, Primitiv
DecimalScaleParams::NO_SCALE>>(); \
}
\
}
\
- } else if (tparquet::Type::INT32 == parquet_physical_type) {
\
+ } else if (tparquet::Type::BYTE_ARRAY == parquet_physical_type) {
\
+
convert_params->init_decimal_converter<PRIMARY_TYPE>(dst_data_type);
\
+ using ValueCopyType = DECIMAL_TYPE::NativeType;
\
+ if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) {
\
+ *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE,
ValueCopyType, \
+
DecimalScaleParams::SCALE_UP>>(); \
+ } else if (scale_params.scale_type ==
DecimalScaleParams::SCALE_DOWN) { \
+ *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE,
ValueCopyType, \
+
DecimalScaleParams::SCALE_DOWN>>(); \
+ } else {
\
+ *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE,
ValueCopyType, \
+
DecimalScaleParams::NO_SCALE>>(); \
+ }
\
+
\
+ }
\
+
\
+ else if (tparquet::Type::INT32 == parquet_physical_type) {
\
if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) {
\
*converter = std::make_unique<NumberToDecimal<Int32,
PRIMARY_TYPE, int64_t, \
DecimalScaleParams::SCALE_UP>>(); \
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 740bf2fa584..a6d5c212a12 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1762,6 +1762,20 @@ LOCATION
msck repair table orc_decimal_table;
+CREATE TABLE `parquet_decimal_bool`(
+ decimals decimal(20,3),
+ bool_rle boolean
+)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ '/user/doris/preinstalled_data/parquet_table/parquet_decimal_bool';
+
+msck repair table partition_table;
show tables;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet
new file mode 100644
index 00000000000..060bdf23bce
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet
new file mode 100644
index 00000000000..6279cdd873f
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet
new file mode 100644
index 00000000000..dbee42cece3
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet
new file mode 100644
index 00000000000..5ca4106056a
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet
new file mode 100644
index 00000000000..e1463724f4f
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet differ
diff --git a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 0398050f02b..94de65a4979 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -181,3 +181,84 @@ test DATETIME(6) Yes true \N
\N \N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
\N \N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+-- !parquet --
+219
+
+-- !parquet1 --
+-7871.416 false
+-7871.416 false
+-7871.416 false
+-7871.416 true
+-7871.416 true
+-7871.416 true
+-7871.416 true
+
+-- !parquet2 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
+-- !parquet3 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
+-- !parquet4 --
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+
+-- !parquet5 --
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+
+-- !parquet7 --
+true
+true
+true
+true
+true
+true
+true
+
+-- !parquet8 --
+true
+true
+true
+true
+true
+true
+true
+
+-- !parquet9 --
+116
+
+-- !parquet10 --
+123
+
+-- !parquet11 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
index bf58eb163c7..84720216fe0 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
@@ -108,6 +108,29 @@ suite("test_hive_basic_type", "external_docker,hive,external_docker_hive,p0,exte
// orc_all_types_t predicate test
order_qt_41 """select * from
${catalog_name}.${ex_db_name}.orc_all_types_t where t_int = 3;"""
+ //test parquet byte_array_decimal and rle_bool
+ order_qt_parquet """ select count(*) from
${catalog_name}.${ex_db_name}.parquet_decimal_bool """
+ order_qt_parquet1 """ select * from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals is not null and bool_rle is not null order by
decimals,bool_rle limit 7 """
+ order_qt_parquet2 """ select decimals from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals is not null and decimals > 1 order by decimals
limit 7 """
+ order_qt_parquet3 """ select decimals from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals = 123.456 order by decimals limit 7 """
+ order_qt_parquet4 """ select decimals from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals != -7871.416 and decimals is not null order by
decimals limit 7 """
+
+ order_qt_parquet5 """ select decimals from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals is not null and decimals < 0 order by decimals
limit 7 """
+
+ order_qt_parquet7 """ select bool_rle from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where bool_rle is not null and bool_rle = 1 limit 7 """
+ order_qt_parquet8 """ select bool_rle from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where bool_rle is not null and bool_rle = 1 limit 7 """
+ order_qt_parquet9 """ select count(bool_rle) from
${catalog_name}.${ex_db_name}.parquet_decimal_bool; """
+ order_qt_parquet10 """ select count(decimals) from
${catalog_name}.${ex_db_name}.parquet_decimal_bool; """
+ order_qt_parquet11 """ select decimals from
${catalog_name}.${ex_db_name}.parquet_decimal_bool
+ where decimals is not null and decimals > 1 order by decimals
limit 7 """
+
//sql """drop catalog if exists ${catalog_name} """
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]