This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new fda337ed1d6 [Fix](parquet-reader) Fix reading fixed length byte array decimal in parquet reader. (#30535) fda337ed1d6 is described below commit fda337ed1d637ddeab9534387b837bfdb82bcacd Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Wed Jan 31 21:37:59 2024 +0800 [Fix](parquet-reader) Fix reading fixed length byte array decimal in parquet reader. (#30535) --- .../exec/format/parquet/fix_length_plain_decoder.h | 6 ++---- .../hive/scripts/create_preinstalled_table.hql | 19 +++++++++++++++++++ .../fixed_length_byte_array_decimal_table/000000_0 | Bin 0 -> 200910 bytes .../external_table_p0/hive/test_hive_parquet.out | 12 ++++++++++++ .../external_table_p0/hive/test_hive_parquet.groovy | 16 ++++++++++++++++ 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h index c06e33c550c..cbf2ffa5c68 100644 --- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h +++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h @@ -112,19 +112,17 @@ Status FixLengthPlainDecoder<PhysicalType>::_decode_string(MutableColumnPtr& dor size_t data_size = run_length * _type_length; size_t old_size = chars.size(); chars.resize(old_size + data_size); - memcpy(chars.data() + old_size, _data->data, data_size); + memcpy(chars.data() + old_size, _data->data + _offset, data_size); // copy offsets offsets.resize(offsets.size() + run_length); auto* offsets_data = offsets.data() + offsets.size() - run_length; - int i = 0; - for (; i < run_length; i++) { + for (int i = 0; i < run_length; i++) { bytes_size += _type_length; *(offsets_data++) = bytes_size; } - //doris_column->insert_many_strings_fixed_length<_type_length>(&string_values[0], run_length); _offset += data_size; break; } diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index cc16a408eec..9baf13020f6 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -1899,6 +1899,25 @@ LOCATION msck repair table parquet_decimal90_table; +CREATE TABLE `fixed_length_byte_array_decimal_table`( + `decimal_col1` decimal(7,2), + `decimal_col2` decimal(7,2), + `decimal_col3` decimal(7,2), + `decimal_col4` decimal(7,2), + `decimal_col5` decimal(7,2)) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '/user/doris/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table' +TBLPROPERTIES ( + 'parquet.compress'='SNAPPY'); + +msck repair table fixed_length_byte_array_decimal_table; + show tables; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0 new file mode 100644 index 00000000000..c215cf5ac6c Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0 differ diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet.out b/regression-test/data/external_table_p0/hive/test_hive_parquet.out index a54a25520da..d0a22fe47b8 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_parquet.out +++ b/regression-test/data/external_table_p0/hive/test_hive_parquet.out @@ -10145,3 +10145,15 @@ us washington 1999 -- !q21_avg -- -495084140.9042 +-- !q22_max -- +27960.57 29917.80 41344.02 43556.47 18281.12 + +-- !q22_min -- +0.00 0.00 0.00 0.00 -9884.00 + +-- !q22_sum -- +2296013386.03 2399429017.59 3573059811.32 3676453127.65 -256727758.97 + +-- !q22_avg -- +2296.5737 2399.9882 3573.0598 3676.4531 -256.7277 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy index c60e1a4f0a6..c381a3c236b 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy @@ -154,6 +154,21 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv """ } + def q22 = { + qt_q22_max """ + select max(decimal_col1), max(decimal_col2), max(decimal_col3), max(decimal_col4), max(decimal_col5) from fixed_length_byte_array_decimal_table; + """ + qt_q22_min """ + select min(decimal_col1), min(decimal_col2), min(decimal_col3), min(decimal_col4), min(decimal_col5) from fixed_length_byte_array_decimal_table; + """ + qt_q22_sum """ + select sum(decimal_col1), sum(decimal_col2), sum(decimal_col3), sum(decimal_col4), sum(decimal_col5) from fixed_length_byte_array_decimal_table; + """ + qt_q22_avg """ + select avg(decimal_col1), avg(decimal_col2), avg(decimal_col3), avg(decimal_col4), avg(decimal_col5) from fixed_length_byte_array_decimal_table; + """ + } + String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled != null && enabled.equalsIgnoreCase("true")) { try { @@ -191,6 +206,7 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv q19() q20() q21() + q22() sql """explain physical plan select l_partkey from partition_table where (nation != 'cn' or city !='beijing') and (l_quantity > 28 or l_extendedprice > 30000) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org