This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fda337ed1d6 [Fix](parquet-reader) Fix reading fixed length byte array decimal in parquet reader. (#30535)
fda337ed1d6 is described below

commit fda337ed1d637ddeab9534387b837bfdb82bcacd
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Wed Jan 31 21:37:59 2024 +0800

    [Fix](parquet-reader) Fix reading fixed length byte array decimal in parquet reader. (#30535)
---
 .../exec/format/parquet/fix_length_plain_decoder.h   |   6 ++----
 .../hive/scripts/create_preinstalled_table.hql       |  19 +++++++++++++++++++
 .../fixed_length_byte_array_decimal_table/000000_0   | Bin 0 -> 200910 bytes
 .../external_table_p0/hive/test_hive_parquet.out     |  12 ++++++++++++
 .../external_table_p0/hive/test_hive_parquet.groovy  |  16 ++++++++++++++++
 5 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
index c06e33c550c..cbf2ffa5c68 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
@@ -112,19 +112,17 @@ Status FixLengthPlainDecoder<PhysicalType>::_decode_string(MutableColumnPtr& dor
             size_t data_size = run_length * _type_length;
             size_t old_size = chars.size();
             chars.resize(old_size + data_size);
-            memcpy(chars.data() + old_size, _data->data, data_size);
+            memcpy(chars.data() + old_size, _data->data + _offset, data_size);
 
             // copy offsets
             offsets.resize(offsets.size() + run_length);
             auto* offsets_data = offsets.data() + offsets.size() - run_length;
 
-            int i = 0;
-            for (; i < run_length; i++) {
+            for (int i = 0; i < run_length; i++) {
                 bytes_size += _type_length;
                 *(offsets_data++) = bytes_size;
             }
 
-            //doris_column->insert_many_strings_fixed_length<_type_length>(&string_values[0], run_length);
             _offset += data_size;
             break;
         }
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index cc16a408eec..9baf13020f6 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1899,6 +1899,25 @@ LOCATION
 
 msck repair table parquet_decimal90_table;
 
+CREATE TABLE `fixed_length_byte_array_decimal_table`(
+  `decimal_col1` decimal(7,2),
+  `decimal_col2` decimal(7,2),
+  `decimal_col3` decimal(7,2),
+  `decimal_col4` decimal(7,2),
+  `decimal_col5` decimal(7,2))
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table'
+TBLPROPERTIES (
+  'parquet.compress'='SNAPPY');
+
+msck repair table fixed_length_byte_array_decimal_table;
+
 show tables;
 
 
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0
new file mode 100644
index 00000000000..c215cf5ac6c
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/fixed_length_byte_array_decimal_table/000000_0 differ
diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet.out b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
index a54a25520da..d0a22fe47b8 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_parquet.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
@@ -10145,3 +10145,15 @@ us     washington      1999
 -- !q21_avg --
 -495084140.9042
 
+-- !q22_max --
+27960.57       29917.80        41344.02        43556.47        18281.12
+
+-- !q22_min --
+0.00   0.00    0.00    0.00    -9884.00
+
+-- !q22_sum --
+2296013386.03  2399429017.59   3573059811.32   3676453127.65   -256727758.97
+
+-- !q22_avg --
+2296.5737      2399.9882       3573.0598       3676.4531       -256.7277
+
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
index c60e1a4f0a6..c381a3c236b 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
@@ -154,6 +154,21 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv
         """
     }
 
+    def q22 = {
+        qt_q22_max """
+        select max(decimal_col1), max(decimal_col2), max(decimal_col3), max(decimal_col4), max(decimal_col5) from fixed_length_byte_array_decimal_table;
+        """
+        qt_q22_min """
+        select min(decimal_col1), min(decimal_col2), min(decimal_col3), min(decimal_col4), min(decimal_col5) from fixed_length_byte_array_decimal_table;
+        """
+        qt_q22_sum """
+        select sum(decimal_col1), sum(decimal_col2), sum(decimal_col3), sum(decimal_col4), sum(decimal_col5) from fixed_length_byte_array_decimal_table;
+        """
+        qt_q22_avg """
+        select avg(decimal_col1), avg(decimal_col2), avg(decimal_col3), avg(decimal_col4), avg(decimal_col5) from fixed_length_byte_array_decimal_table;
+        """
+    }
+
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled != null && enabled.equalsIgnoreCase("true")) {
         try {
@@ -191,6 +206,7 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv
             q19()
             q20()
             q21()
+            q22()
 
             sql """explain physical plan select l_partkey from partition_table
                 where (nation != 'cn' or city !='beijing') and (l_quantity > 28 or l_extendedprice > 30000)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to