This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f64fe8e8b2e6211ccffe7f7b5ea0e06b21974fef
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Fri Aug 18 14:05:33 2023 +0800

    [fix](multi-catalog) conversion of compatible numerical types (#23113)
    
    Hive supports schema changes but does not rewrite existing Parquet files, so the 
physical type stored in a Parquet file may not match the logical type in the table schema.
---
 .../format/parquet/fix_length_dict_decoder.hpp     | 12 +++----
 .../format/parquet/fix_length_plain_decoder.cpp    | 24 +++++++++++---
 .../exec/format/parquet/fix_length_plain_decoder.h |  2 +-
 .../hive/test_hive_schema_change.out               |  7 ++++
 .../hive/test_hive_schema_change.groovy            | 37 ++++++++++++++++++++++
 5 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp 
b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index bb95fb426f..c368868fd8 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -74,10 +74,10 @@ public:
 
         TypeIndex logical_type = remove_nullable(data_type)->get_type_id();
         switch (logical_type) {
-#define DISPATCH(NUMERIC_TYPE, CPP_NUMERIC_TYPE, PHYSICAL_TYPE)                
                \
-    case NUMERIC_TYPE:                                                         
                \
-        if constexpr (std::is_same_v<T, PHYSICAL_TYPE>) {                      
                \
-            return _decode_numeric<CPP_NUMERIC_TYPE, has_filter>(doris_column, 
select_vector); \
+#define DISPATCH(NUMERIC_TYPE, CPP_NUMERIC_TYPE, PHYSICAL_TYPE)                
                   \
+    case NUMERIC_TYPE:                                                         
                   \
+        if constexpr (!std::is_same_v<T, ParquetInt96>) {                      
                   \
+            return _decode_numeric<CPP_NUMERIC_TYPE, T, 
has_filter>(doris_column, select_vector); \
         }
             FOR_LOGICAL_NUMERIC_TYPES(DISPATCH)
 #undef DISPATCH
@@ -177,7 +177,7 @@ public:
     }
 
 protected:
-    template <typename Numeric, bool has_filter>
+    template <typename Numeric, typename PhysicalType, bool has_filter>
     Status _decode_numeric(MutableColumnPtr& doris_column, ColumnSelectVector& 
select_vector) {
         auto& column_data = 
static_cast<ColumnVector<Numeric>&>(*doris_column).get_data();
         size_t data_index = column_data.size();
@@ -189,7 +189,7 @@ protected:
             case ColumnSelectVector::CONTENT: {
                 for (size_t i = 0; i < run_length; ++i) {
                     column_data[data_index++] =
-                            
static_cast<Numeric>(_dict_items[_indexes[dict_index++]]);
+                            
static_cast<PhysicalType>(_dict_items[_indexes[dict_index++]]);
                 }
                 break;
             }
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp 
b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index 3fe58e6a5d..f4e24ca4ab 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -75,9 +75,23 @@ Status 
FixLengthPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat
     }
     TypeIndex logical_type = remove_nullable(data_type)->get_type_id();
     switch (logical_type) {
-#define DISPATCH(NUMERIC_TYPE, CPP_NUMERIC_TYPE, PHYSICAL_TYPE) \
-    case NUMERIC_TYPE:                                          \
-        return _decode_numeric<CPP_NUMERIC_TYPE, has_filter>(doris_column, 
select_vector);
+#define DISPATCH(NUMERIC_TYPE, CPP_NUMERIC_TYPE, PHYSICAL_TYPE)                
           \
+    case NUMERIC_TYPE:                                                         
           \
+        if (_physical_type == tparquet::Type::INT32) {                         
           \
+            return _decode_numeric<CPP_NUMERIC_TYPE, Int32, 
has_filter>(doris_column,     \
+                                                                        
select_vector);   \
+        } else if (_physical_type == tparquet::Type::INT64) {                  
           \
+            return _decode_numeric<CPP_NUMERIC_TYPE, Int64, 
has_filter>(doris_column,     \
+                                                                        
select_vector);   \
+        } else if (_physical_type == tparquet::Type::FLOAT) {                  
           \
+            return _decode_numeric<CPP_NUMERIC_TYPE, Float32, 
has_filter>(doris_column,   \
+                                                                          
select_vector); \
+        } else if (_physical_type == tparquet::Type::DOUBLE) {                 
           \
+            return _decode_numeric<CPP_NUMERIC_TYPE, Float64, 
has_filter>(doris_column,   \
+                                                                          
select_vector); \
+        } else {                                                               
           \
+            break;                                                             
           \
+        }
         FOR_LOGICAL_NUMERIC_TYPES(DISPATCH)
 #undef DISPATCH
     case TypeIndex::Date:
@@ -207,7 +221,7 @@ Status 
FixLengthPlainDecoder::_decode_string(MutableColumnPtr& doris_column,
     }
     return Status::OK();
 }
-template <typename Numeric, bool has_filter>
+template <typename Numeric, typename PhysicalType, bool has_filter>
 Status FixLengthPlainDecoder::_decode_numeric(MutableColumnPtr& doris_column,
                                               ColumnSelectVector& 
select_vector) {
     auto& column_data = 
static_cast<ColumnVector<Numeric>&>(*doris_column).get_data();
@@ -219,7 +233,7 @@ Status 
FixLengthPlainDecoder::_decode_numeric(MutableColumnPtr& doris_column,
         case ColumnSelectVector::CONTENT: {
             for (size_t i = 0; i < run_length; ++i) {
                 char* buf_start = _data->data + _offset;
-                column_data[data_index++] = *(Numeric*)buf_start;
+                column_data[data_index++] = *(PhysicalType*)buf_start;
                 _offset += _type_length;
             }
             break;
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h 
b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
index 3204d54464..0b3a6e1945 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
@@ -47,7 +47,7 @@ public:
     Status skip_values(size_t num_values) override;
 
 protected:
-    template <typename Numeric, bool has_filter>
+    template <typename Numeric, typename PhysicalType, bool has_filter>
     Status _decode_numeric(MutableColumnPtr& doris_column, ColumnSelectVector& 
select_vector);
 
     template <typename CppType, typename ColumnType, bool has_filter>
diff --git 
a/regression-test/data/external_table_p2/hive/test_hive_schema_change.out 
b/regression-test/data/external_table_p2/hive/test_hive_schema_change.out
new file mode 100644
index 0000000000..fde929309b
--- /dev/null
+++ b/regression-test/data/external_table_p2/hive/test_hive_schema_change.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !schema_change --
+1      1       1.0     1.0     1.0099999904632568      1.10101
+2      2       2.0     2.0     2.0199999809265137      2.20202
+3      3       3.0     3.0     3.0299999713897705      3.30303
+123    6334    7898763.0       1.2837483628455E13      0.010101010091602802    
0.09238498728784825
+
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_schema_change.groovy 
b/regression-test/suites/external_table_p2/hive/test_hive_schema_change.groovy
new file mode 100644
index 0000000000..fbc1e40d89
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_hive_schema_change.groovy
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_schema_change", 
"p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_schema_change"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        sql """ switch ${catalog_name} """
+        sql """ use `default` """
+        qt_schema_change """ select * from schema_change order by tinyint_col 
"""
+        sql """ drop catalog ${catalog_name} """
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to