[doris] branch master updated: [unify type system](remove unused type desc) remove some code (#17921)

yiguolei Sat, 18 Mar 2023 23:05:15 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new dd53bc1c8d [unify type system](remove unused type desc) remove some 
code (#17921)
dd53bc1c8d is described below

commit dd53bc1c8dbac94f945a24a442e1a24dab22ebbe
Author: yiguolei <676222...@qq.com>
AuthorDate: Sun Mar 19 14:05:02 2023 +0800

    [unify type system](remove unused type desc) remove some code (#17921)
    
    There are many type definitions in BE. We should unify the type system to
    simplify development.
    
    
    
    ---------
    
    Co-authored-by: yiguolei <yiguo...@gmail.com>
---
 be/src/exec/base_scanner.cpp                      |   1 -
 be/src/exec/schema_scanner.cpp                    |  74 ---------
 be/src/exec/schema_scanner.h                      |   3 -
 be/src/exec/tablet_info.cpp                       |   1 +
 be/src/exec/tablet_info.h                         |   1 -
 be/src/runtime/buffer_control_block.cpp           |   1 -
 be/src/runtime/collection_value.cpp               |   1 -
 be/src/runtime/collection_value.h                 |   1 -
 be/src/runtime/raw_value.h                        | 183 +---------------------
 be/src/runtime/result_buffer_mgr.cpp              |   1 -
 be/src/runtime/result_queue_mgr.h                 |   1 -
 be/src/runtime/result_writer.h                    |   1 -
 be/src/runtime/struct_value.h                     |   2 -
 be/src/util/CMakeLists.txt                        |   1 -
 be/src/util/arrow/row_block.cpp                   | 127 ---------------
 be/src/util/arrow/row_block.h                     |  38 -----
 be/src/util/hash_util.hpp                         |   1 +
 be/src/vec/columns/column_const.cpp               |   3 +-
 be/src/vec/core/block.cpp                         |  11 --
 be/src/vec/core/block.h                           |   3 -
 be/src/vec/exec/scan/vfile_scanner.cpp            |   1 -
 be/src/vec/runtime/vdata_stream_mgr.cpp           |   7 +-
 be/src/vec/utils/arrow_column_to_doris_column.cpp |  55 -------
 be/src/vec/utils/arrow_column_to_doris_column.h   |   2 -
 be/test/olap/row_cursor_test.cpp                  |   2 -
 be/test/vec/exec/parquet/parquet_thrift_test.cpp  |  83 ++++++++--
 be/test/vec/exprs/vexpr_test.cpp                  |  76 ++++++++-
 27 files changed, 154 insertions(+), 527 deletions(-)

diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp
index edb057a55d..84329c832d 100644
--- a/be/src/exec/base_scanner.cpp
+++ b/be/src/exec/base_scanner.cpp
@@ -23,7 +23,6 @@
 #include "common/utils.h"
 #include "exec/exec_node.h"
 #include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
 #include "runtime/runtime_state.h"
 #include "vec/data_types/data_type_factory.hpp"
 
diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp
index adc52d024d..7b8e625d61 100644
--- a/be/src/exec/schema_scanner.cpp
+++ b/be/src/exec/schema_scanner.cpp
@@ -84,8 +84,6 @@ Status SchemaScanner::init(SchemaScannerParam* param, 
ObjectPool* pool) {
         return Status::InternalError("invalid parameter");
     }
 
-    RETURN_IF_ERROR(create_tuple_desc(pool));
-
     _param = param;
     _is_init = true;
 
@@ -302,76 +300,4 @@ Status 
SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
     return Status::OK();
 }
 
-Status SchemaScanner::create_tuple_desc(ObjectPool* pool) {
-    int null_column = 0;
-    for (int i = 0; i < _columns.size(); ++i) {
-        if (_columns[i].is_null) {
-            null_column++;
-        }
-    }
-
-    int offset = (null_column + 7) / 8;
-    std::vector<SlotDescriptor*> slots;
-    int null_byte = 0;
-    int null_bit = 0;
-
-    for (int i = 0; i < _columns.size(); ++i) {
-        TSlotDescriptor t_slot_desc;
-        if (_columns[i].type == TYPE_DECIMALV2) {
-            
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27, 
9).to_thrift());
-        } else {
-            TypeDescriptor descriptor(_columns[i].type);
-            if (_columns[i].precision >= 0 && _columns[i].scale >= 0) {
-                descriptor.precision = _columns[i].precision;
-                descriptor.scale = _columns[i].scale;
-            }
-            t_slot_desc.__set_slotType(descriptor.to_thrift());
-        }
-        t_slot_desc.__set_colName(_columns[i].name);
-        t_slot_desc.__set_columnPos(i);
-        t_slot_desc.__set_byteOffset(offset);
-
-        if (_columns[i].is_null) {
-            t_slot_desc.__set_nullIndicatorByte(null_byte);
-            t_slot_desc.__set_nullIndicatorBit(null_bit);
-            null_bit = (null_bit + 1) % 8;
-
-            if (0 == null_bit) {
-                null_byte++;
-            }
-        } else {
-            t_slot_desc.__set_nullIndicatorByte(0);
-            t_slot_desc.__set_nullIndicatorBit(-1);
-        }
-
-        t_slot_desc.id = i;
-        t_slot_desc.__set_slotIdx(i);
-        t_slot_desc.__set_isMaterialized(true);
-
-        SlotDescriptor* slot = pool->add(new (std::nothrow) 
SlotDescriptor(t_slot_desc));
-
-        if (nullptr == slot) {
-            return Status::InternalError("no memory for _tuple_desc.");
-        }
-
-        slots.push_back(slot);
-        offset += _columns[i].size;
-    }
-
-    TTupleDescriptor t_tuple_desc;
-    t_tuple_desc.__set_byteSize(offset);
-    t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
-    _tuple_desc = pool->add(new (std::nothrow) TupleDescriptor(t_tuple_desc));
-
-    if (nullptr == _tuple_desc) {
-        return Status::InternalError("no memory for _tuple_desc.");
-    }
-
-    for (int i = 0; i < slots.size(); ++i) {
-        _tuple_desc->add_slot(slots[i]);
-    }
-
-    return Status::OK();
-}
-
 } // namespace doris
diff --git a/be/src/exec/schema_scanner.h b/be/src/exec/schema_scanner.h
index 4f7faac06c..ca62f5ac02 100644
--- a/be/src/exec/schema_scanner.h
+++ b/be/src/exec/schema_scanner.h
@@ -88,7 +88,6 @@ public:
     const std::vector<ColumnDesc>& get_column_desc() const { return _columns; }
     // factory function
     static SchemaScanner* create(TSchemaTableType::type type);
-    const TupleDescriptor* tuple_desc() const { return _tuple_desc; }
     TSchemaTableType::type type() const { return _schema_table_type; }
 
     static void set_doris_server(DorisServer* doris_server) { _s_doris_server 
= doris_server; }
@@ -96,14 +95,12 @@ public:
 protected:
     Status fill_dest_column_for_range(vectorized::Block* block, size_t pos,
                                       const std::vector<void*>& datas);
-    Status create_tuple_desc(ObjectPool* pool);
 
     bool _is_init;
     // this is used for sub class
     SchemaScannerParam* _param;
     // schema table's column desc
     std::vector<ColumnDesc> _columns;
-    TupleDescriptor* _tuple_desc;
 
     static DorisServer* _s_doris_server;
 
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index d2d2e07a5c..a5c53294f1 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -18,6 +18,7 @@
 #include "exec/tablet_info.h"
 
 #include "runtime/large_int_value.h"
+#include "runtime/raw_value.h"
 #include "util/string_parser.hpp"
 
 namespace doris {
diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h
index 9753b71349..3556adb13f 100644
--- a/be/src/exec/tablet_info.h
+++ b/be/src/exec/tablet_info.h
@@ -29,7 +29,6 @@
 #include "gen_cpp/descriptors.pb.h"
 #include "olap/tablet_schema.h"
 #include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
 #include "vec/core/block.h"
 
 namespace doris {
diff --git a/be/src/runtime/buffer_control_block.cpp 
b/be/src/runtime/buffer_control_block.cpp
index 7fa0c50923..89ee93a012 100644
--- a/be/src/runtime/buffer_control_block.cpp
+++ b/be/src/runtime/buffer_control_block.cpp
@@ -20,7 +20,6 @@
 #include "gen_cpp/PaloInternalService_types.h"
 #include "gen_cpp/internal_service.pb.h"
 #include "runtime/exec_env.h"
-#include "runtime/raw_value.h"
 #include "runtime/thread_context.h"
 #include "service/brpc.h"
 #include "util/thrift_util.h"
diff --git a/be/src/runtime/collection_value.cpp 
b/be/src/runtime/collection_value.cpp
index 255fce195b..f4543d361d 100644
--- a/be/src/runtime/collection_value.cpp
+++ b/be/src/runtime/collection_value.cpp
@@ -22,7 +22,6 @@
 #include "common/object_pool.h"
 #include "common/utils.h"
 #include "runtime/mem_pool.h"
-#include "runtime/raw_value.h"
 #include "runtime/types.h"
 #include "vec/common/string_ref.h"
 
diff --git a/be/src/runtime/collection_value.h 
b/be/src/runtime/collection_value.h
index 6b59066f97..02a51e4bd1 100644
--- a/be/src/runtime/collection_value.h
+++ b/be/src/runtime/collection_value.h
@@ -36,7 +36,6 @@ struct ArrayIteratorFunctionsBase;
 class ArrayIterator;
 class Status;
 class ObjectPool;
-struct TypeDescriptor;
 
 template <PrimitiveType type>
 struct ArrayIteratorFunctions;
diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h
index 32633d0ae7..b1e635ea16 100644
--- a/be/src/runtime/raw_value.h
+++ b/be/src/runtime/raw_value.h
@@ -36,191 +36,14 @@ class SlotDescriptor;
 // Useful utility functions for runtime values (which are passed around as 
void*).
 class RawValue {
 public:
-    // Ascii output precision for double/float
-    static const int ASCII_PRECISION;
-
-    static uint32_t get_hash_value(const void* value, const PrimitiveType& 
type) {
-        return get_hash_value(value, type, 0);
-    }
-
-    static uint32_t get_hash_value(const void* value, const PrimitiveType& 
type, uint32_t seed);
-
-    // Returns hash value for 'value' interpreted as 'type'.  The resulting 
hash value
-    // is combined with the seed value.
-    static uint32_t get_hash_value(const void* value, const TypeDescriptor& 
type, uint32_t seed) {
-        return get_hash_value(value, type.type, seed);
-    }
-
-    static uint32_t get_hash_value(const void* value, const TypeDescriptor& 
type) {
-        return get_hash_value(value, type.type, 0);
-    }
-
-    // Get the hash value using the fvn hash function.  Using different seeds 
with FVN
-    // results in different hash functions.  get_hash_value() does not have 
this property
-    // and cannot be safely used as the first step in data repartitioning.
-    // However, get_hash_value() can be significantly faster.
-    // TODO: fix get_hash_value
-    static uint32_t zlib_crc32(const void* value, const TypeDescriptor& type, 
uint32_t seed);
-
     // Same as the up function, only use in vec exec engine.
-    static uint32_t zlib_crc32(const void* value, size_t len, const 
TypeDescriptor& type,
+    static uint32_t zlib_crc32(const void* value, size_t len, const 
PrimitiveType& type,
                                uint32_t seed);
-
-    // Compares both values.
-    // Return value is < 0  if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
-    static int compare(const void* v1, const void* v2, const TypeDescriptor& 
type);
-
-    // Returns true if v1 == v2.
-    // This is more performant than compare() == 0 for string equality, mostly 
because of
-    // the length comparison check.
-    static bool eq(const void* v1, const void* v2, const TypeDescriptor& type);
-
-    static bool lt(const void* v1, const void* v2, const TypeDescriptor& type);
 };
 
-// Use boost::hash_combine for corner cases.  boost::hash_combine is 
reimplemented
-// here to use int32t's (instead of size_t)
-// boost::hash_combine does:
-//  seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-inline uint32_t RawValue::get_hash_value(const void* v, const PrimitiveType& 
type, uint32_t seed) {
-    // Hash_combine with v = 0
-    if (v == nullptr) {
-        uint32_t value = 0x9e3779b9;
-        return seed ^ (value + (seed << 6) + (seed >> 2));
-    }
-
-    switch (type) {
-    case TYPE_VARCHAR:
-    case TYPE_CHAR:
-    case TYPE_HLL:
-    case TYPE_STRING: {
-        const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
-        return HashUtil::hash(string_value->data, string_value->size, seed);
-    }
-
-    case TYPE_BOOLEAN: {
-        uint32_t value = *reinterpret_cast<const bool*>(v) + 0x9e3779b9;
-        return seed ^ (value + (seed << 6) + (seed >> 2));
-    }
-
-    case TYPE_TINYINT:
-        return HashUtil::hash(v, 1, seed);
-
-    case TYPE_SMALLINT:
-        return HashUtil::hash(v, 2, seed);
-
-    case TYPE_INT:
-        return HashUtil::hash(v, 4, seed);
-
-    case TYPE_BIGINT:
-        return HashUtil::hash(v, 8, seed);
-
-    case TYPE_FLOAT:
-        return HashUtil::hash(v, 4, seed);
-
-    case TYPE_DOUBLE:
-        return HashUtil::hash(v, 8, seed);
-
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-        return HashUtil::hash(v, 16, seed);
-
-    case TYPE_DATEV2:
-        return HashUtil::hash(v, 4, seed);
-
-    case TYPE_DATETIMEV2:
-        return HashUtil::hash(v, 8, seed);
-
-    case TYPE_DECIMALV2:
-        return HashUtil::hash(v, 16, seed);
-    case TYPE_DECIMAL32:
-        return HashUtil::hash(v, 4, seed);
-    case TYPE_DECIMAL64:
-        return HashUtil::hash(v, 8, seed);
-    case TYPE_DECIMAL128I:
-        return HashUtil::hash(v, 16, seed);
-
-    case TYPE_LARGEINT:
-        return HashUtil::hash(v, 16, seed);
-
-    default:
-        DCHECK(false) << "invalid type: " << type;
-        return 0;
-    }
-}
-
 // NOTE: this is just for split data, decimal use old doris hash function
 // Because crc32 hardware is not equal with zlib crc32
-inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& 
type, uint32_t seed) {
-    // Hash_combine with v = 0
-    if (v == nullptr) {
-        uint32_t value = 0x9e3779b9;
-        return seed ^ (value + (seed << 6) + (seed >> 2));
-    }
-
-    switch (type.type) {
-    case TYPE_VARCHAR:
-    case TYPE_HLL:
-    case TYPE_STRING: {
-        const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
-        return HashUtil::zlib_crc_hash(string_value->data, string_value->size, 
seed);
-    }
-    case TYPE_CHAR: {
-        // TODO(zc): ugly, use actual value to compute hash value
-        const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
-        int len = 0;
-        while (len < string_value->size) {
-            if (string_value->data[len] == '\0') {
-                break;
-            }
-            len++;
-        }
-        return HashUtil::zlib_crc_hash(string_value->data, len, seed);
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-        return HashUtil::zlib_crc_hash(v, 1, seed);
-    case TYPE_SMALLINT:
-        return HashUtil::zlib_crc_hash(v, 2, seed);
-    case TYPE_INT:
-    case TYPE_DATEV2:
-    case TYPE_DECIMAL32:
-        return HashUtil::zlib_crc_hash(v, 4, seed);
-    case TYPE_BIGINT:
-    case TYPE_DATETIMEV2:
-    case TYPE_DECIMAL64:
-        return HashUtil::zlib_crc_hash(v, 8, seed);
-    case TYPE_LARGEINT:
-    case TYPE_DECIMAL128I:
-        return HashUtil::zlib_crc_hash(v, 16, seed);
-    case TYPE_FLOAT:
-        return HashUtil::zlib_crc_hash(v, 4, seed);
-    case TYPE_DOUBLE:
-        return HashUtil::zlib_crc_hash(v, 8, seed);
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        const DateTimeValue* date_val = (const DateTimeValue*)v;
-        char buf[64];
-        int len = date_val->to_buffer(buf);
-        return HashUtil::zlib_crc_hash(buf, len, seed);
-    }
-
-    case TYPE_DECIMALV2: {
-        const DecimalV2Value* dec_val = (const DecimalV2Value*)v;
-        int64_t int_val = dec_val->int_value();
-        int32_t frac_val = dec_val->frac_value();
-        seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed);
-        return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
-    }
-    default:
-        DCHECK(false) << "invalid type: " << type;
-        return 0;
-    }
-}
-
-// NOTE: this is just for split data, decimal use old doris hash function
-// Because crc32 hardware is not equal with zlib crc32
-inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const 
TypeDescriptor& type,
+inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const 
PrimitiveType& type,
                                      uint32_t seed) {
     // Hash_combine with v = 0
     if (v == nullptr) {
@@ -228,7 +51,7 @@ inline uint32_t RawValue::zlib_crc32(const void* v, size_t 
len, const TypeDescri
         return seed ^ (value + (seed << 6) + (seed >> 2));
     }
 
-    switch (type.type) {
+    switch (type) {
     case TYPE_VARCHAR:
     case TYPE_HLL:
     case TYPE_STRING:
diff --git a/be/src/runtime/result_buffer_mgr.cpp 
b/be/src/runtime/result_buffer_mgr.cpp
index 19fb2522b8..02229efe23 100644
--- a/be/src/runtime/result_buffer_mgr.cpp
+++ b/be/src/runtime/result_buffer_mgr.cpp
@@ -22,7 +22,6 @@
 #include "gen_cpp/PaloInternalService_types.h"
 #include "gen_cpp/types.pb.h"
 #include "runtime/buffer_control_block.h"
-#include "runtime/raw_value.h"
 #include "util/doris_metrics.h"
 
 namespace doris {
diff --git a/be/src/runtime/result_queue_mgr.h 
b/be/src/runtime/result_queue_mgr.h
index 088ad787d3..28dcabfaa5 100644
--- a/be/src/runtime/result_queue_mgr.h
+++ b/be/src/runtime/result_queue_mgr.h
@@ -24,7 +24,6 @@
 
 #include "common/status.h"
 #include "runtime/primitive_type.h"
-#include "runtime/raw_value.h"
 #include "runtime/record_batch_queue.h"
 #include "util/hash_util.hpp"
 
diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h
index 1d0bfde589..bd5a11cc8f 100644
--- a/be/src/runtime/result_writer.h
+++ b/be/src/runtime/result_writer.h
@@ -24,7 +24,6 @@ namespace doris {
 
 class Status;
 class RuntimeState;
-struct TypeDescriptor;
 
 namespace vectorized {
 class Block;
diff --git a/be/src/runtime/struct_value.h b/be/src/runtime/struct_value.h
index ec243d729c..d02ddc994c 100644
--- a/be/src/runtime/struct_value.h
+++ b/be/src/runtime/struct_value.h
@@ -45,8 +45,6 @@ public:
 
     void shallow_copy(const StructValue* other);
 
-    // size_t get_byte_size(const TypeDescriptor& type) const;
-
     const void** values() const { return const_cast<const void**>(_values); }
     void** mutable_values() { return _values; }
     void set_values(void** values) { _values = values; }
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 71ff0e089f..23e14e3b4c 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -23,7 +23,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/util")
 
 set(UTIL_FILES
   arrow/row_batch.cpp
-  arrow/row_block.cpp
   arrow/utils.cpp
   arrow/block_convertor.cpp
   bfd_parser.cpp
diff --git a/be/src/util/arrow/row_block.cpp b/be/src/util/arrow/row_block.cpp
deleted file mode 100644
index a7758707fb..0000000000
--- a/be/src/util/arrow/row_block.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "util/arrow/row_block.h"
-
-#include <arrow/array/builder_primitive.h>
-#include <arrow/memory_pool.h>
-#include <arrow/pretty_print.h>
-#include <arrow/record_batch.h>
-#include <arrow/type.h>
-#include <arrow/type_fwd.h>
-#include <arrow/visit_array_inline.h>
-#include <arrow/visit_type_inline.h>
-
-#include "gutil/strings/substitute.h"
-#include "olap/field.h"
-#include "olap/olap_common.h"
-#include "olap/schema.h"
-#include "olap/tablet_schema.h"
-#include "util/arrow/utils.h"
-
-namespace doris {
-
-using strings::Substitute;
-
-Status convert_to_arrow_type(FieldType type, std::shared_ptr<arrow::DataType>* 
result) {
-    switch (type) {
-    case OLAP_FIELD_TYPE_TINYINT:
-        *result = arrow::int8();
-        break;
-    case OLAP_FIELD_TYPE_SMALLINT:
-        *result = arrow::int16();
-        break;
-    case OLAP_FIELD_TYPE_INT:
-        *result = arrow::int32();
-        break;
-    case OLAP_FIELD_TYPE_BIGINT:
-        *result = arrow::int64();
-        break;
-    case OLAP_FIELD_TYPE_FLOAT:
-        *result = arrow::float32();
-        break;
-    case OLAP_FIELD_TYPE_DOUBLE:
-        *result = arrow::float64();
-        break;
-    default:
-        return Status::InvalidArgument("Unknown FieldType({})", type);
-    }
-    return Status::OK();
-}
-
-Status convert_to_arrow_field(uint32_t cid, const Field* field,
-                              std::shared_ptr<arrow::Field>* result) {
-    std::shared_ptr<arrow::DataType> type;
-    RETURN_IF_ERROR(convert_to_arrow_type(field->type(), &type));
-    *result = arrow::field(strings::Substitute("Col$0", cid), type, 
field->is_nullable());
-    return Status::OK();
-}
-
-Status convert_to_arrow_schema(const Schema& schema, 
std::shared_ptr<arrow::Schema>* result) {
-    std::vector<std::shared_ptr<arrow::Field>> fields;
-    size_t num_fields = schema.num_column_ids();
-    fields.resize(num_fields);
-    for (int i = 0; i < num_fields; ++i) {
-        auto cid = schema.column_ids()[i];
-        RETURN_IF_ERROR(convert_to_arrow_field(cid, schema.column(cid), 
&fields[i]));
-    }
-    *result = arrow::schema(std::move(fields));
-    return Status::OK();
-}
-
-Status convert_to_type_name(const arrow::DataType& type, std::string* name) {
-    switch (type.id()) {
-    case arrow::Type::INT8:
-        *name = "TINYINT";
-        break;
-    case arrow::Type::INT16:
-        *name = "SMALLINT";
-        break;
-    case arrow::Type::INT32:
-        *name = "INT";
-        break;
-    case arrow::Type::INT64:
-        *name = "BIGINT";
-        break;
-    case arrow::Type::FLOAT:
-        *name = "FLOAT";
-        break;
-    case arrow::Type::DOUBLE:
-        *name = "DOUBLE";
-        break;
-    default:
-        return Status::InvalidArgument("Unknown arrow type id({})", type.id());
-    }
-    return Status::OK();
-}
-
-Status convert_to_tablet_column(const arrow::Field& field, int32_t cid, 
TabletColumn* output) {
-    ColumnPB column_pb;
-    std::string type_name;
-    RETURN_IF_ERROR(convert_to_type_name(*field.type(), &type_name));
-
-    column_pb.set_unique_id(cid);
-    column_pb.set_name(field.name());
-    column_pb.set_type(type_name);
-    column_pb.set_is_key(true);
-    column_pb.set_is_nullable(field.nullable());
-
-    output->init_from_pb(column_pb);
-    return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/util/arrow/row_block.h b/be/src/util/arrow/row_block.h
deleted file mode 100644
index afdecdd52a..0000000000
--- a/be/src/util/arrow/row_block.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "common/status.h"
-
-namespace arrow {
-
-class Schema;
-class MemoryPool;
-class RecordBatch;
-
-} // namespace arrow
-
-namespace doris {
-class Schema;
-
-// Convert Doris Schema to Arrow Schema.
-Status convert_to_arrow_schema(const Schema& row_desc, 
std::shared_ptr<arrow::Schema>* result);
-
-} // namespace doris
diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index 726ad50f92..ecc0f04a19 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -37,6 +37,7 @@
 #include <zlib.h>
 
 #include "gen_cpp/Types_types.h"
+#include "runtime/define_primitive_type.h"
 #include "util/cpu_info.h"
 #include "util/murmur_hash3.h"
 
diff --git a/be/src/vec/columns/column_const.cpp 
b/be/src/vec/columns/column_const.cpp
index 8d79ec32f9..7b7fc4d753 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -131,8 +131,7 @@ void 
ColumnConst::update_crcs_with_value(std::vector<uint64_t>& hashes, doris::P
         }
     } else {
         for (int i = 0; i < hashes.size(); ++i) {
-            hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size, 
TypeDescriptor {type},
-                                             hashes[i]);
+            hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size, 
type, hashes[i]);
         }
     }
 }
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index a5534f8075..ebd7f3726d 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -825,17 +825,6 @@ Status Block::serialize(int be_exec_version, PBlock* 
pblock,
     return Status::OK();
 }
 
-inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc,
-                                       const StringRef& data_ref, const 
IColumn* column, int row) {
-    if (type_desc.type != TYPE_ARRAY) {
-        return data_ref.data == nullptr;
-    } else {
-        Field array;
-        column->get(row, array);
-        return array.is_null();
-    }
-}
-
 MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, 
int reserve_size,
                            bool ignore_trivial_slot) {
     for (auto tuple_desc : tuple_descs) {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 35026b846d..eef530659c 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -42,7 +42,6 @@ namespace doris {
 class RowDescriptor;
 class Status;
 class TupleDescriptor;
-struct TypeDescriptor;
 
 namespace vectorized {
 
@@ -371,8 +370,6 @@ public:
 
 private:
     void erase_impl(size_t position);
-    bool is_column_data_null(const doris::TypeDescriptor& type_desc, const 
StringRef& data_ref,
-                             const IColumn* column_with_type_and_name, int 
row);
 };
 
 using Blocks = std::vector<Block>;
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index c9cade7ec8..7ab2f5bc50 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -28,7 +28,6 @@
 #include "io/cache/block/block_file_cache_profile.h"
 #include "olap/iterators.h"
 #include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
 #include "runtime/runtime_state.h"
 #include "vec/exec/format/csv/csv_reader.h"
 #include "vec/exec/format/json/new_json_reader.h"
diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp 
b/be/src/vec/runtime/vdata_stream_mgr.cpp
index 282d641431..09c9cd77c7 100644
--- a/be/src/vec/runtime/vdata_stream_mgr.cpp
+++ b/be/src/vec/runtime/vdata_stream_mgr.cpp
@@ -20,7 +20,6 @@
 #include "gen_cpp/internal_service.pb.h"
 #include "runtime/descriptors.h"
 #include "runtime/primitive_type.h"
-#include "runtime/raw_value.h"
 #include "runtime/runtime_state.h"
 #include "util/doris_metrics.h"
 #include "util/runtime_profile.h"
@@ -39,9 +38,9 @@ VDataStreamMgr::~VDataStreamMgr() {
 
 inline uint32_t VDataStreamMgr::get_hash_value(const TUniqueId& 
fragment_instance_id,
                                                PlanNodeId node_id) {
-    uint32_t value = RawValue::get_hash_value(&fragment_instance_id.lo, 
TYPE_BIGINT, 0);
-    value = RawValue::get_hash_value(&fragment_instance_id.hi, TYPE_BIGINT, 
value);
-    value = RawValue::get_hash_value(&node_id, TYPE_INT, value);
+    uint32_t value = HashUtil::hash(&fragment_instance_id.lo, 8, 0);
+    value = HashUtil::hash(&fragment_instance_id.hi, 8, value);
+    value = HashUtil::hash(&node_id, 4, value);
     return value;
 }
 
diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp 
b/be/src/vec/utils/arrow_column_to_doris_column.cpp
index 35d5d4956a..c0d64df211 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.cpp
+++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp
@@ -408,59 +408,4 @@ Status arrow_column_to_doris_column(const arrow::Array* 
arrow_column, size_t arr
             fmt::format("Not support arrow type:{}", 
arrow_column->type()->name()));
 }
 
-Status arrow_type_to_doris_type(arrow::Type::type type, TypeDescriptor* 
return_type) {
-    switch (type) {
-    case arrow::Type::STRING:
-    case arrow::Type::BINARY:
-    case arrow::Type::FIXED_SIZE_BINARY:
-        return_type->type = TYPE_STRING;
-        break;
-    case arrow::Type::INT8:
-        return_type->type = TYPE_TINYINT;
-        break;
-    case arrow::Type::UINT8:
-    case arrow::Type::INT16:
-        return_type->type = TYPE_SMALLINT;
-        break;
-    case arrow::Type::UINT16:
-    case arrow::Type::INT32:
-        return_type->type = TYPE_INT;
-        break;
-    case arrow::Type::UINT32:
-    case arrow::Type::INT64:
-        return_type->type = TYPE_BIGINT;
-        break;
-    case arrow::Type::UINT64:
-        return_type->type = TYPE_LARGEINT;
-        break;
-    case arrow::Type::HALF_FLOAT:
-    case arrow::Type::FLOAT:
-        return_type->type = TYPE_FLOAT;
-        break;
-    case arrow::Type::DOUBLE:
-        return_type->type = TYPE_DOUBLE;
-        break;
-    case arrow::Type::BOOL:
-        return_type->type = TYPE_BOOLEAN;
-        break;
-    case arrow::Type::DATE32:
-        return_type->type = TYPE_DATEV2;
-        break;
-    case arrow::Type::DATE64:
-        return_type->type = TYPE_DATETIMEV2;
-        break;
-    case arrow::Type::TIMESTAMP:
-        return_type->type = TYPE_BIGINT;
-        break;
-    case arrow::Type::DECIMAL:
-        return_type->type = TYPE_DECIMALV2;
-        return_type->precision = 27;
-        return_type->scale = 9;
-        break;
-    default:
-        return Status::InternalError("unsupport type: {}", type);
-    }
-    return Status::OK();
-}
-
 } // namespace doris::vectorized
diff --git a/be/src/vec/utils/arrow_column_to_doris_column.h 
b/be/src/vec/utils/arrow_column_to_doris_column.h
index 84e26a7011..73704c95f5 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.h
+++ b/be/src/vec/utils/arrow_column_to_doris_column.h
@@ -42,6 +42,4 @@ Status arrow_column_to_doris_column(const arrow::Array* 
arrow_column, size_t arr
                                     ColumnPtr& doris_column, const 
DataTypePtr& type,
                                     size_t num_elements, const 
cctz::time_zone& ctz);
 
-Status arrow_type_to_doris_type(arrow::Type::type type, TypeDescriptor* 
return_type);
-
 } // namespace doris::vectorized
diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp
index 5899f557f3..9ee6ae7943 100644
--- a/be/test/olap/row_cursor_test.cpp
+++ b/be/test/olap/row_cursor_test.cpp
@@ -286,8 +286,6 @@ TEST_F(TestRowCursor, InitRowCursorWithColumnCount) {
     EXPECT_EQ(res, Status::OK());
     EXPECT_EQ(row.get_fixed_len(), 23);
     EXPECT_EQ(row.get_variable_len(), 0);
-    row.allocate_memory_for_string_type(tablet_schema);
-    EXPECT_EQ(row.get_variable_len(), 0);
 }
 
 TEST_F(TestRowCursor, InitRowCursorWithColIds) {
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp 
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index 9426505489..484d5adbd6 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -26,6 +26,7 @@
 #include "io/buffered_reader.h"
 #include "io/fs/local_file_system.h"
 #include "olap/iterators.h"
+#include "runtime/descriptors.h"
 #include "util/runtime_profile.h"
 #include "util/timezone_utils.h"
 #include "vec/common/string_ref.h"
@@ -226,6 +227,74 @@ static Status get_column_values(io::FileReaderSPtr 
file_reader, tparquet::Column
     }
 }
 
+// Only the unit test depend on this, but it is wrong, should not use 
TTupleDesc to create tuple desc, not
+// use columndesc
+static doris::TupleDescriptor* create_tuple_desc(
+        doris::ObjectPool* pool, 
std::vector<doris::SchemaScanner::ColumnDesc>& column_descs) {
+    using namespace doris;
+    int null_column = 0;
+    for (int i = 0; i < column_descs.size(); ++i) {
+        if (column_descs[i].is_null) {
+            null_column++;
+        }
+    }
+
+    int offset = (null_column + 7) / 8;
+    std::vector<SlotDescriptor*> slots;
+    int null_byte = 0;
+    int null_bit = 0;
+
+    for (int i = 0; i < column_descs.size(); ++i) {
+        TSlotDescriptor t_slot_desc;
+        if (column_descs[i].type == TYPE_DECIMALV2) {
+            
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27, 
9).to_thrift());
+        } else {
+            TypeDescriptor descriptor(column_descs[i].type);
+            if (column_descs[i].precision >= 0 && column_descs[i].scale >= 0) {
+                descriptor.precision = column_descs[i].precision;
+                descriptor.scale = column_descs[i].scale;
+            }
+            t_slot_desc.__set_slotType(descriptor.to_thrift());
+        }
+        t_slot_desc.__set_colName(column_descs[i].name);
+        t_slot_desc.__set_columnPos(i);
+        t_slot_desc.__set_byteOffset(offset);
+
+        if (column_descs[i].is_null) {
+            t_slot_desc.__set_nullIndicatorByte(null_byte);
+            t_slot_desc.__set_nullIndicatorBit(null_bit);
+            null_bit = (null_bit + 1) % 8;
+
+            if (0 == null_bit) {
+                null_byte++;
+            }
+        } else {
+            t_slot_desc.__set_nullIndicatorByte(0);
+            t_slot_desc.__set_nullIndicatorBit(-1);
+        }
+
+        t_slot_desc.id = i;
+        t_slot_desc.__set_slotIdx(i);
+        t_slot_desc.__set_isMaterialized(true);
+
+        SlotDescriptor* slot = pool->add(new (std::nothrow) 
SlotDescriptor(t_slot_desc));
+        slots.push_back(slot);
+        offset += column_descs[i].size;
+    }
+
+    TTupleDescriptor t_tuple_desc;
+    t_tuple_desc.__set_byteSize(offset);
+    t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
+    doris::TupleDescriptor* tuple_desc =
+            pool->add(new (std::nothrow) doris::TupleDescriptor(t_tuple_desc));
+
+    for (int i = 0; i < slots.size(); ++i) {
+        tuple_desc->add_slot(slots[i]);
+    }
+
+    return tuple_desc;
+}
+
 static void create_block(std::unique_ptr<vectorized::Block>& block) {
     // Current supported column type:
     std::vector<SchemaScanner::ColumnDesc> column_descs = {
@@ -247,11 +316,9 @@ static void 
create_block(std::unique_ptr<vectorized::Block>& block) {
             {"date_col", TYPE_DATE, sizeof(DateTimeValue), true},
             {"date_v2_col", TYPE_DATEV2, sizeof(uint32_t), true},
             {"timestamp_v2_col", TYPE_DATETIMEV2, sizeof(DateTimeValue), true, 
18, 0}};
-    SchemaScanner schema_scanner(column_descs);
     ObjectPool object_pool;
-    SchemaScannerParam param;
-    schema_scanner.init(&param, &object_pool);
-    auto tuple_slots = 
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc())->slots();
+    doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool, 
column_descs);
+    auto tuple_slots = tuple_desc->slots();
     block.reset(new vectorized::Block());
     for (const auto& slot_desc : tuple_slots) {
         auto data_type = slot_desc->get_data_type_ptr();
@@ -347,7 +414,7 @@ TEST_F(ParquetThriftReaderTest, dict_decoder) {
 }
 
 TEST_F(ParquetThriftReaderTest, group_reader) {
-    std::vector<SchemaScanner::ColumnDesc> column_descs = {
+    std::vector<doris::SchemaScanner::ColumnDesc> column_descs = {
             {"tinyint_col", TYPE_TINYINT, sizeof(int8_t), true},
             {"smallint_col", TYPE_SMALLINT, sizeof(int16_t), true},
             {"int_col", TYPE_INT, sizeof(int32_t), true},
@@ -362,11 +429,9 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
             {"char_col", TYPE_CHAR, sizeof(StringRef), true},
             {"varchar_col", TYPE_VARCHAR, sizeof(StringRef), true},
             {"date_col", TYPE_DATE, sizeof(DateTimeValue), true}};
-    SchemaScanner schema_scanner(column_descs);
     ObjectPool object_pool;
-    SchemaScannerParam param;
-    schema_scanner.init(&param, &object_pool);
-    auto tuple_slots = 
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc())->slots();
+    doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool, 
column_descs);
+    auto tuple_slots = tuple_desc->slots();
 
     TSlotDescriptor tslot_desc;
     {
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
index 0e91ba42e3..45b7ca5025 100644
--- a/be/test/vec/exprs/vexpr_test.cpp
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -26,6 +26,7 @@
 #include "exec/schema_scanner.h"
 #include "gen_cpp/Exprs_types.h"
 #include "gen_cpp/Types_types.h"
+#include "runtime/descriptors.h"
 #include "runtime/exec_env.h"
 #include "runtime/large_int_value.h"
 #include "runtime/memory/chunk_allocator.h"
@@ -63,15 +64,80 @@ TEST(TEST_VEXPR, ABSTEST) {
     context->close(&runtime_stat);
 }
 
+// Only the unit test depend on this, but it is wrong, should not use 
TTupleDesc to create tuple desc, not
+// use columndesc
+static doris::TupleDescriptor* create_tuple_desc(
+        doris::ObjectPool* pool, 
std::vector<doris::SchemaScanner::ColumnDesc>& column_descs) {
+    using namespace doris;
+    int null_column = 0;
+    for (int i = 0; i < column_descs.size(); ++i) {
+        if (column_descs[i].is_null) {
+            null_column++;
+        }
+    }
+
+    int offset = (null_column + 7) / 8;
+    std::vector<SlotDescriptor*> slots;
+    int null_byte = 0;
+    int null_bit = 0;
+
+    for (int i = 0; i < column_descs.size(); ++i) {
+        TSlotDescriptor t_slot_desc;
+        if (column_descs[i].type == TYPE_DECIMALV2) {
+            
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27, 
9).to_thrift());
+        } else {
+            TypeDescriptor descriptor(column_descs[i].type);
+            if (column_descs[i].precision >= 0 && column_descs[i].scale >= 0) {
+                descriptor.precision = column_descs[i].precision;
+                descriptor.scale = column_descs[i].scale;
+            }
+            t_slot_desc.__set_slotType(descriptor.to_thrift());
+        }
+        t_slot_desc.__set_colName(column_descs[i].name);
+        t_slot_desc.__set_columnPos(i);
+        t_slot_desc.__set_byteOffset(offset);
+
+        if (column_descs[i].is_null) {
+            t_slot_desc.__set_nullIndicatorByte(null_byte);
+            t_slot_desc.__set_nullIndicatorBit(null_bit);
+            null_bit = (null_bit + 1) % 8;
+
+            if (0 == null_bit) {
+                null_byte++;
+            }
+        } else {
+            t_slot_desc.__set_nullIndicatorByte(0);
+            t_slot_desc.__set_nullIndicatorBit(-1);
+        }
+
+        t_slot_desc.id = i;
+        t_slot_desc.__set_slotIdx(i);
+        t_slot_desc.__set_isMaterialized(true);
+
+        SlotDescriptor* slot = pool->add(new (std::nothrow) 
SlotDescriptor(t_slot_desc));
+        slots.push_back(slot);
+        offset += column_descs[i].size;
+    }
+
+    TTupleDescriptor t_tuple_desc;
+    t_tuple_desc.__set_byteSize(offset);
+    t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
+    doris::TupleDescriptor* tuple_desc =
+            pool->add(new (std::nothrow) doris::TupleDescriptor(t_tuple_desc));
+
+    for (int i = 0; i < slots.size(); ++i) {
+        tuple_desc->add_slot(slots[i]);
+    }
+
+    return tuple_desc;
+}
+
 TEST(TEST_VEXPR, ABSTEST2) {
     using namespace doris;
-    std::vector<SchemaScanner::ColumnDesc> column_descs = {
+    std::vector<doris::SchemaScanner::ColumnDesc> column_descs = {
             {"k1", TYPE_INT, sizeof(int32_t), false}};
-    SchemaScanner schema_scanner(column_descs);
     ObjectPool object_pool;
-    SchemaScannerParam param;
-    schema_scanner.init(&param, &object_pool);
-    auto tuple_desc = 
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc());
+    doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool, 
column_descs);
     RowDescriptor row_desc(tuple_desc, false);
     std::string expr_json =
             
R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_ud
 [...]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch master updated: [unify type system](remove unused type desc) remove some code (#17921)

Reply via email to