This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d913ca5731 [Opt](vectorized) Speed up bucket shuffle join hash compute (#12407) d913ca5731 is described below commit d913ca573116139b4cb95f6f649ce2a4b3870788 Author: HappenLee <happen...@hotmail.com> AuthorDate: Tue Sep 13 20:19:22 2022 +0800 [Opt](vectorized) Speed up bucket shuffle join hash compute (#12407) * [Opt](vectorized) Speed up bucket shuffle join hash compute --- be/src/exec/tablet_info.cpp | 12 ++--- be/src/runtime/data_stream_sender.cpp | 5 +- be/src/runtime/define_primitive_type.h | 58 ++++++++++++++++++++++ be/src/runtime/primitive_type.h | 38 +------------- be/src/runtime/raw_value.h | 27 ++-------- be/src/util/hash_util.hpp | 7 +++ be/src/vec/columns/column.h | 21 ++++++++ be/src/vec/columns/column_const.cpp | 18 +++++++ be/src/vec/columns/column_const.h | 3 ++ be/src/vec/columns/column_decimal.cpp | 33 ++++++++++++ be/src/vec/columns/column_decimal.h | 2 + be/src/vec/columns/column_nullable.cpp | 19 +++++++ be/src/vec/columns/column_nullable.h | 2 + be/src/vec/columns/column_string.cpp | 20 ++++++++ be/src/vec/columns/column_string.h | 3 ++ be/src/vec/columns/column_vector.cpp | 33 +++++++++++- be/src/vec/columns/column_vector.h | 3 ++ be/src/vec/sink/vdata_stream_sender.cpp | 18 ++----- .../org/apache/doris/analysis/DateLiteral.java | 28 ++++++++--- .../org/apache/doris/analysis/DecimalLiteral.java | 16 ++++-- .../org/apache/doris/analysis/LargeIntLiteral.java | 6 +++ 21 files changed, 274 insertions(+), 98 deletions(-) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index c89e03f0be..08e77d9565 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -220,10 +220,7 @@ Status OlapTablePartitionParam::init() { if (slot != nullptr) { hash_val = RawValue::zlib_crc32(slot, slot_desc->type(), hash_val); } else { - //nullptr is treat as 0 when hash - static const int INT_VALUE = 0; - static const TypeDescriptor INT_TYPE(TYPE_INT); - hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val); + hash_val = HashUtil::zlib_crc_hash_null(hash_val); } } return hash_val % num_buckets; @@ -492,16 +489,13 @@ Status VOlapTablePartitionParam::init() { uint32_t hash_val = 0; for (int i = 0; i < _distributed_slot_locs.size(); ++i) { auto slot_desc = _slots[_distributed_slot_locs[i]]; - auto column = key->first->get_by_position(_distributed_slot_locs[i]).column; + auto& column = key->first->get_by_position(_distributed_slot_locs[i]).column; auto val = column->get_data_at(key->second); if (val.data != nullptr) { hash_val = RawValue::zlib_crc32(val.data, val.size, slot_desc->type().type, hash_val); } else { - // NULL is treat as 0 when hash - static const int INT_VALUE = 0; - static const TypeDescriptor INT_TYPE(TYPE_INT); - hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val); + hash_val = HashUtil::zlib_crc_hash_null(hash_val); } } return hash_val % num_buckets; diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index 13b2308d7b..38d5aa1d51 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -626,10 +626,7 @@ Status DataStreamSender::process_distribute(RuntimeState* state, TupleRow* row, if (partition_val != nullptr) { hash_val = RawValue::zlib_crc32(partition_val, ctx->root()->type(), hash_val); } else { - //nullptr is treat as 0 when hash - static const int INT_VALUE = 0; - static const TypeDescriptor INT_TYPE(TYPE_INT); - hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val); + hash_val = HashUtil::zlib_crc_hash_null(hash_val); } } hash_val %= part->distributed_bucket(); diff --git a/be/src/runtime/define_primitive_type.h b/be/src/runtime/define_primitive_type.h new file mode 100644 index 0000000000..aa5e140a6e --- /dev/null +++ b/be/src/runtime/define_primitive_type.h @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace doris { +enum PrimitiveType { + INVALID_TYPE = 0, + TYPE_NULL, /* 1 */ + TYPE_BOOLEAN, /* 2 */ + TYPE_TINYINT, /* 3 */ + TYPE_SMALLINT, /* 4 */ + TYPE_INT, /* 5 */ + TYPE_BIGINT, /* 6 */ + TYPE_LARGEINT, /* 7 */ + TYPE_FLOAT, /* 8 */ + TYPE_DOUBLE, /* 9 */ + TYPE_VARCHAR, /* 10 */ + TYPE_DATE, /* 11 */ + TYPE_DATETIME, /* 12 */ + TYPE_BINARY, + /* 13 */ // Not implemented + TYPE_DECIMAL [[deprecated]], /* 14 */ + TYPE_CHAR, /* 15 */ + + TYPE_STRUCT, /* 16 */ + TYPE_ARRAY, /* 17 */ + TYPE_MAP, /* 18 */ + TYPE_HLL, /* 19 */ + TYPE_DECIMALV2, /* 20 */ + + TYPE_TIME, /* 21 */ + TYPE_OBJECT, /* 22 */ + TYPE_STRING, /* 23 */ + TYPE_QUANTILE_STATE, /* 24 */ + TYPE_DATEV2, /* 25 */ + TYPE_DATETIMEV2, /* 26 */ + TYPE_TIMEV2, /* 27 */ + TYPE_DECIMAL32, /* 28 */ + TYPE_DECIMAL64, /* 29 */ + TYPE_DECIMAL128, /* 30 */ +}; + +} diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 7fc6a728de..45efe0336c 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -19,6 +19,7 @@ #include <string> +#include "runtime/define_primitive_type.h" #include "vec/columns/column_decimal.h" #include "vec/columns/columns_number.h" #include "vec/core/types.h" @@ -33,43 +34,6 @@ class DateTimeValue; class DecimalV2Value; struct StringValue; -enum PrimitiveType { - INVALID_TYPE = 0, - TYPE_NULL, /* 1 */ - TYPE_BOOLEAN, /* 2 */ - TYPE_TINYINT, /* 3 */ - TYPE_SMALLINT, /* 4 */ - TYPE_INT, /* 5 */ - TYPE_BIGINT, /* 6 */ - TYPE_LARGEINT, /* 7 */ - TYPE_FLOAT, /* 8 */ - TYPE_DOUBLE, /* 9 */ - TYPE_VARCHAR, /* 10 */ - TYPE_DATE, /* 11 */ - TYPE_DATETIME, /* 12 */ - TYPE_BINARY, - /* 13 */ // Not implemented - TYPE_DECIMAL [[deprecated]], /* 14 */ - TYPE_CHAR, /* 15 */ - - TYPE_STRUCT, /* 16 */ - TYPE_ARRAY, /* 17 */ - TYPE_MAP, /* 18 */ - TYPE_HLL, /* 19 */ - TYPE_DECIMALV2, /* 20 */ - - TYPE_TIME, /* 21 */ - TYPE_OBJECT, /* 22 */ - TYPE_STRING, /* 23 */ - TYPE_QUANTILE_STATE, /* 24 */ - TYPE_DATEV2, /* 25 */ - TYPE_DATETIMEV2, /* 26 */ - TYPE_TIMEV2, /* 27 */ - TYPE_DECIMAL32, /* 28 */ - TYPE_DECIMAL64, /* 29 */ - TYPE_DECIMAL128, /* 30 */ -}; - PrimitiveType convert_type_to_primitive(FunctionContext::Type type); bool is_enumeration_type(PrimitiveType type); diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h index 7c5f990061..990e8b9015 100644 --- a/be/src/runtime/raw_value.h +++ b/be/src/runtime/raw_value.h @@ -442,10 +442,15 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, case TYPE_SMALLINT: return HashUtil::zlib_crc_hash(v, 2, seed); case TYPE_INT: + case TYPE_DATEV2: + case TYPE_DECIMAL32: return HashUtil::zlib_crc_hash(v, 4, seed); case TYPE_BIGINT: + case TYPE_DATETIMEV2: + case TYPE_DECIMAL64: return HashUtil::zlib_crc_hash(v, 8, seed); case TYPE_LARGEINT: + case TYPE_DECIMAL128: return HashUtil::zlib_crc_hash(v, 16, seed); case TYPE_FLOAT: return HashUtil::zlib_crc_hash(v, 4, seed); @@ -458,21 +463,6 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, int len = date_val->to_buffer(buf); return HashUtil::zlib_crc_hash(buf, len, seed); } - case TYPE_DATEV2: { - const vectorized::DateV2Value<doris::vectorized::DateV2ValueType>* date_v2_val = - (const vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*)v; - char buf[64]; - int len = date_v2_val->to_buffer(buf); - return HashUtil::zlib_crc_hash(buf, len, seed); - } - - case TYPE_DATETIMEV2: { - const vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>* date_v2_val = - (const vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*)v; - char buf[64]; - int len = date_v2_val->to_buffer(buf); - return HashUtil::zlib_crc_hash(buf, len, seed); - } case TYPE_DECIMALV2: { const DecimalV2Value* dec_val = (const DecimalV2Value*)v; @@ -481,13 +471,6 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed); return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed); } - - case TYPE_DECIMAL32: - return HashUtil::zlib_crc_hash(v, 4, seed); - case TYPE_DECIMAL64: - return HashUtil::zlib_crc_hash(v, 8, seed); - case TYPE_DECIMAL128: - return HashUtil::zlib_crc_hash(v, 16, seed); default: DCHECK(false) << "invalid type: " << type; return 0; diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index 3a7ac11704..0ebcc6e11b 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -47,6 +47,13 @@ public: static uint32_t zlib_crc_hash(const void* data, int32_t bytes, uint32_t hash) { return crc32(hash, (const unsigned char*)data, bytes); } + + static uint32_t zlib_crc_hash_null(uint32_t hash) { + // null is treat as 0 when hash + static const int INT_VALUE = 0; + return crc32(hash, (const unsigned char*)(&INT_VALUE), 4); + } + #if defined(__SSE4_2__) || defined(__aarch64__) // Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is // the current hash/seed value. diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 71d737274c..6d085fa7c7 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -20,6 +20,7 @@ #pragma once +#include "runtime/define_primitive_type.h" #include "vec/common/cow.h" #include "vec/common/exception.h" #include "vec/common/pod_array_fwd.h" @@ -43,6 +44,18 @@ class SipHash; } \ } +#define DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() \ + if (null_data == nullptr) { \ + for (size_t i = 0; i < s; i++) { \ + hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(T), hashes[i]); \ + } \ + } else { \ + for (size_t i = 0; i < s; i++) { \ + if (null_data[i] == 0) \ + hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(T), hashes[i]); \ + } \ + } + namespace doris::vectorized { class Arena; @@ -322,6 +335,14 @@ public: LOG(FATAL) << "update_hashes_with_value not supported"; }; + /// Update state of crc32 hash function with value of n elements to avoid the virtual function call + /// null_data to mark whether need to do hash compute, null_data == nullptr + /// means all element need to do hash function, else only *null_data != 0 need to do hash func + virtual void update_crcs_with_value(std::vector<uint32_t>& hash, PrimitiveType type, + const uint8_t* __restrict null_data = nullptr) const { + LOG(FATAL) << "update_crcs_with_value not supported"; + }; + /** Removes elements that don't match the filter. * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp index 946074cf61..4bd72ccf8d 100644 --- a/be/src/vec/columns/column_const.cpp +++ b/be/src/vec/columns/column_const.cpp @@ -20,6 +20,7 @@ #include "vec/columns/column_const.h" +#include "runtime/raw_value.h" #include "vec/columns/columns_common.h" #include "vec/common/pod_array.h" #include "vec/common/sip_hash.h" @@ -103,6 +104,23 @@ void ColumnConst::update_hashes_with_value(std::vector<SipHash>& hashes, } } +void ColumnConst::update_crcs_with_value(std::vector<uint32_t>& hashes, doris::PrimitiveType type, + const uint8_t* __restrict null_data) const { + DCHECK(null_data == nullptr); + DCHECK(hashes.size() == size()); + auto real_data = data->get_data_at(0); + if (real_data.data == nullptr) { + for (int i = 0; i < hashes.size(); ++i) { + hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]); + } + } else { + for (int i = 0; i < hashes.size(); ++i) { + hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size, TypeDescriptor {type}, + hashes[i]); + } + } +} + MutableColumns ColumnConst::scatter(ColumnIndex num_columns, const Selector& selector) const { if (s != selector.size()) { LOG(FATAL) << fmt::format("Size of selector ({}) doesn't match size of column ({})", diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index be7f56ab23..422316075b 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -135,6 +135,9 @@ public: void update_hashes_with_value(std::vector<SipHash>& hashes, const uint8_t* __restrict null_data) const override; + void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const override; + ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; ColumnPtr replicate(const Offsets& offsets) const override; void replicate(const uint32_t* counts, size_t target_size, IColumn& column) const override; diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index ea904c8c30..9b6470a371 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -127,6 +127,39 @@ void ColumnDecimal<T>::update_hashes_with_value(std::vector<SipHash>& hashes, SIP_HASHES_FUNCTION_COLUMN_IMPL(); } +template <typename T> +void ColumnDecimal<T>::update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const { + auto s = hashes.size(); + DCHECK(s == size()); + + if constexpr (!std::is_same_v<T, Decimal128>) { + DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() + } else { + if (type == TYPE_DECIMALV2) { + auto decimalv2_do_crc = [&](size_t i) { + const DecimalV2Value& dec_val = (const DecimalV2Value&)data[i]; + int64_t int_val = dec_val.int_value(); + int32_t frac_val = dec_val.frac_value(); + hashes[i] = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), hashes[i]); + hashes[i] = HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), hashes[i]); + }; + + if (null_data == nullptr) { + for (size_t i = 0; i < s; i++) { + decimalv2_do_crc(i); + } + } else { + for (size_t i = 0; i < s; i++) { + if (null_data[i] == 0) decimalv2_do_crc(i); + } + } + } else { + DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() + } + } +} + template <typename T> void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int, IColumn::Permutation& res) const { diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 81e037278c..41dba1827d 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -156,6 +156,8 @@ public: void update_hash_with_value(size_t n, SipHash& hash) const override; void update_hashes_with_value(std::vector<SipHash>& hash, const uint8_t* __restrict null_data) const override; + void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const override; int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override; void get_permutation(bool reverse, size_t limit, int nan_direction_hint, diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 4f18163479..29b1887421 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -68,6 +68,25 @@ void ColumnNullable::update_hashes_with_value(std::vector<SipHash>& hashes, } } +void ColumnNullable::update_crcs_with_value(std::vector<uint32_t>& hashes, + doris::PrimitiveType type, + const uint8_t* __restrict null_data) const { + DCHECK(null_data == nullptr); + auto s = hashes.size(); + DCHECK(s == size()); + auto* __restrict real_null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data().data(); + if (!has_null()) { + nested_column->update_crcs_with_value(hashes, type, nullptr); + } else { + for (int i = 0; i < s; ++i) { + if (real_null_data[i] != 0) { + hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]); + } + } + nested_column->update_crcs_with_value(hashes, type, real_null_data); + } +} + MutableColumnPtr ColumnNullable::clone_resized(size_t new_size) const { MutableColumnPtr new_nested_col = get_nested_column().clone_resized(new_size); auto new_null_map = ColumnUInt8::create(); diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 2a8bf52dd2..523ff337ae 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -161,6 +161,8 @@ public: void update_hash_with_value(size_t n, SipHash& hash) const override; void update_hashes_with_value(std::vector<SipHash>& hashes, const uint8_t* __restrict null_data) const override; + void update_crcs_with_value(std::vector<uint32_t>& hash, PrimitiveType type, + const uint8_t* __restrict null_data) const override; void get_extremes(Field& min, Field& max) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override { diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 18f0d74d23..20f0d3c534 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -104,6 +104,26 @@ void ColumnString::insert_indices_from(const IColumn& src, const int* indices_be } } +void ColumnString::update_crcs_with_value(std::vector<uint32_t>& hashes, doris::PrimitiveType type, + const uint8_t* __restrict null_data) const { + auto s = hashes.size(); + DCHECK(s == size()); + + if (null_data == nullptr) { + for (size_t i = 0; i < s; i++) { + auto data_ref = get_data_at(i); + hashes[i] = HashUtil::zlib_crc_hash(data_ref.data, data_ref.size, hashes[i]); + } + } else { + for (size_t i = 0; i < s; i++) { + if (null_data[i] == 0) { + auto data_ref = get_data_at(i); + hashes[i] = HashUtil::zlib_crc_hash(data_ref.data, data_ref.size, hashes[i]); + } + } + } +} + ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint) const { if (offsets.size() == 0) return ColumnString::create(); diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 8c504c6d08..ee8e6cfdd3 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -256,6 +256,9 @@ public: SIP_HASHES_FUNCTION_COLUMN_IMPL(); } + void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const override; + void insert_range_from(const IColumn& src, size_t start, size_t length) override; void insert_indices_from(const IColumn& src, const int* indices_begin, diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index d60f8816a1..1678e5d7f4 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -26,7 +26,6 @@ #include <cmath> #include <cstring> -#include "runtime/datetime_value.h" #include "util/simd/bits.h" #include "vec/common/arena.h" #include "vec/common/assert_cast.h" @@ -119,6 +118,38 @@ void ColumnVector<T>::sort_column(const ColumnSorter* sorter, EqualFlags& flags, sorter->template sort_column(static_cast<const Self&>(*this), flags, perms, range, last_column); } +template <typename T> +void ColumnVector<T>::update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const { + auto s = hashes.size(); + DCHECK(s == size()); + + if constexpr (!std::is_same_v<T, Int64>) { + DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() + } else { + if (type == TYPE_DATE || type == TYPE_DATETIME) { + char buf[64]; + auto date_convert_do_crc = [&](size_t i) { + const DateTimeValue& date_val = (const DateTimeValue&)data[i]; + auto len = date_val.to_buffer(buf); + hashes[i] = HashUtil::zlib_crc_hash(buf, len, hashes[i]); + }; + + if (null_data == nullptr) { + for (size_t i = 0; i < s; i++) { + date_convert_do_crc(i); + } + } else { + for (size_t i = 0; i < s; i++) { + if (null_data[i] == 0) date_convert_do_crc(i); + } + } + } else { + DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() + } + } +} + template <typename T> struct ColumnVector<T>::less { const Self& parent; diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 3a8ec82382..447886d08f 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -250,6 +250,9 @@ public: void update_hashes_with_value(std::vector<SipHash>& hashes, const uint8_t* __restrict null_data) const override; + void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType type, + const uint8_t* __restrict null_data) const override; + size_t byte_size() const override { return data.size() * sizeof(data[0]); } size_t allocated_bytes() const override { return data.allocated_bytes(); } diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 679890a60c..03482ed3b6 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -536,24 +536,12 @@ Status VDataStreamSender::send(RuntimeState* state, Block* block) { // vectorized calculate hash val int rows = block->rows(); // for each row, we have a hash_val - std::vector<size_t> hash_vals(rows); + std::vector<uint32_t> hash_vals(rows); // result[j] means column index, i means rows index for (int j = 0; j < result_size; ++j) { - auto& column = block->get_by_position(result[j]).column; - for (int i = 0; i < rows; ++i) { - auto val = column->get_data_at(i); - if (val.data == nullptr) { - // nullptr is treat as 0 when hash - static const int INT_VALUE = 0; - static const TypeDescriptor INT_TYPE(TYPE_INT); - hash_vals[i] = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_vals[i]); - } else { - hash_vals[i] = RawValue::zlib_crc32(val.data, val.size, - _partition_expr_ctxs[j]->root()->type(), - hash_vals[i]); - } - } + block->get_by_position(result[j]).column->update_crcs_with_value( + hash_vals, _partition_expr_ctxs[j]->root()->type().type); } Block::erase_useless_column(block, column_to_keep); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index 91a17cd20c..2bdb3e43ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -40,6 +40,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.sql.Timestamp; import java.time.LocalDateTime; import java.time.Year; @@ -495,14 +496,27 @@ public class DateLiteral extends LiteralExpr { // Date column and Datetime column's hash value is not same. @Override public ByteBuffer getHashValue(PrimitiveType type) { - // This hash value should be computed using new String since precision is introduced to datetime. - // But it is hard to keep compatibility. So I don't change this function here. - String value = convertToString(type); ByteBuffer buffer; - try { - buffer = ByteBuffer.wrap(value.getBytes("UTF-8")); - } catch (Exception e) { - throw new RuntimeException(e); + if (type == PrimitiveType.DATEV2) { + int value = (int) ((year << 9) | (month << 5) | day); + buffer = ByteBuffer.allocate(4); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(value); + } else if (type == PrimitiveType.DATETIMEV2) { + long value = (year << 50) | (month << 46) | (day << 41) | (hour << 36) + | (minute << 30) | (second << 24) | microsecond; + buffer = ByteBuffer.allocate(8); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putLong(value); + } else { + // This hash value should be computed using new String since precision is introduced to datetime. + // But it is hard to keep compatibility. So I don't change this function here. + String value = convertToString(type); + try { + buffer = ByteBuffer.wrap(value.getBytes("UTF-8")); + } catch (Exception e) { + throw new RuntimeException(e); + } } return buffer; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java index 1c013cf5f1..53ca2ba9ca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java @@ -159,9 +159,6 @@ public class DecimalLiteral extends LiteralExpr { buffer.putLong(value.longValue()); break; case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: buffer = ByteBuffer.allocate(12); buffer.order(ByteOrder.LITTLE_ENDIAN); @@ -170,6 +167,19 @@ public class DecimalLiteral extends LiteralExpr { buffer.putLong(integerValue); buffer.putInt(fracValue); break; + case DECIMAL32: + buffer = ByteBuffer.allocate(4); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(value.unscaledValue().intValue()); + break; + case DECIMAL64: + buffer = ByteBuffer.allocate(8); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putLong(value.unscaledValue().longValue()); + break; + case DECIMAL128: + LargeIntLiteral tmp = new LargeIntLiteral(value.unscaledValue()); + return tmp.getHashValue(type); default: return super.getHashValue(type); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java index 7cb87669a4..fde708b05b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java @@ -62,6 +62,12 @@ public class LargeIntLiteral extends LiteralExpr { analysisDone(); } + public LargeIntLiteral(BigInteger v) { + super(); + type = Type.LARGEINT; + value = v; + } + public LargeIntLiteral(String value) throws AnalysisException { super(); BigInteger bigInt; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org