This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 64870b3bbbb [Chore](hash-table) remove has_nullable_keys template argument of MethodKeysFixed (#43005) 64870b3bbbb is described below commit 64870b3bbbb245a5bf8c02d58cf8e9a8dd2532d8 Author: Pxl <pxl...@qq.com> AuthorDate: Fri Nov 1 18:04:20 2024 +0800 [Chore](hash-table) remove has_nullable_keys template argument of MethodKeysFixed (#43005) ## Proposed changes remove has_nullable_keys template argument of MethodKeysFixed --- be/src/pipeline/common/agg_utils.h | 54 +++++------- be/src/pipeline/common/distinct_agg_utils.h | 45 ++++------ be/src/pipeline/common/join_utils.h | 66 ++++++--------- be/src/pipeline/common/partition_sort_utils.h | 98 +++++++++------------- be/src/pipeline/common/set_utils.h | 26 +++--- .../exec/join/process_hash_table_probe_impl.h | 34 ++++---- be/src/vec/common/columns_hashing.h | 18 ++-- be/src/vec/common/columns_hashing_impl.h | 58 ------------- be/src/vec/common/hash_table/hash_map_context.h | 47 ++++++----- be/src/vec/common/hash_table/hash_map_util.h | 12 +-- 10 files changed, 164 insertions(+), 294 deletions(-) diff --git a/be/src/pipeline/common/agg_utils.h b/be/src/pipeline/common/agg_utils.h index a3cc175b1ed..135bc677123 100644 --- a/be/src/pipeline/common/agg_utils.h +++ b/be/src/pipeline/common/agg_utils.h @@ -80,23 +80,19 @@ using AggregatedMethodVariants = std::variant< vectorized::UInt256, AggDataNullable<vectorized::UInt256>>>, vectorized::MethodSingleNullableColumn< vectorized::MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt64>, false>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt64>, true>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt128>, false>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt128>, true>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt256>, false>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt256>, true>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt136>, false>, - vectorized::MethodKeysFixed<AggData<vectorized::UInt136>, true>>; + vectorized::MethodKeysFixed<AggData<vectorized::UInt64>>, + vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>, + vectorized::MethodKeysFixed<AggData<vectorized::UInt256>>, + vectorized::MethodKeysFixed<AggData<vectorized::UInt136>>>; struct AggregatedDataVariants : public DataVariants<AggregatedMethodVariants, vectorized::MethodSingleNullableColumn, - vectorized::MethodOneNumber, vectorized::MethodKeysFixed, - vectorized::DataWithNullKey> { + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { AggregatedDataWithoutKey without_key = nullptr; - template <bool nullable> void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); + switch (type) { case HashKeyType::without_key: break; @@ -104,28 +100,28 @@ struct AggregatedDataVariants method_variant.emplace<vectorized::MethodSerialized<AggregatedDataWithStringKey>>(); break; case HashKeyType::int8_key: - emplace_single<vectorized::UInt8, AggData<vectorized::UInt8>, nullable>(); + emplace_single<vectorized::UInt8, AggData<vectorized::UInt8>>(nullable); break; case HashKeyType::int16_key: - emplace_single<vectorized::UInt16, AggData<vectorized::UInt16>, nullable>(); + emplace_single<vectorized::UInt16, AggData<vectorized::UInt16>>(nullable); break; case HashKeyType::int32_key: - emplace_single<vectorized::UInt32, AggData<vectorized::UInt32>, nullable>(); + emplace_single<vectorized::UInt32, AggData<vectorized::UInt32>>(nullable); break; case HashKeyType::int32_key_phase2: - emplace_single<vectorized::UInt32, AggregatedDataWithUInt32KeyPhase2, nullable>(); + emplace_single<vectorized::UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable); break; case HashKeyType::int64_key: - emplace_single<vectorized::UInt64, AggData<vectorized::UInt64>, nullable>(); + emplace_single<vectorized::UInt64, AggData<vectorized::UInt64>>(nullable); break; case HashKeyType::int64_key_phase2: - emplace_single<vectorized::UInt64, AggregatedDataWithUInt64KeyPhase2, nullable>(); + emplace_single<vectorized::UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable); break; case HashKeyType::int128_key: - emplace_single<vectorized::UInt128, AggData<vectorized::UInt128>, nullable>(); + emplace_single<vectorized::UInt128, AggData<vectorized::UInt128>>(nullable); break; case HashKeyType::int256_key: - emplace_single<vectorized::UInt256, AggData<vectorized::UInt256>, nullable>(); + emplace_single<vectorized::UInt256, AggData<vectorized::UInt256>>(nullable); break; case HashKeyType::string_key: if (nullable) { @@ -138,24 +134,20 @@ struct AggregatedDataVariants } break; case HashKeyType::fixed64: - method_variant - .emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt64>, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt64>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant - .emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt128>, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant - .emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt136>, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt136>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant - .emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt256>, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt256>>>( + get_key_sizes(data_types)); break; default: throw Exception(ErrorCode::INTERNAL_ERROR, diff --git a/be/src/pipeline/common/distinct_agg_utils.h b/be/src/pipeline/common/distinct_agg_utils.h index c7ecbd2142c..806039d5a36 100644 --- a/be/src/pipeline/common/distinct_agg_utils.h +++ b/be/src/pipeline/common/distinct_agg_utils.h @@ -72,48 +72,43 @@ using DistinctMethodVariants = std::variant< vectorized::DataWithNullKey<DistinctData<vectorized::UInt256>>>>, vectorized::MethodSingleNullableColumn<vectorized::MethodStringNoCache< vectorized::DataWithNullKey<DistinctDataWithShortStringKey>>>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>, false>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>, true>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>, false>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>, true>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>, false>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>, true>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>, false>, - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>, true>>; + vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>>, + vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>, + vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>>, + vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>>>; struct DistinctDataVariants : public DataVariants<DistinctMethodVariants, vectorized::MethodSingleNullableColumn, - vectorized::MethodOneNumber, vectorized::MethodKeysFixed, - vectorized::DataWithNullKey> { - template <bool nullable> + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); switch (type) { case HashKeyType::serialized: method_variant.emplace<vectorized::MethodSerialized<DistinctDataWithStringKey>>(); break; case HashKeyType::int8_key: - emplace_single<vectorized::UInt8, DistinctData<vectorized::UInt8>, nullable>(); + emplace_single<vectorized::UInt8, DistinctData<vectorized::UInt8>>(nullable); break; case HashKeyType::int16_key: - emplace_single<vectorized::UInt16, DistinctData<vectorized::UInt16>, nullable>(); + emplace_single<vectorized::UInt16, DistinctData<vectorized::UInt16>>(nullable); break; case HashKeyType::int32_key: - emplace_single<vectorized::UInt32, DistinctData<vectorized::UInt32>, nullable>(); + emplace_single<vectorized::UInt32, DistinctData<vectorized::UInt32>>(nullable); break; case HashKeyType::int32_key_phase2: - emplace_single<vectorized::UInt32, DistinctDataPhase2<vectorized::UInt32>, nullable>(); + emplace_single<vectorized::UInt32, DistinctDataPhase2<vectorized::UInt32>>(nullable); break; case HashKeyType::int64_key: - emplace_single<vectorized::UInt64, DistinctData<vectorized::UInt64>, nullable>(); + emplace_single<vectorized::UInt64, DistinctData<vectorized::UInt64>>(nullable); break; case HashKeyType::int64_key_phase2: - emplace_single<vectorized::UInt64, DistinctDataPhase2<vectorized::UInt64>, nullable>(); + emplace_single<vectorized::UInt64, DistinctDataPhase2<vectorized::UInt64>>(nullable); break; case HashKeyType::int128_key: - emplace_single<vectorized::UInt128, DistinctData<vectorized::UInt128>, nullable>(); + emplace_single<vectorized::UInt128, DistinctData<vectorized::UInt128>>(nullable); break; case HashKeyType::int256_key: - emplace_single<vectorized::UInt256, DistinctData<vectorized::UInt256>, nullable>(); + emplace_single<vectorized::UInt256, DistinctData<vectorized::UInt256>>(nullable); break; case HashKeyType::string_key: if (nullable) { @@ -126,23 +121,19 @@ struct DistinctDataVariants } break; case HashKeyType::fixed64: - method_variant.emplace< - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>, nullable>>( + method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>>>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace< - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>, nullable>>( + method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>>( get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace< - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>, nullable>>( + method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>>>( get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace< - vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>, nullable>>( + method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>>>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/common/join_utils.h b/be/src/pipeline/common/join_utils.h index 52c56abde1a..e214d1a5293 100644 --- a/be/src/pipeline/common/join_utils.h +++ b/be/src/pipeline/common/join_utils.h @@ -36,43 +36,29 @@ using JoinOpVariants = std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN>, std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN>>; -using SerializedHashTableContext = vectorized::MethodSerialized<JoinHashMap<StringRef>>; -using I8HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt8>; -using I16HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt16>; -using I32HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt32>; -using I64HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt64>; -using I128HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt128>; -using I256HashTableContext = vectorized::PrimaryTypeHashTableContext<vectorized::UInt256>; -using MethodOneString = vectorized::MethodStringNoCache<JoinHashMap<StringRef>>; -template <bool has_null> -using I64FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext<vectorized::UInt64, has_null>; - -template <bool has_null> -using I128FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext<vectorized::UInt128, has_null>; +template <class T> +using PrimaryTypeHashTableContext = vectorized::MethodOneNumber<T, JoinHashMap<T, HashCRC32<T>>>; -template <bool has_null> -using I256FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext<vectorized::UInt256, has_null>; +template <class Key> +using FixedKeyHashTableContext = vectorized::MethodKeysFixed<JoinHashMap<Key, HashCRC32<Key>>>; -template <bool has_null> -using I136FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext<vectorized::UInt136, has_null>; +using SerializedHashTableContext = vectorized::MethodSerialized<JoinHashMap<StringRef>>; +using MethodOneString = vectorized::MethodStringNoCache<JoinHashMap<StringRef>>; -using HashTableVariants = - std::variant<std::monostate, SerializedHashTableContext, I8HashTableContext, - I16HashTableContext, I32HashTableContext, I64HashTableContext, - I128HashTableContext, I256HashTableContext, I64FixedKeyHashTableContext<true>, - I64FixedKeyHashTableContext<false>, I128FixedKeyHashTableContext<true>, - I128FixedKeyHashTableContext<false>, I256FixedKeyHashTableContext<true>, - I256FixedKeyHashTableContext<false>, I136FixedKeyHashTableContext<true>, - I136FixedKeyHashTableContext<false>, MethodOneString>; +using HashTableVariants = std::variant< + std::monostate, SerializedHashTableContext, PrimaryTypeHashTableContext<vectorized::UInt8>, + PrimaryTypeHashTableContext<vectorized::UInt16>, + PrimaryTypeHashTableContext<vectorized::UInt32>, + PrimaryTypeHashTableContext<vectorized::UInt64>, + PrimaryTypeHashTableContext<vectorized::UInt128>, + PrimaryTypeHashTableContext<vectorized::UInt256>, + FixedKeyHashTableContext<vectorized::UInt64>, FixedKeyHashTableContext<vectorized::UInt128>, + FixedKeyHashTableContext<vectorized::UInt136>, + FixedKeyHashTableContext<vectorized::UInt256>, MethodOneString>; struct JoinDataVariants { HashTableVariants method_variant; - template <bool nullable> void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) { // todo: support single column nullable context switch (type) { @@ -80,40 +66,40 @@ struct JoinDataVariants { method_variant.emplace<SerializedHashTableContext>(); break; case HashKeyType::int8_key: - method_variant.emplace<I8HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt8>>(); break; case HashKeyType::int16_key: - method_variant.emplace<I16HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt16>>(); break; case HashKeyType::int32_key: - method_variant.emplace<I32HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt32>>(); break; case HashKeyType::int64_key: - method_variant.emplace<I64HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt64>>(); break; case HashKeyType::int128_key: - method_variant.emplace<I128HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt128>>(); break; case HashKeyType::int256_key: - method_variant.emplace<I256HashTableContext>(); + method_variant.emplace<PrimaryTypeHashTableContext<vectorized::UInt256>>(); break; case HashKeyType::string_key: method_variant.emplace<MethodOneString>(); break; case HashKeyType::fixed64: - method_variant.emplace<I64FixedKeyHashTableContext<nullable>>( + method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt64>>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace<I128FixedKeyHashTableContext<nullable>>( + method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt128>>( get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace<I136FixedKeyHashTableContext<nullable>>( + method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt136>>( get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace<I256FixedKeyHashTableContext<nullable>>( + method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt256>>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/common/partition_sort_utils.h b/be/src/pipeline/common/partition_sort_utils.h index 38bc8744dc1..9317a783ba6 100644 --- a/be/src/pipeline/common/partition_sort_utils.h +++ b/be/src/pipeline/common/partition_sort_utils.h @@ -123,57 +123,41 @@ public: using PartitionDataPtr = PartitionBlocks*; using PartitionDataWithStringKey = PHHashMap<StringRef, PartitionDataPtr>; using PartitionDataWithShortStringKey = StringHashMap<PartitionDataPtr>; -using PartitionDataWithUInt8Key = PHHashMap<vectorized::UInt8, PartitionDataPtr>; -using PartitionDataWithUInt16Key = PHHashMap<vectorized::UInt16, PartitionDataPtr>; -using PartitionDataWithUInt32Key = - PHHashMap<vectorized::UInt32, PartitionDataPtr, HashCRC32<vectorized::UInt32>>; -using PartitionDataWithUInt64Key = - PHHashMap<vectorized::UInt64, PartitionDataPtr, HashCRC32<vectorized::UInt64>>; -using PartitionDataWithUInt128Key = - PHHashMap<vectorized::UInt128, PartitionDataPtr, HashCRC32<vectorized::UInt128>>; -using PartitionDataWithUInt256Key = - PHHashMap<vectorized::UInt256, PartitionDataPtr, HashCRC32<vectorized::UInt256>>; -using PartitionDataWithUInt136Key = - PHHashMap<vectorized::UInt136, PartitionDataPtr, HashCRC32<vectorized::UInt136>>; + +template <typename T> +using PartitionData = PHHashMap<T, PartitionDataPtr, HashCRC32<T>>; + +template <typename T> +using PartitionDataSingle = vectorized::MethodOneNumber<T, PartitionData<T>>; + +template <typename T> +using PartitionDataSingleNullable = vectorized::MethodSingleNullableColumn< + vectorized::MethodOneNumber<T, vectorized::DataWithNullKey<PartitionData<T>>>>; using PartitionedMethodVariants = std::variant< std::monostate, vectorized::MethodSerialized<PartitionDataWithStringKey>, - vectorized::MethodOneNumber<vectorized::UInt8, PartitionDataWithUInt8Key>, - vectorized::MethodOneNumber<vectorized::UInt16, PartitionDataWithUInt16Key>, - vectorized::MethodOneNumber<vectorized::UInt32, PartitionDataWithUInt32Key>, - vectorized::MethodOneNumber<vectorized::UInt64, PartitionDataWithUInt64Key>, - vectorized::MethodOneNumber<vectorized::UInt128, PartitionDataWithUInt128Key>, - vectorized::MethodOneNumber<vectorized::UInt256, PartitionDataWithUInt256Key>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt8, vectorized::DataWithNullKey<PartitionDataWithUInt8Key>>>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt16, vectorized::DataWithNullKey<PartitionDataWithUInt16Key>>>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt32, vectorized::DataWithNullKey<PartitionDataWithUInt32Key>>>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt64, vectorized::DataWithNullKey<PartitionDataWithUInt64Key>>>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt128, vectorized::DataWithNullKey<PartitionDataWithUInt128Key>>>, - vectorized::MethodSingleNullableColumn<vectorized::MethodOneNumber< - vectorized::UInt256, vectorized::DataWithNullKey<PartitionDataWithUInt256Key>>>, - vectorized::MethodKeysFixed<PartitionDataWithUInt64Key, false>, - vectorized::MethodKeysFixed<PartitionDataWithUInt64Key, true>, - vectorized::MethodKeysFixed<PartitionDataWithUInt128Key, false>, - vectorized::MethodKeysFixed<PartitionDataWithUInt128Key, true>, - vectorized::MethodKeysFixed<PartitionDataWithUInt256Key, false>, - vectorized::MethodKeysFixed<PartitionDataWithUInt256Key, true>, - vectorized::MethodKeysFixed<PartitionDataWithUInt136Key, false>, - vectorized::MethodKeysFixed<PartitionDataWithUInt136Key, true>, + PartitionDataSingle<vectorized::UInt8>, PartitionDataSingle<vectorized::UInt16>, + PartitionDataSingle<vectorized::UInt32>, PartitionDataSingle<vectorized::UInt64>, + PartitionDataSingle<vectorized::UInt128>, PartitionDataSingle<vectorized::UInt256>, + PartitionDataSingleNullable<vectorized::UInt8>, + PartitionDataSingleNullable<vectorized::UInt16>, + PartitionDataSingleNullable<vectorized::UInt32>, + PartitionDataSingleNullable<vectorized::UInt64>, + PartitionDataSingleNullable<vectorized::UInt128>, + PartitionDataSingleNullable<vectorized::UInt256>, + vectorized::MethodKeysFixed<PartitionData<vectorized::UInt64>>, + vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>, + vectorized::MethodKeysFixed<PartitionData<vectorized::UInt256>>, + vectorized::MethodKeysFixed<PartitionData<vectorized::UInt136>>, vectorized::MethodStringNoCache<PartitionDataWithShortStringKey>, vectorized::MethodSingleNullableColumn<vectorized::MethodStringNoCache< vectorized::DataWithNullKey<PartitionDataWithShortStringKey>>>>; struct PartitionedHashMapVariants : public DataVariants<PartitionedMethodVariants, vectorized::MethodSingleNullableColumn, - vectorized::MethodOneNumber, vectorized::MethodKeysFixed, - vectorized::DataWithNullKey> { - template <bool nullable> + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); switch (type) { case HashKeyType::without_key: { break; @@ -183,27 +167,27 @@ struct PartitionedHashMapVariants break; } case HashKeyType::int8_key: { - emplace_single<vectorized::UInt8, PartitionDataWithUInt8Key, nullable>(); + emplace_single<vectorized::UInt8, PartitionData<vectorized::UInt8>>(nullable); break; } case HashKeyType::int16_key: { - emplace_single<vectorized::UInt16, PartitionDataWithUInt16Key, nullable>(); + emplace_single<vectorized::UInt16, PartitionData<vectorized::UInt16>>(nullable); break; } case HashKeyType::int32_key: { - emplace_single<vectorized::UInt32, PartitionDataWithUInt32Key, nullable>(); + emplace_single<vectorized::UInt32, PartitionData<vectorized::UInt32>>(nullable); break; } case HashKeyType::int64_key: { - emplace_single<vectorized::UInt64, PartitionDataWithUInt64Key, nullable>(); + emplace_single<vectorized::UInt64, PartitionData<vectorized::UInt64>>(nullable); break; } case HashKeyType::int128_key: { - emplace_single<vectorized::UInt128, PartitionDataWithUInt128Key, nullable>(); + emplace_single<vectorized::UInt128, PartitionData<vectorized::UInt128>>(nullable); break; } case HashKeyType::int256_key: { - emplace_single<vectorized::UInt256, PartitionDataWithUInt256Key, nullable>(); + emplace_single<vectorized::UInt256, PartitionData<vectorized::UInt256>>(nullable); break; } case HashKeyType::string_key: { @@ -218,24 +202,20 @@ struct PartitionedHashMapVariants break; } case HashKeyType::fixed64: - method_variant - .emplace<vectorized::MethodKeysFixed<PartitionDataWithUInt64Key, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt64>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant - .emplace<vectorized::MethodKeysFixed<PartitionDataWithUInt128Key, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant - .emplace<vectorized::MethodKeysFixed<PartitionDataWithUInt136Key, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt136>>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant - .emplace<vectorized::MethodKeysFixed<PartitionDataWithUInt256Key, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt256>>>( + get_key_sizes(data_types)); break; default: throw Exception(ErrorCode::INTERNAL_ERROR, diff --git a/be/src/pipeline/common/set_utils.h b/be/src/pipeline/common/set_utils.h index 014546be124..9b1a2579cf9 100644 --- a/be/src/pipeline/common/set_utils.h +++ b/be/src/pipeline/common/set_utils.h @@ -25,10 +25,9 @@ namespace doris { -template <class Key, bool has_null> +template <class Key> using SetFixedKeyHashTableContext = - vectorized::MethodKeysFixed<HashMap<Key, pipeline::RowRefListWithFlags, HashCRC32<Key>>, - has_null>; + vectorized::MethodKeysFixed<HashMap<Key, pipeline::RowRefListWithFlags, HashCRC32<Key>>>; template <class T> using SetPrimaryTypeHashTableContext = @@ -47,19 +46,14 @@ using SetHashTableVariants = SetPrimaryTypeHashTableContext<vectorized::UInt64>, SetPrimaryTypeHashTableContext<vectorized::UInt128>, SetPrimaryTypeHashTableContext<vectorized::UInt256>, - SetFixedKeyHashTableContext<vectorized::UInt64, true>, - SetFixedKeyHashTableContext<vectorized::UInt64, false>, - SetFixedKeyHashTableContext<vectorized::UInt128, true>, - SetFixedKeyHashTableContext<vectorized::UInt128, false>, - SetFixedKeyHashTableContext<vectorized::UInt256, true>, - SetFixedKeyHashTableContext<vectorized::UInt256, false>, - SetFixedKeyHashTableContext<vectorized::UInt136, true>, - SetFixedKeyHashTableContext<vectorized::UInt136, false>>; + SetFixedKeyHashTableContext<vectorized::UInt64>, + SetFixedKeyHashTableContext<vectorized::UInt128>, + SetFixedKeyHashTableContext<vectorized::UInt256>, + SetFixedKeyHashTableContext<vectorized::UInt136>>; struct SetDataVariants { SetHashTableVariants method_variant; - template <bool nullable> void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) { switch (type) { case HashKeyType::serialized: @@ -87,19 +81,19 @@ struct SetDataVariants { method_variant.emplace<SetMethodOneString>(); break; case HashKeyType::fixed64: - method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64, nullable>>( + method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64>>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt128, nullable>>( + method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt128>>( get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt136, nullable>>( + method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt136>>( get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt256, nullable>>( + method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt256>>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h index 231c231c813..05cd3d7d9e0 100644 --- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h +++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h @@ -714,24 +714,20 @@ struct ExtractType<T(U)> { ExtractType<void(T)>::Type & hash_table_ctx, vectorized::MutableBlock & mutable_block, \ vectorized::Block * output_block, bool* eos, bool is_mark_join); -#define INSTANTIATION_FOR(JoinOpType) \ - template struct ProcessHashTableProbe<JoinOpType>; \ - \ - INSTANTIATION(JoinOpType, (SerializedHashTableContext)); \ - INSTANTIATION(JoinOpType, (I8HashTableContext)); \ - INSTANTIATION(JoinOpType, (I16HashTableContext)); \ - INSTANTIATION(JoinOpType, (I32HashTableContext)); \ - INSTANTIATION(JoinOpType, (I64HashTableContext)); \ - INSTANTIATION(JoinOpType, (I128HashTableContext)); \ - INSTANTIATION(JoinOpType, (I256HashTableContext)); \ - INSTANTIATION(JoinOpType, (I64FixedKeyHashTableContext<true>)); \ - INSTANTIATION(JoinOpType, (I64FixedKeyHashTableContext<false>)); \ - INSTANTIATION(JoinOpType, (I128FixedKeyHashTableContext<true>)); \ - INSTANTIATION(JoinOpType, (I128FixedKeyHashTableContext<false>)); \ - INSTANTIATION(JoinOpType, (I256FixedKeyHashTableContext<true>)); \ - INSTANTIATION(JoinOpType, (I256FixedKeyHashTableContext<false>)); \ - INSTANTIATION(JoinOpType, (I136FixedKeyHashTableContext<true>)); \ - INSTANTIATION(JoinOpType, (MethodOneString)); \ - INSTANTIATION(JoinOpType, (I136FixedKeyHashTableContext<false>)); +#define INSTANTIATION_FOR(JoinOpType) \ + template struct ProcessHashTableProbe<JoinOpType>; \ + \ + INSTANTIATION(JoinOpType, (SerializedHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt8>)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt16>)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt32>)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt64>)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt128>)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt256>)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt64>)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt128>)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt136>)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt256>)); \ + INSTANTIATION(JoinOpType, (MethodOneString)); #include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index 4bdbf51444f..6a59c5964e4 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -38,11 +38,6 @@ namespace doris::vectorized { using Sizes = std::vector<size_t>; -inline bool has_nullable_key(const std::vector<DataTypePtr>& data_types) { - return std::ranges::any_of(data_types.begin(), data_types.end(), - [](auto t) { return t->is_nullable(); }); -} - inline Sizes get_key_sizes(const std::vector<DataTypePtr>& data_types) { Sizes key_sizes; for (const auto& data_type : data_types) { @@ -101,17 +96,14 @@ protected: }; /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. -template <typename Value, typename Key, typename Mapped, bool has_nullable_keys = false> +template <typename Value, typename Key, typename Mapped> struct HashMethodKeysFixed - : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys>, - public columns_hashing_impl::HashMethodBase< - HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys>, Value, Mapped, - false> { - using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys>; + : public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped>, + Value, Mapped, false> { + using Self = HashMethodKeysFixed<Value, Key, Mapped>; using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; - using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys>; - HashMethodKeysFixed(const ColumnRawPtrs& key_columns) : Base(key_columns) {} + HashMethodKeysFixed(const ColumnRawPtrs& key_columns) {} }; template <typename SingleColumnMethod, typename Mapped> diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index 2665d9b7979..a11ec17ec70 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -149,64 +149,6 @@ protected: } }; -template <typename T> -struct MappedCache : public PaddedPODArray<T> {}; - -template <> -struct MappedCache<void> {}; - -/// This class is designed to provide the functionality that is required for -/// supporting nullable keys in HashMethodKeysFixed. If there are -/// no nullable keys, this class is merely implemented as an empty shell. -template <typename Key, bool has_nullable_keys> -class BaseStateKeysFixed; - -/// Case where nullable keys are supported. -template <typename Key> -class BaseStateKeysFixed<Key, true> { -protected: - BaseStateKeysFixed(const ColumnRawPtrs& key_columns) { - null_maps.reserve(key_columns.size()); - actual_columns.reserve(key_columns.size()); - - for (const auto& col : key_columns) { - if (auto* nullable_col = check_and_get_column<ColumnNullable>(col)) { - actual_columns.push_back(&nullable_col->get_nested_column()); - null_maps.push_back(&nullable_col->get_null_map_column()); - } else { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } - } - } - - /// Return the columns which actually contain the values of the keys. - /// For a given key column, if it is nullable, we return its nested - /// column. Otherwise we return the key column itself. - const ColumnRawPtrs& get_actual_columns() const { return actual_columns; } - - const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - -/// Case where nullable keys are not supported. -template <typename Key> -class BaseStateKeysFixed<Key, false> { -protected: - BaseStateKeysFixed(const ColumnRawPtrs& columns) : actual_columns(columns) {} - - const ColumnRawPtrs& get_actual_columns() const { return actual_columns; } - - const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - } // namespace columns_hashing_impl } // namespace ColumnsHashing diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h index 973f04f064f..5354155c529 100644 --- a/be/src/vec/common/hash_table/hash_map_context.h +++ b/be/src/vec/common/hash_table/hash_map_context.h @@ -375,7 +375,7 @@ struct MethodOneNumber : public MethodBase<TData> { } }; -template <typename TData, bool has_nullable_keys = false> +template <typename TData> struct MethodKeysFixed : public MethodBase<TData> { using Base = MethodBase<TData>; using typename Base::Key; @@ -384,8 +384,7 @@ struct MethodKeysFixed : public MethodBase<TData> { using Base::hash_table; using Base::iterator; - using State = ColumnsHashing::HashMethodKeysFixed<typename Base::Value, Key, Mapped, - has_nullable_keys>; + using State = ColumnsHashing::HashMethodKeysFixed<typename Base::Value, Key, Mapped>; // need keep until the hash probe end. use only in join std::vector<Key> build_stored_keys; @@ -469,20 +468,22 @@ struct MethodKeysFixed : public MethodBase<TData> { bool is_build = false, uint32_t bucket_size = 0) override { ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; - if (has_nullable_keys) { - actual_columns.reserve(key_columns.size()); - null_maps.reserve(key_columns.size()); - for (const auto& col : key_columns) { - if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col)) { - actual_columns.push_back(&nullable_col->get_nested_column()); - null_maps.push_back(&nullable_col->get_null_map_column()); - } else { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } + actual_columns.reserve(key_columns.size()); + null_maps.reserve(key_columns.size()); + bool has_nullable_key = false; + + for (const auto& col : key_columns) { + if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col)) { + actual_columns.push_back(&nullable_col->get_nested_column()); + null_maps.push_back(&nullable_col->get_null_map_column()); + has_nullable_key = true; + } else { + actual_columns.push_back(col); + null_maps.push_back(nullptr); } - } else { - actual_columns = key_columns; + } + if (!has_nullable_key) { + null_maps.clear(); } if (is_build) { @@ -503,7 +504,13 @@ struct MethodKeysFixed : public MethodBase<TData> { void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys, MutableColumns& key_columns, const size_t num_rows) override { // In any hash key value, column values to be read start just after the bitmap, if it exists. - size_t pos = has_nullable_keys ? get_bitmap_size(key_columns.size()) : 0; + size_t pos = 0; + for (size_t i = 0; i < key_columns.size(); ++i) { + if (key_columns[i]->is_nullable()) { + pos = get_bitmap_size(key_columns.size()); + break; + } + } for (size_t i = 0; i < key_columns.size(); ++i) { size_t size = key_sizes[i]; @@ -607,10 +614,4 @@ struct MethodSingleNullableColumn : public SingleColumnMethod { } }; -template <class T> -using PrimaryTypeHashTableContext = MethodOneNumber<T, JoinHashMap<T, HashCRC32<T>>>; - -template <class Key, bool has_null> -using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, HashCRC32<Key>>, has_null>; - } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/common/hash_table/hash_map_util.h b/be/src/vec/common/hash_table/hash_map_util.h index 292e6307851..d949fafecf9 100644 --- a/be/src/vec/common/hash_table/hash_map_util.h +++ b/be/src/vec/common/hash_table/hash_map_util.h @@ -36,11 +36,7 @@ Status init_hash_method(DataVariants* data, const std::vector<vectorized::DataTy auto type = HashKeyType::EMPTY; try { type = get_hash_key_type_with_phase(get_hash_key_type(data_types), !is_first_phase); - if (has_nullable_key(data_types)) { - data->template init<true>(data_types, type); - } else { - data->template init<false>(data_types, type); - } + data->init(data_types, type); } catch (const Exception& e) { // method_variant may meet valueless_by_exception, so we set it to monostate data->method_variant.template emplace<std::monostate>(); @@ -58,15 +54,15 @@ Status init_hash_method(DataVariants* data, const std::vector<vectorized::DataTy template <typename MethodVariants, template <typename> typename MethodNullable, template <typename, typename> typename MethodOneNumber, - template <typename, bool> typename MethodFixed, template <typename> typename DataNullable> + template <typename> typename DataNullable> struct DataVariants { DataVariants() = default; DataVariants(const DataVariants&) = delete; DataVariants& operator=(const DataVariants&) = delete; MethodVariants method_variant; - template <typename T, typename TT, bool nullable> - void emplace_single() { + template <typename T, typename TT> + void emplace_single(bool nullable) { if (nullable) { method_variant.template emplace<MethodNullable<MethodOneNumber<T, DataNullable<TT>>>>(); } else { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org