This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-1.1-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new ff48ae51e1 [Bug](bitmap) intersect_count function use in string cause ASAN error (#11936) (#12980)
ff48ae51e1 is described below

commit ff48ae51e17e16cd0b79b2b5922e40a15c31e259
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Sep 27 08:41:54 2022 +0800

    [Bug](bitmap) intersect_count function use in string cause ASAN error (#11936) (#12980)
---
 be/src/util/bitmap_intersect.h                     | 111 +++++++++++++++++++++
 .../aggregate_function_orthogonal_bitmap.cpp       |   3 +-
 .../aggregate_function_orthogonal_bitmap.h         |  13 ++-
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h
index dcda6ae5a5..94ff8dc283 100644
--- a/be/src/util/bitmap_intersect.h
+++ b/be/src/util/bitmap_intersect.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 #pragma once
+#include <parallel_hashmap/phmap.h>
+
 #include "runtime/string_value.h"
 #include "udf/udf.h"
 #include "util/bitmap_value.h"
@@ -103,6 +105,15 @@ inline char* Helper::write_to<StringValue>(const StringValue& v, char* dest) {
     dest += v.len;
     return dest;
 }
+
+template <>
+inline char* Helper::write_to<std::string>(const std::string& v, char* dest) {
+    *(uint32_t*)dest = v.size();
+    dest += 4;
+    memcpy(dest, v.c_str(), v.size());
+    dest += v.size();
+    return dest;
+}
 // write_to end
 
 template <>
@@ -119,6 +130,11 @@ template <>
 inline int32_t Helper::serialize_size<StringValue>(const StringValue& v) {
     return v.len + 4;
 }
+
+template <>
+inline int32_t Helper::serialize_size<std::string>(const std::string& v) {
+    return v.size() + 4;
+}
 // serialize_size end
 
 template <>
@@ -147,6 +163,14 @@ inline void Helper::read_from<StringValue>(const char** src, StringValue* result
     *result = StringValue((char*)*src, length);
     *src += length;
 }
+
+template <>
+inline void Helper::read_from<std::string>(const char** src, std::string* result) {
+    int32_t length = *(int32_t*)(*src);
+    *src += 4;
+    *result = std::string((char*)*src, length);
+    *src += length;
+}
 // read_from end
 
 } // namespace detail
@@ -242,4 +266,91 @@ private:
     std::map<T, BitmapValue> _bitmaps;
 };
 
+template <>
+struct BitmapIntersect<std::string_view> {
+public:
+    BitmapIntersect() = default;
+
+    explicit BitmapIntersect(const char* src) { deserialize(src); }
+
+    void add_key(const std::string_view key) {
+        BitmapValue empty_bitmap;
+        _bitmaps[key] = empty_bitmap;
+    }
+
+    void update(const std::string_view& key, const BitmapValue& bitmap) {
+        if (_bitmaps.find(key) != _bitmaps.end()) {
+            _bitmaps[key] |= bitmap;
+        }
+    }
+
+    void merge(const BitmapIntersect& other) {
+        for (auto& kv : other._bitmaps) {
+            if (_bitmaps.find(kv.first) != _bitmaps.end()) {
+                _bitmaps[kv.first] |= kv.second;
+            } else {
+                _bitmaps[kv.first] = kv.second;
+            }
+        }
+    }
+
+    // intersection
+    BitmapValue intersect() const {
+        BitmapValue result;
+        auto it = _bitmaps.begin();
+        result |= it->second;
+        it++;
+        for (; it != _bitmaps.end(); it++) {
+            result &= it->second;
+        }
+        return result;
+    }
+
+    // calculate the intersection for _bitmaps's bitmap values
+    int64_t intersect_count() const {
+        if (_bitmaps.empty()) {
+            return 0;
+        }
+        return intersect().cardinality();
+    }
+
+    // the serialize size
+    size_t size() {
+        size_t size = 4;
+        for (auto& kv : _bitmaps) {
+            size += detail::Helper::serialize_size(kv.first);
+            size += kv.second.getSizeInBytes();
+        }
+        return size;
+    }
+
+    //must call size() first
+    void serialize(char* dest) {
+        char* writer = dest;
+        *(int32_t*)writer = _bitmaps.size();
+        writer += 4;
+        for (auto& kv : _bitmaps) {
+            writer = detail::Helper::write_to(kv.first, writer);
+            kv.second.write(writer);
+            writer += kv.second.getSizeInBytes();
+        }
+    }
+
+    void deserialize(const char* src) {
+        const char* reader = src;
+        int32_t bitmaps_size = *(int32_t*)reader;
+        reader += 4;
+        for (int32_t i = 0; i < bitmaps_size; i++) {
+            std::string key;
+            detail::Helper::read_from(&reader, &key);
+            BitmapValue bitmap(reader);
+            reader += bitmap.getSizeInBytes();
+            _bitmaps[key] = bitmap;
+        }
+    }
+
+private:
+    phmap::flat_hash_map<std::string, BitmapValue> _bitmaps;
+};
+
 } // namespace doris
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
index 470a6c8388..b95608ebbc 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
@@ -47,7 +47,8 @@ AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& nam
     if (res) {
         return res;
     } else if (which.is_string_or_fixed_string()) {
-        return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types);
+        return std::make_shared<AggFunctionOrthBitmapFunc<Impl<std::string_view>>>(
+                argument_types);
     }
     LOG(WARNING) << "Incorrect Type " << argument_type.get_name()
                  << " of arguments for aggregate function " << name;
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
index 4f1fb69ec4..0c8b8b9b20 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
@@ -45,8 +45,11 @@ public:
 
         if constexpr (IsNumber<T>) {
             bitmap.update(data_col.get_element(row_num), bitmap_value);
-        } else {
-            bitmap.update(StringValue(data_col.get_data_at(row_num)), bitmap_value);
+        }
+        if constexpr (std::is_same_v<T, std::string_view>) {
+            // TODO: rethink here we really need to do a virtual function call
+            auto sr = data_col.get_data_at(row_num);
+            bitmap.update(std::string_view {sr.data, sr.size}, bitmap_value);
         }
     }
 
@@ -57,8 +60,10 @@ public:
                 const auto& col = static_cast<const ColVecData&>(*columns[idx]);
                 if constexpr (IsNumber<T>) {
                     bitmap.add_key(col.get_element(row_num));
-                } else {
-                    bitmap.add_key(StringValue(col.get_data_at(row_num)));
+                }
+                if constexpr (std::is_same_v<T, std::string_view>) {
+                    auto sr = col.get_data_at(row_num);
+                    bitmap.add_key(std::string_view {sr.data, sr.size});
                 }
             }
             first_init = false;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org