This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new ff48ae51e1 [Bug](bitmap) intersect_count function use in string cause ASAN error (#11936) (#12980)
ff48ae51e1 is described below

commit ff48ae51e17e16cd0b79b2b5922e40a15c31e259
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Sep 27 08:41:54 2022 +0800

    [Bug](bitmap) intersect_count function use in string cause ASAN error (#11936) (#12980)
---
 be/src/util/bitmap_intersect.h                     | 111 +++++++++++++++++++++
 .../aggregate_function_orthogonal_bitmap.cpp       |   3 +-
 .../aggregate_function_orthogonal_bitmap.h         |  13 ++-
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h
index dcda6ae5a5..94ff8dc283 100644
--- a/be/src/util/bitmap_intersect.h
+++ b/be/src/util/bitmap_intersect.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 #pragma once
+#include <parallel_hashmap/phmap.h>
+
 #include "runtime/string_value.h"
 #include "udf/udf.h"
 #include "util/bitmap_value.h"
@@ -103,6 +105,15 @@ inline char* Helper::write_to<StringValue>(const StringValue& v, char* dest) {
     dest += v.len;
     return dest;
 }
+
+// Serializes v as a 4-byte length prefix followed by its bytes; returns the advanced cursor.
+template <>
+inline char* Helper::write_to<std::string>(const std::string& v, char* dest) {
+    const uint32_t len = v.size(); // same implicit narrowing as a direct 32-bit store
+    memcpy(dest, &len, sizeof(len)); // dest need not be 4-byte aligned, so avoid a cast-store
+    memcpy(dest + sizeof(len), v.data(), v.size());
+    return dest + sizeof(len) + v.size();
+}
 // write_to end
 
 template <>
@@ -119,6 +130,11 @@ template <>
 inline int32_t Helper::serialize_size<StringValue>(const StringValue& v) {
     return v.len + 4;
 }
+
+template <>
+inline int32_t Helper::serialize_size<std::string>(const std::string& v) {
+    return sizeof(uint32_t) + v.size(); // 4-byte length prefix plus the string's bytes
+}
 // serialize_size end
 
 template <>
@@ -147,6 +163,14 @@ inline void Helper::read_from<StringValue>(const char** src, StringValue* result
     *result = StringValue((char*)*src, length);
     *src += length;
 }
+
+template <> // reads a 4-byte length prefix, then copies that many bytes into *result
+inline void Helper::read_from<std::string>(const char** src, std::string* result) {
+    int32_t length = 0;
+    memcpy(&length, *src, sizeof(length)); // *src need not be 4-byte aligned
+    result->assign(*src + sizeof(length), length); // owned copy; result does not alias src
+    *src += sizeof(length) + length; // advance the cursor past prefix and payload
+}
 // read_from end
 
 } // namespace detail
@@ -242,4 +266,91 @@ private:
     std::map<T, BitmapValue> _bitmaps;
 };
 
+// String-key specialization: keys arrive as views and are stored as owned std::string copies.
+template <>
+struct BitmapIntersect<std::string_view> {
+public:
+    BitmapIntersect() = default;
+
+    explicit BitmapIntersect(const char* src) { deserialize(src); }
+
+    // Registers key with an empty bitmap, replacing any bitmap already stored for it.
+    void add_key(const std::string_view key) { _bitmaps[key] = BitmapValue(); }
+
+    // ORs bitmap into key's entry; a key with no existing entry is ignored.
+    void update(const std::string_view& key, const BitmapValue& bitmap) {
+        if (auto it = _bitmaps.find(key); it != _bitmaps.end()) {
+            it->second |= bitmap;
+        }
+    }
+
+    // Folds other into this: shared keys are OR-ed, keys only in other are copied over.
+    void merge(const BitmapIntersect& other) {
+        for (const auto& kv : other._bitmaps) {
+            if (auto it = _bitmaps.find(kv.first); it != _bitmaps.end()) {
+                it->second |= kv.second;
+            } else {
+                _bitmaps[kv.first] = kv.second;
+            }
+        }
+    }
+
+    // Intersection of every stored bitmap; an empty BitmapValue when there are no keys.
+    BitmapValue intersect() const {
+        BitmapValue result;
+        auto it = _bitmaps.begin();
+        if (it == _bitmaps.end()) return result; // begin() must not be dereferenced when empty
+        result |= it->second;
+        for (++it; it != _bitmaps.end(); ++it) {
+            result &= it->second;
+        }
+        return result;
+    }
+
+    // Cardinality of intersect(); 0 when no key is stored.
+    int64_t intersect_count() const {
+        return _bitmaps.empty() ? 0 : intersect().cardinality();
+    }
+
+    // Bytes needed by serialize(): 4 for the entry count, then each key and bitmap.
+    size_t size() {
+        size_t total = 4;
+        for (auto& kv : _bitmaps) {
+            total += detail::Helper::serialize_size(kv.first);
+            total += kv.second.getSizeInBytes();
+        }
+        return total;
+    }
+
+    // Writes the entry count, then every (key, bitmap) pair, into dest.
+    // Callers must call size() first (presumably to size dest -- confirm against callers).
+    void serialize(char* dest) {
+        char* writer = dest;
+        *(int32_t*)writer = _bitmaps.size();
+        writer += 4;
+        for (auto& kv : _bitmaps) {
+            writer = detail::Helper::write_to(kv.first, writer);
+            kv.second.write(writer);
+            writer += kv.second.getSizeInBytes();
+        }
+    }
+
+    // Loads entries from a buffer laid out by serialize(); each key is copied out of src.
+    void deserialize(const char* src) {
+        const char* reader = src;
+        int32_t bitmaps_size = *(int32_t*)reader;
+        reader += 4;
+        for (int32_t i = 0; i < bitmaps_size; i++) {
+            std::string key;
+            detail::Helper::read_from(&reader, &key);
+            BitmapValue bitmap(reader);
+            reader += bitmap.getSizeInBytes();
+            _bitmaps[key] = bitmap;
+        }
+    }
+
+private:
+    phmap::flat_hash_map<std::string, BitmapValue> _bitmaps;
+};
+
 } // namespace doris
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
index 470a6c8388..b95608ebbc 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
@@ -47,7 +47,8 @@ AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& nam
         if (res) {
             return res;
         } else if (which.is_string_or_fixed_string()) {
-            return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types);
+            return std::make_shared<AggFunctionOrthBitmapFunc<Impl<std::string_view>>>(
+                    argument_types);
         }
         LOG(WARNING) << "Incorrect Type " << argument_type.get_name()
                      << " of arguments for aggregate function " << name;
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
index 4f1fb69ec4..0c8b8b9b20 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
@@ -45,8 +45,11 @@ public:
 
         if constexpr (IsNumber<T>) {
             bitmap.update(data_col.get_element(row_num), bitmap_value);
-        } else {
-            bitmap.update(StringValue(data_col.get_data_at(row_num)), bitmap_value);
+        }
+        if constexpr (std::is_same_v<T, std::string_view>) {
+            // TODO: rethink here we really need to do a virtual function call
+            auto sr = data_col.get_data_at(row_num);
+            bitmap.update(std::string_view {sr.data, sr.size}, bitmap_value);
         }
     }
 
@@ -57,8 +60,10 @@ public:
                 const auto& col = static_cast<const ColVecData&>(*columns[idx]);
                 if constexpr (IsNumber<T>) {
                     bitmap.add_key(col.get_element(row_num));
-                } else {
-                    bitmap.add_key(StringValue(col.get_data_at(row_num)));
+                }
+                if constexpr (std::is_same_v<T, std::string_view>) {
+                    auto sr = col.get_data_at(row_num);
+                    bitmap.add_key(std::string_view {sr.data, sr.size});
                 }
             }
             first_init = false;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to