HappenLee commented on a change in pull request #7828:
URL: https://github.com/apache/incubator-doris/pull/7828#discussion_r801252518



##########
File path: be/src/olap/hll.h
##########
@@ -181,7 +303,7 @@ class HyperLogLog {
     uint8_t* _registers = nullptr;
 
 private:
-    DISALLOW_COPY_AND_ASSIGN(HyperLogLog);
+    //DISALLOW_COPY_AND_ASSIGN(HyperLogLog);

Review comment:
       delete the code

##########
File path: be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h
##########
@@ -116,12 +114,11 @@ class AggregateFunctionHLLUnion final : public 
AggregateFunctionHLLUnionAgg {
     AggregateFunctionHLLUnion(const IDataType& data_type, const DataTypes& 
argument_types_)
             : AggregateFunctionHLLUnionAgg(data_type, argument_types_) {}
 
-    DataTypePtr get_return_type() const override { return 
std::make_shared<DataTypeString>(); }
+    DataTypePtr get_return_type() const override { return 
std::make_shared<DataTypeHLL>(); } 
 
     void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& 
to) const override {
-        auto& column = static_cast<ColumnString&>(to);
-        auto result = this->data(place).get();
-        column.insert_data(result.c_str(), result.length());
+        auto& column = static_cast<ColumnHLL&>(to);
+        column.get_data().push_back(this->data(place).get());

Review comment:
       emplace_back to reduce unless copy

##########
File path: be/src/vec/functions/hll_empty.cpp
##########
@@ -20,19 +20,21 @@
 #include "vec/data_types/data_type_string.h"
 #include "vec/functions/function_const.h"
 #include "vec/functions/simple_function_factory.h"
-
+#include "vec/data_types/data_type_hll.h"
+#include "vec/columns/column_complex.h"
 namespace doris::vectorized {
 
 struct HLLEmptyImpl {
     static constexpr auto name = "hll_empty";
-    static auto get_return_type() { return std::make_shared<DataTypeString>(); 
}
-    static Field init_value() {
+    using ReturnColVec = ColumnHLL;
+    static auto get_return_type() { return std::make_shared<DataTypeHLL>(); }
+    static HyperLogLog init_value() {
         auto hll = HyperLogLog::empty();
-        return {hll.c_str(), hll.size()};
+        return HyperLogLog(hll);

Review comment:
       directly construct the class

##########
File path: be/src/olap/hll.h
##########
@@ -88,6 +88,128 @@ class HyperLogLog {
         _explicit_data_num = 1;
     }
 
+    HyperLogLog(const HyperLogLog& other) {
+        this->_type = other._type;
+        switch (other._type) {
+            case HLL_DATA_EMPTY:
+                break;
+            case HLL_DATA_EXPLICIT: {
+                this->_explicit_data_num = other._explicit_data_num;
+                _explicit_data = new uint64_t[HLL_EXPLICIT_INT64_NUM_DOUBLE];
+                memcpy(_explicit_data, other._explicit_data,
+                    sizeof(*_explicit_data) * _explicit_data_num);
+                break;
+            }
+            case HLL_DATA_SPARSE:
+            case HLL_DATA_FULL: {
+                _registers = new uint8_t[HLL_REGISTERS_COUNT];
+                memcpy(_registers, other._registers, HLL_REGISTERS_COUNT);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+    HyperLogLog(HyperLogLog&& other) {
+        this->_type = other._type;
+        switch (other._type) {
+            case HLL_DATA_EMPTY:
+                break;
+            case HLL_DATA_EXPLICIT: {
+                this->_explicit_data_num = other._explicit_data_num;
+                this->_explicit_data = other._explicit_data;
+                other._explicit_data_num = 0;
+                other._explicit_data = nullptr;
+                other._type = HLL_DATA_EMPTY;
+                break;
+            }
+            case HLL_DATA_SPARSE:
+            case HLL_DATA_FULL: {
+                this->_registers = other._registers;
+                other._registers = nullptr;
+                other._type = HLL_DATA_EMPTY;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+    HyperLogLog& operator=(HyperLogLog&& other) {
+        if (this != &other) {
+            if(_registers) {

Review comment:
       if (

##########
File path: be/src/vec/functions/hll_hash.cpp
##########
@@ -20,73 +20,47 @@
 #include "udf/udf.h"
 #include "vec/functions/function_always_not_nullable.h"
 #include "vec/functions/simple_function_factory.h"
+#include "vec/data_types/data_type_hll.h"
 
 namespace doris::vectorized {
 
 struct HLLHash {
     static constexpr auto name = "hll_hash";
 
-    using ReturnType = DataTypeString;
+    using ReturnType = DataTypeHLL;
 
     static void vector(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,
                        MutableColumnPtr& col_res) {
-        ColumnString::Chars& res_data = 
reinterpret_cast<ColumnString*>(col_res.get())->get_chars();
-        ColumnString::Offsets& res_offsets =
-                reinterpret_cast<ColumnString*>(col_res.get())->get_offsets();
-
+        auto* res_column = reinterpret_cast<ColumnHLL*>(col_res.get());
+        auto& res_data = res_column->get_data();
         size_t size = offsets.size();
-        res_offsets.resize(size);
-        res_data.reserve(data.size());
-
-        size_t prev_offset = 0;
-        size_t res_offset = 0;
 
         for (size_t i = 0; i < size; ++i) {
-            auto hash_string = HllFunctions::hll_hash(
-                    StringVal((uint8_t*)(&data[prev_offset]), offsets[i] - 
prev_offset - 1));
-
-            res_data.resize(res_data.size() + hash_string.length() + 1);
-            memcpy_small_allow_read_write_overflow15(&res_data[res_offset], 
hash_string.c_str(),
-                                                     hash_string.length());
-            res_offset += hash_string.length() + 1;
-            res_data[res_offset - 1] = 0;
-
-            res_offsets[i] = res_offset;
-            prev_offset = offsets[i];
+            const char* raw_str = reinterpret_cast<const 
char*>(&data[offsets[i - 1]]);
+            size_t str_size = offsets[i] - offsets[i - 1] - 1;
+            uint64_t hash_value = HashUtil::murmur_hash64A(raw_str, str_size, 
HashUtil::MURMUR_SEED);
+            res_data[i].update(hash_value);
         }
     }
 
     static void vector_nullable(const ColumnString::Chars& data,
                                 const ColumnString::Offsets& offsets, const 
NullMap& nullmap,
                                 MutableColumnPtr& col_res) {
-        ColumnString::Chars& res_data = 
reinterpret_cast<ColumnString*>(col_res.get())->get_chars();
-        ColumnString::Offsets& res_offsets =
-                reinterpret_cast<ColumnString*>(col_res.get())->get_offsets();
 
+        auto* res_column = reinterpret_cast<ColumnHLL*>(col_res.get());
+        auto& res_data = res_column->get_data();
         size_t size = offsets.size();
-        res_offsets.resize(size);
-        res_data.reserve(data.size());
-
-        size_t prev_offset = 0;
-        size_t res_offset = 0;
 
         for (size_t i = 0; i < size; ++i) {
-            std::string hash_string;
             if (nullmap[i]) {
-                hash_string = HyperLogLog::empty();
+                std::string hash_string = HyperLogLog::empty();
+                res_data[i] = HyperLogLog(Slice(hash_string));

Review comment:
       here should direct continue




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to