wangshuo128 commented on a change in pull request #8234:
URL: https://github.com/apache/incubator-doris/pull/8234#discussion_r824382538



##########
File path: 
fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
##########
@@ -386,25 +386,18 @@ public void analyze(Analyzer analyzer) throws 
UserException {
                 }
             }
 
-            if (columnDef.getType().isHllType()) {
-                hasHll = true;
-            }
-
-            if (columnDef.getAggregateType() == AggregateType.BITMAP_UNION) {
-                hasBitmap = columnDef.getType().isBitmapType();
+            if (columnDef.getType().isObjectStored()) {
+                hasObjectStored = true;
+                objectStoredColumn = columnDef.getName();
             }
 
             if (!columnSet.add(columnDef.getName())) {
                 
ErrorReport.reportAnalysisException(ErrorCode.ERR_DUP_FIELDNAME, 
columnDef.getName());
             }
         }
 
-        if (hasHll && keysDesc.getKeysType() != KeysType.AGG_KEYS) {
-            throw new AnalysisException("HLL must be used in AGG_KEYS");
-        }
-
-        if (hasBitmap && keysDesc.getKeysType() != KeysType.AGG_KEYS) {
-            throw new AnalysisException("BITMAP_UNION must be used in 
AGG_KEYS");
+        if (hasObjectStored && keysDesc.getKeysType() != KeysType.AGG_KEYS) {
+            throw new AnalysisException("object stored column must be used in 
AGG_KEYS table, object stored column:" + objectStoredColumn);

Review comment:
       `object stored` is an implementation detail, which should be 
transparent to end users. Let's just use the specific column type name.
   ```suggestion
               throw new AnalysisException(objectStoredColumn + " must be used 
in AGG_KEYS");
   ```

##########
File path: fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java
##########
@@ -49,11 +49,12 @@
     VARCHAR("VARCHAR", 16, TPrimitiveType.VARCHAR),
 
     DECIMALV2("DECIMALV2", 16, TPrimitiveType.DECIMALV2),
-    
-    HLL("HLL", 16, TPrimitiveType.HLL),
     TIME("TIME", 8, TPrimitiveType.TIME),
-    // we use OBJECT type represent BITMAP type in Backend
+    // these following type are stored as object binary in BE.

Review comment:
       ```suggestion
       // these following types are stored as object binary in BE.
   ```

##########
File path: be/src/exprs/quantile_function.cpp
##########
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/quantile_function.h"
+#include "exprs/anyval_util.h"
+#include "gutil/strings/numbers.h"
+#include "gutil/strings/split.h"
+#include "util/quantile_state.h"
+#include "util/string_parser.hpp"
+#include "util/slice.h"
+
+namespace doris {
+
+using doris_udf::DoubleVal;
+using doris_udf::StringVal;
+using doris_udf::FloatVal;
+
+void QuantileStateFunctions::init(){}
+
+void QuantileStateFunctions::quantile_state_init(FunctionContext* ctx, 
StringVal* dst) {
+    dst->is_null = false;
+    dst->len = sizeof(QuantileState<double>);
+    dst->ptr = (uint8_t*) new QuantileState<double>();
+}
+
+static StringVal serialize(FunctionContext* ctx, QuantileState<double>* value) 
{
+    StringVal result(ctx, value->get_serialized_size());
+    value->serialize(result.ptr);
+    return result;
+}
+
+StringVal QuantileStateFunctions::to_quantile_state(FunctionContext* ctx, 
const StringVal& src) {
+    QuantileState<double> quantile_state;
+    quantile_state.set_compression(2048);

Review comment:
       It's better to use named constants for values such as `2048` and 
`10000`, rather than these magic numbers.

##########
File path: be/src/olap/row_block.cpp
##########
@@ -89,8 +89,7 @@ void RowBlock::_compute_layout() {
         _field_offset_in_memory.push_back(memory_size);
 
         // All field has a nullbyte in memory
-        if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == 
OLAP_FIELD_TYPE_HLL ||
-            column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == 
OLAP_FIELD_TYPE_OBJECT ||column.type() == OLAP_FIELD_TYPE_STRING) {
+        if (column.is_slice_stored()) {

Review comment:
       I prefer `is_length_variable_type` to `is_variable_type` 

##########
File path: be/src/olap/aggregate_func.h
##########
@@ -580,6 +581,53 @@ struct 
AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_
         : public AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, 
OLAP_FIELD_TYPE_OBJECT> {
 };
 
+template <>
+struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_QUANTILE_UNION, 
OLAP_FIELD_TYPE_QUANTILE_STATE> {
+    static void init(RowCursorCell* dst, const char* src, bool src_null, 
MemPool* mem_pool,
+                     ObjectPool* agg_pool) {
+        DCHECK_EQ(src_null, false);
+        dst->set_not_null();
+
+        auto* src_slice = reinterpret_cast<const Slice*>(src);
+        auto* dst_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr());
+
+        // we use zero size represent this slice is a agg object
+        dst_slice->size = 0;
+        auto* quantile_state = new QuantileState<double>(*src_slice);
+
+        
mem_pool->mem_tracker()->Consume(quantile_state->get_serialized_size());
+
+        dst_slice->data = reinterpret_cast<char*>(quantile_state);
+
+        agg_pool->add(quantile_state);
+    }
+
+    static void update(RowCursorCell* dst, const RowCursorCell& src, MemPool* 
mem_pool) {
+        DCHECK_EQ(src.is_null(), false);
+
+        auto* dst_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr());
+        auto* src_slice = reinterpret_cast<const Slice*>(src.cell_ptr());

Review comment:
       ```suggestion
           const auto* src_slice = reinterpret_cast<Slice*>(src.cell_ptr());
   ```

##########
File path: be/src/util/quantile_state.h
##########
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
+#define DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
+
+
+#include <string>
+#include <vector>
+#include <memory>
+#include "tdigest.h"
+#include "slice.h"
+
+namespace doris {
+
+
+class Slice;
+class TDigest;
+
+const static int QUANTILE_STATE_EXPLICIT_NUM = 2048;
+
+enum QuantileStateType {
+    EMPTY = 0,
+    SINGLE = 1,   // single element
+    EXPLICIT = 2, // more than one elements,stored in vector
+    TDIGEST = 3   // TDIGEST object
+};
+
+template<typename T>
+class QuantileState
+{
+public:
+    QuantileState();
+    explicit QuantileState(float compression);
+    explicit QuantileState(const Slice& slice);
+    void set_compression(float compression);
+    bool deserialize(const Slice& slice);
+    size_t serialize(uint8_t* dst) const;
+    void merge(QuantileState<T>& other);
+    void add_value(const T& value);
+    void clear();
+    bool is_valid(const Slice& slice);
+    size_t get_serialized_size();
+    T get_value_by_percentile(float percentile);
+    T get_explicit_value_by_percentile(float percentile);
+    ~QuantileState();
+
+private:
+    QuantileStateType _type = EMPTY;
+    TDigest* tdigest_ptr = nullptr;

Review comment:
       ```suggestion
       TDigest* _tdigest_ptr = nullptr;
   ```

##########
File path: be/src/util/quantile_state.cpp
##########
@@ -0,0 +1,377 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <string.h>
+#include <cmath>
+#include "util/quantile_state.h"
+#include "util/coding.h"
+#include "common/logging.h"
+
+
+namespace doris{
+
+template<typename T>
+QuantileState<T>::QuantileState():_type(EMPTY),compression(2048){}
+
+template<typename T>
+QuantileState<T>::QuantileState(float 
compression):_type(EMPTY),compression(compression){}
+
+
+template<typename T>
+QuantileState<T>::QuantileState(const Slice& slice) {
+    if (!deserialize(slice)) {
+        _type = EMPTY;
+    }
+}
+
+template<typename T>
+QuantileState<T>::~QuantileState() {
+    clear();
+}
+
+template<typename T>
+size_t QuantileState<T>::get_serialized_size() {
+    size_t size = 1 + sizeof(float); // type(QuantileStateType) + 
compression(float)
+    switch(_type) {
+    case EMPTY:
+        break;
+    case SINGLE:
+        size += sizeof(T);
+        break;
+    case EXPLICIT:
+        size += sizeof(uint16_t) + sizeof(T) * _explicit_data.size();
+        break;
+    case TDIGEST:
+        size += tdigest_ptr->serialized_size();
+        break;
+    }
+    return size;
+}
+
+template<typename T>
+void QuantileState<T>::set_compression(float compression) {
+    this->compression = compression;
+}
+
+template<typename T>
+bool QuantileState<T>::is_valid(const Slice& slice) {
+
+    if (slice.size < 1) {
+        return false;
+    }
+    const uint8_t* ptr = (uint8_t*)slice.data;
+    const uint8_t* end = (uint8_t*)slice.data + slice.size;
+    float compress_value = *reinterpret_cast<const float*>(ptr);
+    if (compress_value < 2048 || compress_value > 10000) {
+        return false;
+    }
+    ptr += sizeof(float);
+    
+
+    auto type = (QuantileStateType)*ptr++;
+    switch (type) {
+    case EMPTY:
+        break;
+    case SINGLE:{
+        if ((ptr + sizeof(T)) > end) {
+            return false;
+        }
+        ptr += sizeof(T);
+        // _single_data = *reinterpret_cast<T*>(ptr)++;
+        break;
+    }
+    case EXPLICIT: {
+        if ((ptr + sizeof(uint16_t)) > end) {
+            return false;
+        }
+        uint16_t num_explicits = decode_fixed16_le(ptr);
+        ptr += sizeof(uint16_t);
+        ptr += num_explicits * sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        if ((ptr + sizeof(uint32_t)) > end) {
+            return false;
+        }
+        uint32_t tdigest_serialized_length = decode_fixed32_le(ptr);
+        ptr += tdigest_serialized_length;
+        break;
+    }
+    default:
+        return false;
+    }
+    return ptr == end;
+}
+
+template<typename T>
+T QuantileState<T>::get_explicit_value_by_percentile(float percentile) {
+    DCHECK(_type == EXPLICIT);
+    if (percentile < 0 || percentile > 1) {
+        LOG(WARNING) << "get_explicit_value_by_percentile failed caused by 
percentile:" << percentile <<" is invalid";
+        return NAN;
+    }
+    int n = _explicit_data.size();
+    std::sort(_explicit_data.begin(), _explicit_data.end());
+
+    double index = (n - 1) * percentile;
+    int intIdx = (int) index;
+    if (intIdx == n-1 ){
+        return _explicit_data[intIdx];
+    }
+    return _explicit_data[intIdx+1] * (index - intIdx) + 
_explicit_data[intIdx] * (intIdx + 1 - index);
+}
+
+template<typename T>
+T QuantileState<T>::get_value_by_percentile(float percentile) {
+    switch(_type) {
+    case EMPTY: {
+        return NAN;
+    }
+    case SINGLE: {
+        return _single_data;
+    }
+    case EXPLICIT: {
+        return get_explicit_value_by_percentile(percentile);
+    }
+    case TDIGEST: {
+        return tdigest_ptr->quantile(percentile);
+    }
+    default:
+        break;
+    }
+    return NAN;
+}
+
+template<typename T>
+bool QuantileState<T>::deserialize(const Slice& slice) {
+    DCHECK(_type == EMPTY);
+
+    // in case of insert error data caused be crashed 
+    if (slice.data == nullptr || slice.size <= 0) {
+        return false;
+    }
+    // check input is valid
+    if (!is_valid(slice)) {
+        LOG(WARNING) << "QuantileState deserialize failed: slice is invalid";
+        return false;
+    }
+
+    const uint8_t* ptr = (uint8_t*)slice.data;
+    compression = *reinterpret_cast<const float*>(ptr);
+    ptr += sizeof(float);
+    // first byte : type
+    _type = (QuantileStateType)*ptr++;
+    switch (_type) {
+    case EMPTY:
+        // 1: empty 
+        break;
+    case SINGLE: {
+        // 2: single_data value
+        _single_data = *reinterpret_cast<const T*>(ptr);
+        ptr += sizeof(T);
+        break;
+    }
+    case EXPLICIT: {
+        // 3: number of explicit values
+        // make sure that num_explicit is positive
+        uint16_t num_explicits = decode_fixed16_le(ptr);
+        ptr += sizeof(uint16_t);
+        _explicit_data.reserve(std::min(num_explicits*2, 
QUANTILE_STATE_EXPLICIT_NUM));
+        _explicit_data.resize(num_explicits);
+        memcpy(&_explicit_data[0], ptr, num_explicits*sizeof(T));
+        ptr += num_explicits*sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        // 4: Tdigest object value
+        tdigest_ptr = new TDigest(0);
+        tdigest_ptr->unserialize(ptr);
+        break;
+    }
+    default:
+        // revert type to EMPTY
+        _type = EMPTY;
+        return false;
+    }
+    return true;
+
+}
+
+template<typename T>
+size_t QuantileState<T>::serialize(uint8_t* dst) const{
+    uint8_t* ptr = dst;
+    *reinterpret_cast<float *>(ptr) = compression;
+    ptr += sizeof(float);
+    switch (_type) {
+    case EMPTY: {
+        *ptr++ = EMPTY;
+        break;
+    }
+    case SINGLE: {
+        *ptr++ = SINGLE;
+        *reinterpret_cast<T *>(ptr) = _single_data;
+        ptr += sizeof(T);
+        break;
+    }
+    case EXPLICIT: {
+        *ptr++ = EXPLICIT;
+        uint16_t size = _explicit_data.size();
+        *reinterpret_cast<uint16_t *>(ptr) = size;
+        ptr += sizeof(uint16_t);
+        memcpy(ptr, &_explicit_data[0], size*sizeof(T));
+        ptr += size*sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        *ptr++ = TDIGEST;
+        size_t tdigest_size = tdigest_ptr->serialize(ptr);
+        ptr += tdigest_size;
+        break;
+    }
+    default:
+        break;
+    }
+    return ptr - dst;
+}
+
+template<typename T>
+void QuantileState<T>::merge(QuantileState<T>& other) {
+    switch(other._type) {
+    case EMPTY:
+        break;
+    case SINGLE: {
+        add_value(other._single_data);
+        break;
+    }
+    case EXPLICIT: {
+        switch(_type) {
+        case EMPTY:
+            _type = EXPLICIT;
+            _explicit_data.swap(other._explicit_data);
+            break;
+        case SINGLE:
+            _type = EXPLICIT;
+            _explicit_data.swap(other._explicit_data);
+            add_value(_single_data);
+            break;
+        case EXPLICIT:
+            if (_explicit_data.size() + other._explicit_data.size() > 
QUANTILE_STATE_EXPLICIT_NUM) {
+                _type = TDIGEST;
+                tdigest_ptr = new TDigest(compression);
+                for (int i = 0; i < _explicit_data.size(); i++) {
+                    tdigest_ptr->add(_explicit_data[i]);
+                }
+                for (int i = 0; i < other._explicit_data.size(); i++) {
+                    tdigest_ptr->add(other._explicit_data[i]);
+                }
+            } else {
+                _explicit_data.insert(_explicit_data.end(), 
other._explicit_data.begin(), other._explicit_data.end());            
+            }
+            break;
+        case TDIGEST:
+            for (int i = 0; i < other._explicit_data.size(); i++) {
+                tdigest_ptr->add(other._explicit_data[i]);
+            }
+            break;
+        default:
+            break;
+        }
+        break;
+    }
+    case TDIGEST: {
+        switch(_type) {
+        case EMPTY:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            break;
+        case SINGLE:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            tdigest_ptr->add(_single_data);
+            break;
+        case EXPLICIT:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            for (int i = 0; i < _explicit_data.size(); i++) {
+                tdigest_ptr->add(_explicit_data[i]);
+            }
+            break;
+        case TDIGEST:
+            tdigest_ptr->merge(other.tdigest_ptr);
+            break;
+        default:
+            break;
+        }
+        break;
+    }
+    default:
+        return;

Review comment:
       The `default` branch should never be reached. What about logging a 
warning there?

##########
File path: be/src/exprs/quantile_function.cpp
##########
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/quantile_function.h"
+#include "exprs/anyval_util.h"
+#include "gutil/strings/numbers.h"
+#include "gutil/strings/split.h"
+#include "util/quantile_state.h"
+#include "util/string_parser.hpp"
+#include "util/slice.h"
+
+namespace doris {
+
+using doris_udf::DoubleVal;
+using doris_udf::StringVal;
+using doris_udf::FloatVal;
+
+void QuantileStateFunctions::init(){}
+
+void QuantileStateFunctions::quantile_state_init(FunctionContext* ctx, 
StringVal* dst) {
+    dst->is_null = false;
+    dst->len = sizeof(QuantileState<double>);
+    dst->ptr = (uint8_t*) new QuantileState<double>();
+}
+
+static StringVal serialize(FunctionContext* ctx, QuantileState<double>* value) 
{
+    StringVal result(ctx, value->get_serialized_size());
+    value->serialize(result.ptr);
+    return result;
+}
+
+StringVal QuantileStateFunctions::to_quantile_state(FunctionContext* ctx, 
const StringVal& src) {
+    QuantileState<double> quantile_state;
+    quantile_state.set_compression(2048);
+    const AnyVal* digest_compression = ctx->get_constant_arg(1);
+    if (digest_compression != nullptr) {
+        float compression = reinterpret_cast<const 
FloatVal*>(digest_compression)->val;
+        if (compression >= 2048 && compression <= 10000) {
+            quantile_state.set_compression(compression);
+        }
+    }
+    
+    if(!src.is_null) {
+        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+        double double_value = StringParser::string_to_float<double>(
+                reinterpret_cast<char*>(src.ptr), src.len, &parse_result);
+        if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) {
+            std::stringstream error_msg;
+            error_msg << "The input: " << 
std::string(reinterpret_cast<char*>(src.ptr), src.len)
+                      << " is not valid, to_bitmap only support bigint value 
from 0 to "
+                         "18446744073709551615 currently";
+            ctx->set_error(error_msg.str().c_str());
+            return StringVal::null();
+        }
+        quantile_state.add_value(double_value);
+    }
+    return serialize(ctx, &quantile_state);
+}
+
+void QuantileStateFunctions::quantile_union(FunctionContext* ctx, const 
StringVal& src, StringVal* dst) {
+    if(src.is_null) {
+        return;
+    }
+    auto dst_quantile = reinterpret_cast<QuantileState<double>*>(dst->ptr);
+    if(src.len == 0) {
+        
dst_quantile->merge(*reinterpret_cast<QuantileState<double>*>(src.ptr));
+    } else {
+        QuantileState<double> state(Slice(src.ptr, src.len));
+        dst_quantile->merge(state);
+    }
+}
+
+DoubleVal QuantileStateFunctions::quantile_percent(FunctionContext* ctx, 
StringVal& src) {
+    const AnyVal* percentile = ctx->get_constant_arg(1);

Review comment:
       I'm afraid we should add a `quantile_percent_prepare` function to do 
some preparation work, e.g., validating the constant arguments.

##########
File path: be/src/util/quantile_state.cpp
##########
@@ -0,0 +1,377 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <string.h>
+#include <cmath>
+#include "util/quantile_state.h"
+#include "util/coding.h"
+#include "common/logging.h"
+
+
+namespace doris{
+
+template<typename T>
+QuantileState<T>::QuantileState():_type(EMPTY),compression(2048){}
+
+template<typename T>
+QuantileState<T>::QuantileState(float 
compression):_type(EMPTY),compression(compression){}
+
+
+template<typename T>
+QuantileState<T>::QuantileState(const Slice& slice) {
+    if (!deserialize(slice)) {
+        _type = EMPTY;
+    }
+}
+
+template<typename T>
+QuantileState<T>::~QuantileState() {
+    clear();
+}
+
+template<typename T>
+size_t QuantileState<T>::get_serialized_size() {
+    size_t size = 1 + sizeof(float); // type(QuantileStateType) + 
compression(float)
+    switch(_type) {
+    case EMPTY:
+        break;
+    case SINGLE:
+        size += sizeof(T);
+        break;
+    case EXPLICIT:
+        size += sizeof(uint16_t) + sizeof(T) * _explicit_data.size();
+        break;
+    case TDIGEST:
+        size += tdigest_ptr->serialized_size();
+        break;
+    }
+    return size;
+}
+
+template<typename T>
+void QuantileState<T>::set_compression(float compression) {
+    this->compression = compression;
+}
+
+template<typename T>
+bool QuantileState<T>::is_valid(const Slice& slice) {
+
+    if (slice.size < 1) {
+        return false;
+    }
+    const uint8_t* ptr = (uint8_t*)slice.data;
+    const uint8_t* end = (uint8_t*)slice.data + slice.size;
+    float compress_value = *reinterpret_cast<const float*>(ptr);
+    if (compress_value < 2048 || compress_value > 10000) {
+        return false;
+    }
+    ptr += sizeof(float);
+    
+
+    auto type = (QuantileStateType)*ptr++;
+    switch (type) {
+    case EMPTY:
+        break;
+    case SINGLE:{
+        if ((ptr + sizeof(T)) > end) {
+            return false;
+        }
+        ptr += sizeof(T);
+        // _single_data = *reinterpret_cast<T*>(ptr)++;
+        break;
+    }
+    case EXPLICIT: {
+        if ((ptr + sizeof(uint16_t)) > end) {
+            return false;
+        }
+        uint16_t num_explicits = decode_fixed16_le(ptr);
+        ptr += sizeof(uint16_t);
+        ptr += num_explicits * sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        if ((ptr + sizeof(uint32_t)) > end) {
+            return false;
+        }
+        uint32_t tdigest_serialized_length = decode_fixed32_le(ptr);
+        ptr += tdigest_serialized_length;
+        break;
+    }
+    default:
+        return false;
+    }
+    return ptr == end;
+}
+
+template<typename T>
+T QuantileState<T>::get_explicit_value_by_percentile(float percentile) {
+    DCHECK(_type == EXPLICIT);
+    if (percentile < 0 || percentile > 1) {
+        LOG(WARNING) << "get_explicit_value_by_percentile failed caused by 
percentile:" << percentile <<" is invalid";
+        return NAN;
+    }
+    int n = _explicit_data.size();
+    std::sort(_explicit_data.begin(), _explicit_data.end());
+
+    double index = (n - 1) * percentile;
+    int intIdx = (int) index;
+    if (intIdx == n-1 ){
+        return _explicit_data[intIdx];
+    }
+    return _explicit_data[intIdx+1] * (index - intIdx) + 
_explicit_data[intIdx] * (intIdx + 1 - index);
+}
+
+template<typename T>
+T QuantileState<T>::get_value_by_percentile(float percentile) {
+    switch(_type) {
+    case EMPTY: {
+        return NAN;
+    }
+    case SINGLE: {
+        return _single_data;
+    }
+    case EXPLICIT: {
+        return get_explicit_value_by_percentile(percentile);
+    }
+    case TDIGEST: {
+        return tdigest_ptr->quantile(percentile);
+    }
+    default:
+        break;
+    }
+    return NAN;
+}
+
+template<typename T>
+bool QuantileState<T>::deserialize(const Slice& slice) {
+    DCHECK(_type == EMPTY);
+
+    // in case of insert error data caused be crashed 
+    if (slice.data == nullptr || slice.size <= 0) {
+        return false;
+    }
+    // check input is valid
+    if (!is_valid(slice)) {
+        LOG(WARNING) << "QuantileState deserialize failed: slice is invalid";
+        return false;
+    }
+
+    const uint8_t* ptr = (uint8_t*)slice.data;
+    compression = *reinterpret_cast<const float*>(ptr);
+    ptr += sizeof(float);
+    // first byte : type
+    _type = (QuantileStateType)*ptr++;
+    switch (_type) {
+    case EMPTY:
+        // 1: empty 
+        break;
+    case SINGLE: {
+        // 2: single_data value
+        _single_data = *reinterpret_cast<const T*>(ptr);
+        ptr += sizeof(T);
+        break;
+    }
+    case EXPLICIT: {
+        // 3: number of explicit values
+        // make sure that num_explicit is positive
+        uint16_t num_explicits = decode_fixed16_le(ptr);
+        ptr += sizeof(uint16_t);
+        _explicit_data.reserve(std::min(num_explicits*2, 
QUANTILE_STATE_EXPLICIT_NUM));
+        _explicit_data.resize(num_explicits);
+        memcpy(&_explicit_data[0], ptr, num_explicits*sizeof(T));
+        ptr += num_explicits*sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        // 4: Tdigest object value
+        tdigest_ptr = new TDigest(0);
+        tdigest_ptr->unserialize(ptr);
+        break;
+    }
+    default:
+        // revert type to EMPTY
+        _type = EMPTY;
+        return false;
+    }
+    return true;
+
+}
+
+template<typename T>
+size_t QuantileState<T>::serialize(uint8_t* dst) const{
+    uint8_t* ptr = dst;
+    *reinterpret_cast<float *>(ptr) = compression;
+    ptr += sizeof(float);
+    switch (_type) {
+    case EMPTY: {
+        *ptr++ = EMPTY;
+        break;
+    }
+    case SINGLE: {
+        *ptr++ = SINGLE;
+        *reinterpret_cast<T *>(ptr) = _single_data;
+        ptr += sizeof(T);
+        break;
+    }
+    case EXPLICIT: {
+        *ptr++ = EXPLICIT;
+        uint16_t size = _explicit_data.size();
+        *reinterpret_cast<uint16_t *>(ptr) = size;
+        ptr += sizeof(uint16_t);
+        memcpy(ptr, &_explicit_data[0], size*sizeof(T));
+        ptr += size*sizeof(T);
+        break;
+    }
+    case TDIGEST: {
+        *ptr++ = TDIGEST;
+        size_t tdigest_size = tdigest_ptr->serialize(ptr);
+        ptr += tdigest_size;
+        break;
+    }
+    default:
+        break;
+    }
+    return ptr - dst;
+}
+
+template<typename T>
+void QuantileState<T>::merge(QuantileState<T>& other) {
+    switch(other._type) {
+    case EMPTY:
+        break;
+    case SINGLE: {
+        add_value(other._single_data);
+        break;
+    }
+    case EXPLICIT: {
+        switch(_type) {
+        case EMPTY:
+            _type = EXPLICIT;
+            _explicit_data.swap(other._explicit_data);
+            break;
+        case SINGLE:
+            _type = EXPLICIT;
+            _explicit_data.swap(other._explicit_data);
+            add_value(_single_data);
+            break;
+        case EXPLICIT:
+            if (_explicit_data.size() + other._explicit_data.size() > 
QUANTILE_STATE_EXPLICIT_NUM) {
+                _type = TDIGEST;
+                tdigest_ptr = new TDigest(compression);
+                for (int i = 0; i < _explicit_data.size(); i++) {
+                    tdigest_ptr->add(_explicit_data[i]);
+                }
+                for (int i = 0; i < other._explicit_data.size(); i++) {
+                    tdigest_ptr->add(other._explicit_data[i]);
+                }
+            } else {
+                _explicit_data.insert(_explicit_data.end(), 
other._explicit_data.begin(), other._explicit_data.end());            
+            }
+            break;
+        case TDIGEST:
+            for (int i = 0; i < other._explicit_data.size(); i++) {
+                tdigest_ptr->add(other._explicit_data[i]);
+            }
+            break;
+        default:
+            break;
+        }
+        break;
+    }
+    case TDIGEST: {
+        switch(_type) {
+        case EMPTY:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            break;
+        case SINGLE:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            tdigest_ptr->add(_single_data);
+            break;
+        case EXPLICIT:
+            _type = TDIGEST;
+            tdigest_ptr = other.tdigest_ptr;
+            other.tdigest_ptr = nullptr;
+            for (int i = 0; i < _explicit_data.size(); i++) {
+                tdigest_ptr->add(_explicit_data[i]);
+            }
+            break;
+        case TDIGEST:
+            tdigest_ptr->merge(other.tdigest_ptr);
+            break;
+        default:
+            break;
+        }
+        break;
+    }
+    default:
+        return;
+    }
+
+
+}
+
+template<typename T>
+void QuantileState<T>::add_value(const T& value) {
+    switch(_type) {
+    case EMPTY:
+        _single_data = value;
+        _type = SINGLE;
+        break;
+    case SINGLE:
+        _explicit_data.emplace_back(_single_data);
+        _explicit_data.emplace_back(value);
+        _type = EXPLICIT;
+        break;
+    case EXPLICIT:
+        if (_explicit_data.size() == QUANTILE_STATE_EXPLICIT_NUM) {
+            tdigest_ptr = new TDigest(compression);
+            for (int i = 0; i < _explicit_data.size(); i++) {
+                tdigest_ptr->add(_explicit_data[i]);
+            }
+            _explicit_data.clear();
+            _explicit_data.shrink_to_fit();
+            _type = TDIGEST;
+
+        } else {
+            _explicit_data.emplace_back(value);
+        }
+        break;
+    case TDIGEST:
+        tdigest_ptr->add(value);
+        break;
+    }
+
+}
+
+template<typename T>
+void QuantileState<T>::clear() {
+    _type = EMPTY;
+    if (tdigest_ptr != nullptr) {
+        delete tdigest_ptr;

Review comment:
       It's safe to delete a `nullptr`, so there is no need to check here.

##########
File path: be/src/util/quantile_state.h
##########
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
+#define DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
+
+
+#include <string>
+#include <vector>
+#include <memory>
+#include "tdigest.h"
+#include "slice.h"
+
+namespace doris {
+
+
+class Slice;
+class TDigest;
+
+const static int QUANTILE_STATE_EXPLICIT_NUM = 2048;
+
+enum QuantileStateType {
+    EMPTY = 0,
+    SINGLE = 1,   // single element
+    EXPLICIT = 2, // more than one elements,stored in vector
+    TDIGEST = 3   // TDIGEST object
+};
+
+template<typename T>
+class QuantileState
+{
+public:
+    QuantileState();
+    explicit QuantileState(float compression);
+    explicit QuantileState(const Slice& slice);
+    void set_compression(float compression);
+    bool deserialize(const Slice& slice);
+    size_t serialize(uint8_t* dst) const;
+    void merge(QuantileState<T>& other);
+    void add_value(const T& value);
+    void clear();
+    bool is_valid(const Slice& slice);
+    size_t get_serialized_size();
+    T get_value_by_percentile(float percentile);
+    T get_explicit_value_by_percentile(float percentile);
+    ~QuantileState();
+
+private:
+    QuantileStateType _type = EMPTY;
+    TDigest* tdigest_ptr = nullptr;
+    T _single_data;
+    std::vector<T> _explicit_data;
+    float compression;

Review comment:
       ```suggestion
       float _compression;
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to