imay commented on a change in pull request #1756: Encapsulate HLL logic URL: https://github.com/apache/incubator-doris/pull/1756#discussion_r322046040
########## File path: be/src/olap/aggregate_func.h ########## @@ -399,64 +399,43 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_REPLACE, OLAP_FIELD_TYPE_CHAR> template <> struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_HLL_UNION, OLAP_FIELD_TYPE_HLL> { static void init(RowCursorCell* dst, const char* src, bool src_null, Arena* arena) { - // TODO(zc): refactor HLL implementation DCHECK_EQ(src_null, false); dst->set_not_null(); - auto* dest_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr()); - char* mem = arena->Allocate(sizeof(HllContext)); - auto* context = new (mem) HllContext; - HllSetHelper::init_context(context); - HllSetHelper::fill_set(src, context); - context->has_value = true; - char* variable_ptr = arena->Allocate(sizeof(HllContext*) + HLL_COLUMN_DEFAULT_LEN); - *(size_t*)(variable_ptr) = (size_t)(context); - variable_ptr += sizeof(HllContext*); - dest_slice->data = variable_ptr; - dest_slice->size = HLL_COLUMN_DEFAULT_LEN; + + auto* src_slice = reinterpret_cast<const Slice*>(src); + auto* dst_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr()); + + dst_slice->size = sizeof(HyperLogLog); + dst_slice->data = (char*)new HyperLogLog(src_slice->data);; } static void update(RowCursorCell* dst, const RowCursorCell& src, Arena* arena) { DCHECK_EQ(src.is_null(), false); - auto l_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr()); - auto context = *reinterpret_cast<HllContext**>(l_slice->data - sizeof(HllContext*)); - HllSetHelper::fill_set((const char*)src.cell_ptr(), context); + + auto* dst_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr()); + auto* src_slice = reinterpret_cast<const Slice*>(src.cell_ptr()); + auto* dst_hll = reinterpret_cast<HyperLogLog*>(dst_slice->data); + + // fixme(kks): trick here, need improve + if (arena == nullptr) { // for query + HyperLogLog src_hll = HyperLogLog(src_slice->data); + dst_hll->merge(src_hll); + } else { // for stream load + auto* src_hll = reinterpret_cast<HyperLogLog*>(src_slice->data); + dst_hll->merge(*src_hll); + + delete src_hll; Review comment: This is a dangerous to delete input source content. Better to leave a comment and try to find a better way to handle this ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org