mrhhsg commented on code in PR #63389:
URL: https://github.com/apache/doris/pull/63389#discussion_r3402015668


##########
be/src/exprs/function/functions_comparison.h:
##########
@@ -20,30 +20,143 @@
 
 #pragma once
 
+#include <compare>
 #include <limits>
+#include <optional>
+#include <string_view>
 #include <type_traits>
 
+#include "common/check.h"
 #include "common/logging.h"
 #include "core/accurate_comparison.h"
 #include "core/assert_cast.h"
 #include "core/column/column_const.h"
 #include "core/column/column_decimal.h"
 #include "core/column/column_nullable.h"
 #include "core/column/column_string.h"
+#include "core/data_type/data_type_nullable.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/define_primitive_type.h"
 #include "core/decimal_comparison.h"
 #include "core/field.h"
 #include "core/memcmp_small.h"
 #include "core/value/vdatetime_value.h"
+#include "exprs/expr_zonemap_filter.h"
 #include "exprs/function/function.h"
 #include "exprs/function/function_helpers.h"
 #include "exprs/function/functions_logical.h"
+#include "exprs/vexpr.h"
 #include "storage/index/index_reader_helper.h"
 
 namespace doris {
 
+namespace comparison_zonemap_detail {
+
+enum class Op {
+    EQ,
+    NE,
+    LT,
+    LE,
+    GT,
+    GE,
+};
+
+inline Op symmetric_op(Op op) {
+    switch (op) {
+    case Op::EQ:
+    case Op::NE:
+        return op;
+    case Op::LT:
+        return Op::GT;
+    case Op::LE:
+        return Op::GE;
+    case Op::GT:
+        return Op::LT;
+    case Op::GE:
+        return Op::LE;
+    }
+    __builtin_unreachable();
+}
+
+inline ZoneMapFilterResult evaluate(const ZoneMapEvalContext& ctx, const VExprSPtrs& arguments,
+                                    Op op) {
+    auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
+
+    auto slot_type = expr_zonemap::fetch_compatible_slot_type(ctx, slot_literal->slot_index,
+                                                              slot_literal->slot_type);
+    if (slot_type == nullptr) {
+        return unsupported_zonemap_filter(ctx);
+    }
+    const auto* zone_map_ref = ctx.zone_map(slot_literal->slot_index);
+    if (zone_map_ref == nullptr) {
+        return unsupported_zonemap_filter(ctx);
+    }
+    const auto& zone_map = *zone_map_ref;
+    if (!zone_map.has_not_null) {
+        return ZoneMapFilterResult::kNoMatch;
+    }
+    if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, slot_type)) {
+        return unsupported_zonemap_filter(ctx);
+    }
+
+    const auto effective_op = slot_literal->literal_on_left ? symmetric_op(op) : op;
+    const auto& literal = slot_literal->literal;
+    switch (effective_op) {
+    case Op::EQ:
+        return expr_zonemap::field_less(literal, zone_map.min_value) ||
+                               expr_zonemap::field_greater(literal, zone_map.max_value)
+                       ? ZoneMapFilterResult::kNoMatch
+                       : ZoneMapFilterResult::kMayMatch;
+    case Op::NE:

Review Comment:
   实际上截断的时候会在最后一个字节 +1:
   ```cpp
   template <PrimitiveType Type>
   void TypedZoneMapIndexWriter<Type>::modify_index_before_flush(
           struct doris::segment_v2::ZoneMap& zone_map) {
       // Only varchar/string fields need to modify the zone map index, when the zone map max_value is truncated.
       // For varchar/string type, the zone map buffer is truncated at MAX_ZONE_MAP_INDEX_SIZE (512 bytes).
       // When a string value is longer than 512 bytes, only the first 512 bytes are stored.
       //
       // Truncating the max value creates a correctness problem: the truncated max is now smaller than the actual max.
       // This means the zone map could incorrectly skip pages that actually contain matching rows.
       //
       // So here we add one for the last byte if the max value is truncated, which makes the truncated max value
       // slightly larger than any real string that shares the same 512-byte prefix, ensuring no false negatives —
       // the zone map will never incorrectly skip a page that contains matching data.
       // In UTF-8 encoding, 0xff never appears as the last byte
       if constexpr (Type == TYPE_CHAR || Type == TYPE_VARCHAR || Type == TYPE_STRING) {
           auto& str = zone_map.max_value.get<Type>();
           if (str.size() == MAX_ZONE_MAP_INDEX_SIZE) {
               str[str.size() - 1] += 1;
           }
       }
   }
   ```
   > be/src/storage/index/zone_map/zone_map_index.cpp



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to