mrhhsg commented on code in PR #63389:
URL: https://github.com/apache/doris/pull/63389#discussion_r3402015668
##########
be/src/exprs/function/functions_comparison.h:
##########
@@ -20,30 +20,143 @@
#pragma once
+#include <compare>
#include <limits>
+#include <optional>
+#include <string_view>
#include <type_traits>
+#include "common/check.h"
#include "common/logging.h"
#include "core/accurate_comparison.h"
#include "core/assert_cast.h"
#include "core/column/column_const.h"
#include "core/column/column_decimal.h"
#include "core/column/column_nullable.h"
#include "core/column/column_string.h"
+#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
#include "core/data_type/data_type_string.h"
#include "core/data_type/define_primitive_type.h"
#include "core/decimal_comparison.h"
#include "core/field.h"
#include "core/memcmp_small.h"
#include "core/value/vdatetime_value.h"
+#include "exprs/expr_zonemap_filter.h"
#include "exprs/function/function.h"
#include "exprs/function/function_helpers.h"
#include "exprs/function/functions_logical.h"
+#include "exprs/vexpr.h"
#include "storage/index/index_reader_helper.h"
namespace doris {
+namespace comparison_zonemap_detail {
+
+enum class Op { // Comparison operator that evaluate() applies between a slot and a literal.
+ EQ, // Returned unchanged by symmetric_op().
+ NE, // Returned unchanged by symmetric_op().
+ LT, // symmetric_op() maps this to GT.
+ LE, // symmetric_op() maps this to GE.
+ GT, // symmetric_op() maps this to LT.
+ GE, // symmetric_op() maps this to LE.
+};
+
+inline Op symmetric_op(Op op) { // Returns the operator to use once the two operands are swapped (evaluate() calls this when the literal is on the left).
+ switch (op) {
+ case Op::EQ:
+ case Op::NE: // EQ and NE do not depend on operand order.
+ return op;
+ case Op::LT: // a < b  <=>  b > a
+ return Op::GT;
+ case Op::LE: // a <= b  <=>  b >= a
+ return Op::GE;
+ case Op::GT: // a > b  <=>  b < a
+ return Op::LT;
+ case Op::GE: // a >= b  <=>  b <= a
+ return Op::LE;
+ }
+ __builtin_unreachable(); // Every enumerator returns above; only reachable if op holds a value outside Op.
+}
+
+inline ZoneMapFilterResult evaluate(const ZoneMapEvalContext& ctx, const
VExprSPtrs& arguments,
+ Op op) {
+ auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
+
+ auto slot_type = expr_zonemap::fetch_compatible_slot_type(ctx,
slot_literal->slot_index,
+
slot_literal->slot_type);
+ if (slot_type == nullptr) {
+ return unsupported_zonemap_filter(ctx);
+ }
+ const auto* zone_map_ref = ctx.zone_map(slot_literal->slot_index);
+ if (zone_map_ref == nullptr) {
+ return unsupported_zonemap_filter(ctx);
+ }
+ const auto& zone_map = *zone_map_ref;
+ if (!zone_map.has_not_null) {
+ return ZoneMapFilterResult::kNoMatch;
+ }
+ if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, slot_type)) {
+ return unsupported_zonemap_filter(ctx);
+ }
+
+ const auto effective_op = slot_literal->literal_on_left ? symmetric_op(op)
: op;
+ const auto& literal = slot_literal->literal;
+ switch (effective_op) {
+ case Op::EQ:
+ return expr_zonemap::field_less(literal, zone_map.min_value) ||
+ expr_zonemap::field_greater(literal,
zone_map.max_value)
+ ? ZoneMapFilterResult::kNoMatch
+ : ZoneMapFilterResult::kMayMatch;
+ case Op::NE:
Review Comment:
实际上截断的时候会在最后一个字节 +1:
```cpp
/// Adjusts a zone map's max_value so that a truncated string maximum still
/// acts as an upper bound for the data it summarizes.
///
/// @param zone_map zone map whose max_value may be modified in place.
template <PrimitiveType Type>
void TypedZoneMapIndexWriter<Type>::modify_index_before_flush(
        struct doris::segment_v2::ZoneMap& zone_map) {
    // Only CHAR/VARCHAR/STRING fields need their zone map adjusted, and only
    // the max_value.
    //
    // For these types the zone map buffer is truncated at
    // MAX_ZONE_MAP_INDEX_SIZE (512 bytes): when a string value is longer than
    // 512 bytes, only its first 512 bytes are stored.
    //
    // Truncating the max value creates a correctness problem: the truncated
    // max is now smaller than the actual max, so the zone map could
    // incorrectly skip pages that actually contain matching rows.
    //
    // To avoid that, add one to the last byte when the max value has been
    // truncated. The stored max then compares slightly larger than any real
    // string sharing the same 512-byte prefix, which ensures no false
    // negatives: the zone map will never skip a page that contains matching
    // data.
    //
    // The increment is not expected to wrap around, because 0xFF does not
    // appear as the last byte in UTF-8 encoded data.
    if constexpr (Type == TYPE_CHAR || Type == TYPE_VARCHAR || Type == TYPE_STRING) {
        auto& str = zone_map.max_value.get<Type>();
        // Truncation is detected by the stored length alone, so a max value
        // whose length is exactly MAX_ZONE_MAP_INDEX_SIZE is bumped as well.
        // NOTE(review): the truncation itself is not performed here; presumably
        // it happens where the value is recorded. Confirm against the writer.
        if (str.size() == MAX_ZONE_MAP_INDEX_SIZE) {
            str[str.size() - 1] += 1;
        }
    }
}
```
> be/src/storage/index/zone_map/zone_map_index.cpp
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]