yiguolei commented on code in PR #60393:
URL: https://github.com/apache/doris/pull/60393#discussion_r2762032638


##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -535,86 +513,94 @@ Status ColumnReader::prune_predicates_by_zone_map(
     return Status::OK();
 }
 
-Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* 
min_value_container,
-                                     WrapperField* max_value_container) const {
+Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, ZoneMapInfo& 
zone_map_info) const {
     // min value and max value are valid if has_not_null is true
     if (zone_map.has_not_null()) {
-        min_value_container->set_not_null();
-        max_value_container->set_not_null();
+        zone_map_info.has_null = false;
 
         if (zone_map.has_negative_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_neg_inf = 
-std::numeric_limits<float>::infinity();
-                min_value_container->set_raw_value(&float_neg_inf, 
sizeof(float_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_neg_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_neg_inf = 
-std::numeric_limits<double>::infinity();
-                min_value_container->set_raw_value(&double_neg_inf, 
sizeof(double_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_neg_inf);
             } else {
                 return Status::InternalError("invalid zone map with negative 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(min_value_container->from_string(zone_map.min()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.min(),
+                                                                 
zone_map_info.min_value, opt));
         }
 
         if (zone_map.has_nan()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_nan = 
std::numeric_limits<float>::quiet_NaN();
-                max_value_container->set_raw_value(&float_nan, 
sizeof(float_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_FLOAT>(float_nan);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_nan = 
std::numeric_limits<double>::quiet_NaN();
-                max_value_container->set_raw_value(&double_nan, 
sizeof(double_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_DOUBLE>(double_nan);
             } else {
                 return Status::InternalError("invalid zone map with NaN");
             }
         } else if (zone_map.has_positive_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_pos_inf = 
std::numeric_limits<float>::infinity();
-                max_value_container->set_raw_value(&float_pos_inf, 
sizeof(float_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_pos_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_pos_inf = 
std::numeric_limits<double>::infinity();
-                max_value_container->set_raw_value(&double_pos_inf, 
sizeof(double_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_pos_inf);
             } else {
                 return Status::InternalError("invalid zone map with positive 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(max_value_container->from_string(zone_map.max()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.max(),
+                                                                 
zone_map_info.max_value, opt));
         }
+    } else {
+        zone_map_info.is_all_null = true;
     }
     // for compatible original Cond eval logic
     if (zone_map.has_null()) {
         // for compatible, if exist null, original logic treat null as min
-        min_value_container->set_null();
+        zone_map_info.has_null = true;
         if (!zone_map.has_not_null()) {
             // for compatible OlapCond's 'is not null'
-            max_value_container->set_null();
+            zone_map_info.has_null = true;

Review Comment:
   这里应该设置 is all null  = true ?



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -450,46 +446,34 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, 
vectorized::MutableColumn
         return Status::InternalError("segment zonemap not exist");
     }
     // TODO: this work to get min/max value seems should only do once
-    FieldType type = _type_info->type();
-    std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, 
_meta_length));
-    std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, 
_meta_length));
-    RETURN_IF_ERROR(
-            _parse_zone_map_skip_null(*_segment_zone_map, min_value.get(), 
max_value.get()));
+    ZoneMapInfo zone_map_info;
+    RETURN_IF_ERROR(_parse_zone_map_skip_null(*_segment_zone_map, 
zone_map_info));
 
     dst->reserve(*n);
-    bool is_string = is_olap_string_type(type);
-    if (max_value->is_null()) {
-        assert_cast<vectorized::ColumnNullable&>(*dst).insert_default();
-    } else {
-        if (is_string) {
-            auto sv = (StringRef*)max_value->cell_ptr();
-            if (type == FieldType::OLAP_FIELD_TYPE_CHAR) {
-                *sv = sv->trim_tail_padding_zero();
+    if (zone_map_info.is_all_null) {
+        
assert_cast<vectorized::ColumnNullable&>(*dst).insert_many_defaults(*n);
+        return Status::OK();
+    }
+    FieldType type = _type_info->type();
+    if (type == FieldType::OLAP_FIELD_TYPE_CHAR) {
+        auto s = zone_map_info.max_value.template get<TYPE_CHAR>();
+        while (!s.empty() && s.back() == '\0') {
+            s.pop_back();

Review Comment:
   我们这里为什么要把char 类型的padding 去掉?



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -535,86 +513,94 @@ Status ColumnReader::prune_predicates_by_zone_map(
     return Status::OK();
 }
 
-Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* 
min_value_container,
-                                     WrapperField* max_value_container) const {
+Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, ZoneMapInfo& 
zone_map_info) const {
     // min value and max value are valid if has_not_null is true
     if (zone_map.has_not_null()) {
-        min_value_container->set_not_null();
-        max_value_container->set_not_null();
+        zone_map_info.has_null = false;
 
         if (zone_map.has_negative_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_neg_inf = 
-std::numeric_limits<float>::infinity();
-                min_value_container->set_raw_value(&float_neg_inf, 
sizeof(float_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_neg_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_neg_inf = 
-std::numeric_limits<double>::infinity();
-                min_value_container->set_raw_value(&double_neg_inf, 
sizeof(double_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_neg_inf);
             } else {
                 return Status::InternalError("invalid zone map with negative 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(min_value_container->from_string(zone_map.min()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.min(),
+                                                                 
zone_map_info.min_value, opt));
         }
 
         if (zone_map.has_nan()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_nan = 
std::numeric_limits<float>::quiet_NaN();
-                max_value_container->set_raw_value(&float_nan, 
sizeof(float_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_FLOAT>(float_nan);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_nan = 
std::numeric_limits<double>::quiet_NaN();
-                max_value_container->set_raw_value(&double_nan, 
sizeof(double_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_DOUBLE>(double_nan);
             } else {
                 return Status::InternalError("invalid zone map with NaN");
             }
         } else if (zone_map.has_positive_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_pos_inf = 
std::numeric_limits<float>::infinity();
-                max_value_container->set_raw_value(&float_pos_inf, 
sizeof(float_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_pos_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_pos_inf = 
std::numeric_limits<double>::infinity();
-                max_value_container->set_raw_value(&double_pos_inf, 
sizeof(double_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_pos_inf);
             } else {
                 return Status::InternalError("invalid zone map with positive 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(max_value_container->from_string(zone_map.max()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.max(),
+                                                                 
zone_map_info.max_value, opt));
         }
+    } else {
+        zone_map_info.is_all_null = true;
     }
     // for compatible original Cond eval logic
     if (zone_map.has_null()) {
         // for compatible, if exist null, original logic treat null as min
-        min_value_container->set_null();
+        zone_map_info.has_null = true;
         if (!zone_map.has_not_null()) {
             // for compatible OlapCond's 'is not null'
-            max_value_container->set_null();
+            zone_map_info.has_null = true;

Review Comment:
   再就是,这个分支我觉得没用啊。。。



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -623,25 +609,21 @@ Status ColumnReader::_get_filtered_pages(
         std::vector<uint32_t>* page_indexes, const ColumnIteratorOptions& 
iter_opts) {
     RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, 
_opts.kept_in_memory, iter_opts));
 
-    FieldType type = _type_info->type();
     const std::vector<ZoneMapPB>& zone_maps = 
_zone_map_index->page_zone_maps();
     size_t page_size = _zone_map_index->num_pages();
-    std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, 
_meta_length));
-    std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, 
_meta_length));
+    ZoneMapInfo zone_map_info;
     for (size_t i = 0; i < page_size; ++i) {
         if (zone_maps[i].pass_all()) {
             page_indexes->push_back(cast_set<uint32_t>(i));
         } else {
-            RETURN_IF_ERROR(_parse_zone_map(zone_maps[i], min_value.get(), 
max_value.get()));
-            if (_zone_map_match_condition(zone_maps[i], min_value.get(), 
max_value.get(),
-                                          col_predicates)) {
+            RETURN_IF_ERROR(_parse_zone_map(zone_maps[i], zone_map_info));

Review Comment:
   把614 行移动到616,防止重复使用,结果一些变量没有reset



##########
be/src/olap/accept_null_predicate.h:
##########
@@ -106,12 +106,12 @@ class AcceptNullPredicate : public ColumnPredicate {
         DCHECK(false) << "should not reach here";
     }
 
-    bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
+    bool evaluate_and(const ZoneMapInfo& zone_map_info) const override {
         // there is null in range, accept it
-        if (statistic.first->is_null() || statistic.second->is_null()) {
+        if (zone_map_info.has_null) {

Review Comment:
   这里我们不需要管理nested opposite 吗?
   



##########
be/src/vec/data_types/serde/data_type_datev2_serde.cpp:
##########
@@ -229,6 +229,25 @@ Status DataTypeDateV2SerDe::from_string_batch(const 
ColumnString& col_str, Colum
     return Status::OK();
 }
 
+Status DataTypeDateV2SerDe::from_string(const std::string& str, Field& field,

Review Comment:
   这里的写法跟    static Status from_string(void* buf, const std::string& scan_key, 
const int precision,
                                 const int scale) {
           tm time_tm;
           char* res = strptime(scan_key.c_str(), "%Y-%m-%d %H:%M:%S", 
&time_tm);
   
           if (nullptr != res) {
               CppType value = ((time_tm.tm_year + 1900) * 10000L + 
(time_tm.tm_mon + 1) * 100L +
                                time_tm.tm_mday) *
                                       1000000L +
                               time_tm.tm_hour * 10000L + time_tm.tm_min * 100L 
+ time_tm.tm_sec;
               *reinterpret_cast<CppType*>(buf) = value;
           } else {
               // 1400 - 01 - 01
               *reinterpret_cast<CppType*>(buf) = 14000101000000L;
           }
   
           return Status::OK();
       }
       static std::string to_string(const void* src) {
           CppType tmp = *reinterpret_cast<const CppType*>(src);
           CppType part1 = (tmp / 1000000L);
           CppType part2 = (tmp - part1 * 1000000L);
   
           int year = (part1 / 10000L) % 10000;
           int mon = (part1 / 100) % 100;
           int mday = part1 % 100;
   
           int hour = (part2 / 10000L) % 10000;
           int min = (part2 / 100) % 100;
           int sec = part2 % 100;
   
           return fmt::format(FMT_COMPILE("{:04d}-{:02d}-{:02d} 
{:02d}:{:02d}:{:02d}"), year, mon,
                              mday, hour, min, sec);
       }
    这个是一样的吗?



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -535,86 +513,94 @@ Status ColumnReader::prune_predicates_by_zone_map(
     return Status::OK();
 }
 
-Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* 
min_value_container,
-                                     WrapperField* max_value_container) const {
+Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, ZoneMapInfo& 
zone_map_info) const {
     // min value and max value are valid if has_not_null is true
     if (zone_map.has_not_null()) {
-        min_value_container->set_not_null();
-        max_value_container->set_not_null();
+        zone_map_info.has_null = false;
 
         if (zone_map.has_negative_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_neg_inf = 
-std::numeric_limits<float>::infinity();
-                min_value_container->set_raw_value(&float_neg_inf, 
sizeof(float_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_neg_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_neg_inf = 
-std::numeric_limits<double>::infinity();
-                min_value_container->set_raw_value(&double_neg_inf, 
sizeof(double_neg_inf));
+                zone_map_info.min_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_neg_inf);
             } else {
                 return Status::InternalError("invalid zone map with negative 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(min_value_container->from_string(zone_map.min()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.min(),
+                                                                 
zone_map_info.min_value, opt));
         }
 
         if (zone_map.has_nan()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_nan = 
std::numeric_limits<float>::quiet_NaN();
-                max_value_container->set_raw_value(&float_nan, 
sizeof(float_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_FLOAT>(float_nan);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_nan = 
std::numeric_limits<double>::quiet_NaN();
-                max_value_container->set_raw_value(&double_nan, 
sizeof(double_nan));
+                zone_map_info.max_value = 
vectorized::Field::create_field<TYPE_DOUBLE>(double_nan);
             } else {
                 return Status::InternalError("invalid zone map with NaN");
             }
         } else if (zone_map.has_positive_inf()) {
             if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
                 static auto constexpr float_pos_inf = 
std::numeric_limits<float>::infinity();
-                max_value_container->set_raw_value(&float_pos_inf, 
sizeof(float_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_FLOAT>(float_pos_inf);
             } else if (FieldType::OLAP_FIELD_TYPE_DOUBLE == _meta_type) {
                 static auto constexpr double_pos_inf = 
std::numeric_limits<double>::infinity();
-                max_value_container->set_raw_value(&double_pos_inf, 
sizeof(double_pos_inf));
+                zone_map_info.max_value =
+                        
vectorized::Field::create_field<TYPE_DOUBLE>(double_pos_inf);
             } else {
                 return Status::InternalError("invalid zone map with positive 
Infinity");
             }
         } else {
-            RETURN_IF_ERROR(max_value_container->from_string(zone_map.max()));
+            vectorized::DataTypeSerDe::FormatOptions opt;
+            
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.max(),
+                                                                 
zone_map_info.max_value, opt));
         }
+    } else {
+        zone_map_info.is_all_null = true;
     }
     // for compatible original Cond eval logic
     if (zone_map.has_null()) {
         // for compatible, if exist null, original logic treat null as min
-        min_value_container->set_null();
+        zone_map_info.has_null = true;
         if (!zone_map.has_not_null()) {
             // for compatible OlapCond's 'is not null'
-            max_value_container->set_null();
+            zone_map_info.has_null = true;
         }
     }
     return Status::OK();
 }
 
 Status ColumnReader::_parse_zone_map_skip_null(const ZoneMapPB& zone_map,

Review Comment:
   如果我们zone_map_info 能表达出来all null,has null,not null 之类的,这个函数感觉没用了啊



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to