compasses commented on code in PR #11631:
URL: https://github.com/apache/doris/pull/11631#discussion_r941925698


##########
be/src/olap/like_column_predicate.h:
##########
@@ -45,33 +47,115 @@ class LikeColumnPredicate : public ColumnPredicate {
         return Status::OK();
     }
 
+    uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel,
+                      uint16_t size) const override;
+
+    void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
+                          bool* flags) const override;
+
 private:
     template <bool is_nullable>
     void _base_evaluate(const ColumnBlock* block, uint16_t* sel, uint16_t* 
size) const {
         uint16_t new_size = 0;
-        for (uint16_t i = 0; i < *size; ++i) {
-            uint16_t idx = sel[i];
-            sel[new_size] = idx;
-            const StringValue* cell_value =
-                    reinterpret_cast<const 
StringValue*>(block->cell(idx).cell_ptr());
-            doris_udf::StringVal target;
-            cell_value->to_string_val(&target);
-            if constexpr (is_nullable) {
-                new_size += _opposite ^ (!block->cell(idx).is_null() &&
-                                         (_state->function)(_fn_ctx, target, 
pattern).val);
-            } else {
-                new_size += _opposite ^ (_state->function)(_fn_ctx, target, 
pattern).val;
+        if constexpr (!is_vectorized) {
+            for (uint16_t i = 0; i < *size; ++i) {
+                uint16_t idx = sel[i];
+                sel[new_size] = idx;
+                const StringValue* cell_value =
+                        reinterpret_cast<const 
StringValue*>(block->cell(idx).cell_ptr());
+                doris_udf::StringVal target;
+                cell_value->to_string_val(&target);
+                if constexpr (is_nullable) {
+                    new_size += _opposite ^ (!block->cell(idx).is_null() &&
+                                             (_state->function)(_fn_ctx, 
target, pattern).val);
+                } else {
+                    new_size += _opposite ^ (_state->function)(_fn_ctx, 
target, pattern).val;
+                }
             }
         }
         *size = new_size;
     }
 
+    template <bool is_and>
+    void _evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* 
flags) const {
+        if constexpr (is_vectorized) {
+            if (column.is_nullable()) {
+                auto* nullable_col =
+                        
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+                auto& null_map_data = 
nullable_col->get_null_map_column().get_data();
+                auto& nested_col = nullable_col->get_nested_column();
+                if (nested_col.is_column_dictionary()) {
+                    auto* nested_col_ptr = vectorized::check_and_get_column<
+                            
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
+                    auto& data_array = nested_col_ptr->get_data();
+                    for (uint16_t i = 0; i < size; i++) {
+                        if (null_map_data[i]) {
+                            if constexpr (is_and) {
+                                flags[i] &= _opposite;
+                            } else {
+                                flags[i] = _opposite;
+                            }
+                            continue;
+                        }
+
+                        StringValue cell_value = 
nested_col_ptr->get_value(data_array[i]);
+                        if constexpr (is_and) {
+                            unsigned char flag = 0;
+                            (_state->function)(
+                                    
const_cast<vectorized::LikeSearchState*>(&_like_state),
+                                    cell_value, pattern, &flag);
+                            flags[i] &= _opposite ^ flag;
+                        } else {
+                            unsigned char flag = 0;
+                            (_state->function)(
+                                    
const_cast<vectorized::LikeSearchState*>(&_like_state),
+                                    cell_value, pattern, &flag);
+                            flags[i] = _opposite ^ flag;
+                        }
+                    }
+                } else {
+                    LOG(FATAL) << "vectorized (not) like predicates should be 
dict column";
+                }
+            } else {
+                if (column.is_column_dictionary()) {
+                    auto* nested_col_ptr = vectorized::check_and_get_column<
+                            
vectorized::ColumnDictionary<vectorized::Int32>>(column);
+                    auto& data_array = nested_col_ptr->get_data();
+                    for (uint16_t i = 0; i < size; i++) {
+                        StringValue cell_value = 
nested_col_ptr->get_value(data_array[i]);
+                        if constexpr (is_and) {
+                            unsigned char flag = 0;
+                            (_state->function)(
+                                    
const_cast<vectorized::LikeSearchState*>(&_like_state),
+                                    cell_value, pattern, &flag);
+                            flags[i] &= _opposite ^ flag;
+                        } else {
+                            unsigned char flag = 0;
+                            (_state->function)(
+                                    
const_cast<vectorized::LikeSearchState*>(&_like_state),
+                                    cell_value, pattern, &flag);
+                            flags[i] = _opposite ^ flag;
+                        }
+                    }
+                } else {
+                    LOG(FATAL) << "vectorized (not) like predicates should be 
dict column";
+                }
+            }
+        }
+    }
+
     std::string _origin;
     // life time controlled by scan node
     doris_udf::FunctionContext* _fn_ctx;
-    doris_udf::StringVal pattern;
+    using PatternType = std::conditional_t<is_vectorized, StringValue, 
StringVal>;

Review Comment:
   Confusing for the type StringValue and StringVal, especially for the 
starter.  It's better to keep one of them.
   



##########
be/src/olap/like_column_predicate.cpp:
##########
@@ -23,74 +23,131 @@
 
 namespace doris {
 
-LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id,
-                                         doris_udf::FunctionContext* fn_ctx,
-                                         doris_udf::StringVal val)
+template <>
+LikeColumnPredicate<true>::LikeColumnPredicate(bool opposite, uint32_t 
column_id,
+                                               doris_udf::FunctionContext* 
fn_ctx,
+                                               doris_udf::StringVal val)
+        : ColumnPredicate(column_id, opposite),
+          _fn_ctx(fn_ctx),
+          pattern(reinterpret_cast<char*>(val.ptr), val.len) {
+    _state = reinterpret_cast<StateType*>(
+            
_fn_ctx->get_function_state(doris_udf::FunctionContext::THREAD_LOCAL));
+    _state->search_state.clone(_like_state);

Review Comment:
   So here clone the state because of the Hyperscan? Not sure the overhead of 
clone is high or not.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to