Tanya-W commented on code in PR #19936:
URL: https://github.com/apache/doris/pull/19936#discussion_r1209063841


##########
be/src/vec/functions/match.cpp:
##########
@@ -15,134 +15,167 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <stddef.h>
-
-#include <algorithm>
-#include <boost/iterator/iterator_facade.hpp>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <utility>
-
-#include "common/config.h"
-#include "common/consts.h"
-#include "common/logging.h"
-#include "common/status.h"
-#include "vec/aggregate_functions/aggregate_function.h"
-#include "vec/columns/column.h"
-#include "vec/core/block.h"
-#include "vec/core/column_numbers.h"
-#include "vec/core/column_with_type_and_name.h"
-#include "vec/core/types.h"
-#include "vec/data_types/data_type_number.h"
-#include "vec/functions/function.h"
-#include "vec/functions/simple_function_factory.h"
-
-namespace doris {
-class FunctionContext;
-} // namespace doris
+#include "vec/functions/match.h"
 
-namespace doris::vectorized {
-
-class FunctionMatchBase : public IFunction {
-public:
-    size_t get_number_of_arguments() const override { return 2; }
-
-    String get_name() const override { return "match"; }
+#include "olap/rowset/segment_v2/inverted_index_reader.h"
+#include "runtime/query_context.h"
+#include "runtime/runtime_state.h"
 
-    /// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
-    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
-        return std::make_shared<DataTypeUInt8>();
-    }
+namespace doris::vectorized {
 
-    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
-                        size_t result, size_t input_rows_count) override {
-        auto match_query_str = block.get_by_position(arguments[1]).to_string(0);
-        std::string column_name = block.get_by_position(arguments[0]).name;
-        auto match_pred_column_name =
-                BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str;
-        if (!block.has(match_pred_column_name)) {
-            if (!config::enable_index_apply_preds_except_leafnode_of_andnode) {
-                return Status::Cancelled(
-                        "please check whether turn on the configuration "
-                        "'enable_index_apply_preds_except_leafnode_of_andnode'");
-            }
-            LOG(WARNING) << "execute match query meet error, block no column: "
-                         << match_pred_column_name;
-            return Status::InternalError(
-                    "match query meet error, no match predicate evaluate result column in block.");
+Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                    size_t result, size_t input_rows_count) {
+    auto match_query_str = block.get_by_position(arguments[1]).to_string(0);
+    std::string column_name = block.get_by_position(arguments[0]).name;
+    auto match_pred_column_name =
+            BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str;
+    if (!block.has(match_pred_column_name)) {
+        LOG(INFO) << "begin to execute match directly, column_name=" << column_name
+                << ", match_query_str=" << match_query_str;
+        InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+
+        const auto values_col =
+            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        const auto* values = check_and_get_column<ColumnString>(values_col.get());
+        if (!values) {
+            return Status::InternalError("Not supported input arguments types");
         }
+        // result column
+        auto res = ColumnUInt8::create();
+        ColumnUInt8::Container& vec_res = res->get_data();
+        // set default value to 0, and match functions only need to set 1/true
+        vec_res.resize_fill(input_rows_count);
+        RETURN_IF_ERROR(execute_match(column_name, match_query_str,
+                                input_rows_count, values, inverted_index_ctx, vec_res));
+        block.replace_by_position(result, std::move(res));
+    } else {
         auto match_pred_column =
                 block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const();
-
         block.replace_by_position(result, std::move(match_pred_column));
-        return Status::OK();
     }
-};
-
-class FunctionMatchAny : public FunctionMatchBase {
-public:
-    static constexpr auto name = "match_any";
-    static FunctionPtr create() { return std::make_shared<FunctionMatchAny>(); }
-
-    String get_name() const override { return name; }
-};
 
-class FunctionMatchAll : public FunctionMatchBase {
-public:
-    static constexpr auto name = "match_all";
-    static FunctionPtr create() { return std::make_shared<FunctionMatchAll>(); }
-
-    String get_name() const override { return name; }
-};
-
-class FunctionMatchPhrase : public FunctionMatchBase {
-public:
-    static constexpr auto name = "match_phrase";
-    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrase>(); }
-
-    String get_name() const override { return name; }
-};
-
-class FunctionMatchElementEQ : public FunctionMatchBase {
-public:
-    static constexpr auto name = "match_element_eq";
-    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrase>(); }
+    return Status::OK();
+}
 
-    String get_name() const override { return name; }
-};
+Status FunctionMatchAny::execute_match(const std::string& column_name,
+                        const std::string& match_query_str,
+                        size_t input_rows_count,
+                        const ColumnString* query_values,
+                        InvertedIndexCtx* inverted_index_ctx,
+                        ColumnUInt8::Container& result) {
+    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
+    if (inverted_index_ctx) {
+        parser_type = get_inverted_index_parser_type_from_string(inverted_index_ctx->_parser_type);
+    }
+    LOG(INFO) << "begin to run FunctionMatchAny::execute_match, parser_type: "
+            << inverted_index_parser_type_to_string(parser_type);
+    std::vector<std::string> tokens =
+                doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                column_name, match_query_str, doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, parser_type);
+    for (int i = 0; i < input_rows_count; i++) {
+        const auto& str_ref = query_values->get_data_at(i);
+        std::vector<std::string> values =
+                doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                column_name, str_ref.to_string(), doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, parser_type);
+        // TODO: more efficient impl
+        for (auto& token : tokens) {
+            auto it = std::find(values.begin(), values.end(), token);
+            if (it != values.end()) {
+                result[i] = true;
+                break;
+            }
+        }
+    }
 
-class FunctionMatchElementLT : public FunctionMatchBase {
-public:
-    static constexpr auto name = "match_element_lt";
-    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrase>(); }
+    return Status::OK();
+}
 
-    String get_name() const override { return name; }
-};
+Status FunctionMatchAll::execute_match(const std::string& column_name,
+                        const std::string& match_query_str,
+                        size_t input_rows_count,
+                        const ColumnString* query_values,
+                        InvertedIndexCtx* inverted_index_ctx,
+                        ColumnUInt8::Container& result) {
+    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
+    if (inverted_index_ctx) {
+        parser_type = get_inverted_index_parser_type_from_string(inverted_index_ctx->_parser_type);
+    }
+    LOG(INFO) << "begin to run FunctionMatchAll::execute_match, parser_type: "
+            << inverted_index_parser_type_to_string(parser_type);
+    std::vector<std::string> tokens =
+                doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                column_name, match_query_str, doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY, parser_type);
+
+    for (int i = 0; i < input_rows_count; i++) {
+        const auto& str_ref = query_values->get_data_at(i);
+        std::vector<std::string> values =
+                doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                column_name, str_ref.to_string(), doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY, parser_type);
+        // TODO: more efficient impl
+        auto find_count = 0;
+        for (auto& token : tokens) {
+            auto it = std::find(values.begin(), values.end(), token);
+            if (it != values.end()) {
+                ++find_count;
+            }

Review Comment:
   updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to