github-actions[bot] commented on code in PR #32404:
URL: https://github.com/apache/doris/pull/32404#discussion_r1528221916


##########
be/src/olap/delete_handler.cpp:
##########
@@ -195,43 +203,48 @@ Status DeleteHandler::check_condition_valid(const 
TabletSchema& schema, const TC
     return Status::OK();
 }
 
-bool DeleteHandler::_parse_condition(const std::string& condition_str, 
TCondition* condition) {
-    bool matched = true;
+// clang-format off
+// Condition string format, the format is (column_name)(op)(value)
+// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
+// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
+//              and compactible with any the lagacy
+// operator: matches "="
+// value: matches "1597751948193618247  and length(source)<1;\n;\n"
+//
+// For more info, see DeleteHandler::construct_sub_predicates
+// FIXME(gavin): support unicode. And this is a tricky implementation, it 
should
+//               not be the final resolution, refactor it.
+const char* const CONDITION_STR_PATTERN =
+    // .----------------- column-name --------------------.   
.----------------------- operator ------------------------.   .------------ 
value ----------.
+    
R"(([_a-zA-Z@0-9\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
 IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
+    // '----------------- group 1 ------------------------'   
'--------------------- group 2 ---------------------------'   | '-- group 4--'  
            |
+    //                                                             match any 
of: = != >> << >= <= *= " IS "                 '----------- group 3 ---------'
+    //                                                                         
                                              match **ANY THING** without(4)
+    //                                                                         
                                              or with(3) single quote
+boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
+// clang-format on
+
+bool DeleteHandler::parse_delete_condition(const std::string& condition_str,
+                                           TCondition* condition) {
+    bool matched = false;
     boost::smatch what;
-
     try {
-        // Condition string format, the format is (column_name)(op)(value)
-        // eg:  condition_str="c1 = 1597751948193618247  and 
length(source)<1;\n;\n"
-        //  group1:  (\w+) matches "c1"
-        //  group2:  ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) 
matches  "="
-        //  group3:  ((?:[\s\S]+)?) matches "1597751948193618247  and 
length(source)<1;\n;\n"
-        const char* const CONDITION_STR_PATTERN =
-                
R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
-        boost::regex ex(CONDITION_STR_PATTERN);
-        if (boost::regex_match(condition_str, what, ex)) {
-            if (condition_str.size() != what[0].str().size()) {
-                matched = false;
-            }
-        } else {
-            matched = false;
-        }
+        VLOG_NOTICE << "condition_str: " << condition_str;
+        matched = boost::regex_match(condition_str, what, 
DELETE_HANDLER_REGEX) &&
+                  condition_str.size() == what[0].str().size(); // exact match
     } catch (boost::regex_error& e) {
         VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; 
error=" << e.what()
                     << "]";
-        matched = false;
     }
+    if (!matched) return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (!matched) { return false;
   }
   ```
   



##########
be/test/olap/delete_handler_test.cpp:
##########
@@ -1232,4 +1232,39 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
     _delete_handler.finalize();
 }
 
+// clang-format off
+TEST_F(TestDeleteHandler, test_parse_delete_condition) {
+    auto test = [](const std::tuple<std::string, bool, TCondition>& in) {
+        auto& [cond_str, exp_succ, exp_cond] = in;
+        TCondition parsed_cond;
+        EXPECT_EQ(DeleteHandler::parse_delete_condition(cond_str, 
&parsed_cond), exp_succ) << " unexpected result, cond_str: " << cond_str;
+        if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result, 
cond_str: " << cond_str;
+    };
+
+    auto gen_cond = [](const std::string& col, const std::string& op, const 
std::string& val) {
+        TCondition cond;
+        cond.__set_column_name(col);
+        cond.__set_condition_op(op);
+        cond.__set_condition_values(std::vector<std::string>{val});
+        return cond;
+    };
+
+    // <cond_str, parsed, expect_value>>
+    std::vector<std::tuple<std::string, bool, TCondition>> test_input {
+        {R"(abc=b)"             , true,  gen_cond(R"(abc)"   , "=" , R"(b)"    
     )}, // normal case
+        {R"(abc=)"              , true, gen_cond(R"(abc)"    , "=" , R"()"     
     )}, // missing value, it means not to be parsed succefully, how every it's 
a ignorable bug
+        {R"(abc!='b')"          , true,  gen_cond(R"(abc)"   , "!=", R"(b)"    
     )}, // value surrounded by '
+        {R"(@a*<<10086)"        , true,  gen_cond(R"(@a*)"   , "<<", 
R"(10086)"     )}, // column ends with *
+        {R"(*a=b)"              , false, gen_cond(R"(*a)"    , "=" , R"(b)"    
     )}, // starts with *
+        {R"(a*a>>WTF(10086))"   , true,  gen_cond(R"(a*a)"   , ">>", 
R"(WTF(10086))")}, // function
+        {R"(a-b IS NULL)"       , true,  gen_cond(R"(a-b)"   , "IS", R"(NULL)" 
     )}, // - in col name and test IS NULL

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
   << cond_str; {
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to