github-actions[bot] commented on code in PR #32404: URL: https://github.com/apache/doris/pull/32404#discussion_r1528221916
########## be/src/olap/delete_handler.cpp: ########## @@ -195,43 +203,48 @@ Status DeleteHandler::check_condition_valid(const TabletSchema& schema, const TC return Status::OK(); } -bool DeleteHandler::_parse_condition(const std::string& condition_str, TCondition* condition) { - bool matched = true; +// clang-format off +// Condition string format, the format is (column_name)(op)(value) +// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n" +// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX +// and compactible with any the lagacy +// operator: matches "=" +// value: matches "1597751948193618247 and length(source)<1;\n;\n" +// +// For more info, see DeleteHandler::construct_sub_predicates +// FIXME(gavin): support unicode. And this is a tricky implementation, it should +// not be the final resolution, refactor it. +const char* const CONDITION_STR_PATTERN = + // .----------------- column-name --------------------. .----------------------- operator ------------------------. .------------ value ----------. + R"(([_a-zA-Z@0-9\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))"; + // '----------------- group 1 ------------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' | + // match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------' + // match **ANY THING** without(4) + // or with(3) single quote +boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN); +// clang-format on + +bool DeleteHandler::parse_delete_condition(const std::string& condition_str, + TCondition* condition) { + bool matched = false; boost::smatch what; - try { - // Condition string format, the format is (column_name)(op)(value) - // eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n" - // group1: (\w+) matches "c1" - // group2: ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) matches "=" - // group3: ((?:[\s\S]+)?) matches "1597751948193618247 and length(source)<1;\n;\n" - const char* const CONDITION_STR_PATTERN = - R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))"; - boost::regex ex(CONDITION_STR_PATTERN); - if (boost::regex_match(condition_str, what, ex)) { - if (condition_str.size() != what[0].str().size()) { - matched = false; - } - } else { - matched = false; - } + VLOG_NOTICE << "condition_str: " << condition_str; + matched = boost::regex_match(condition_str, what, DELETE_HANDLER_REGEX) && + condition_str.size() == what[0].str().size(); // exact match } catch (boost::regex_error& e) { VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() << "]"; - matched = false; } + if (!matched) return false; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (!matched) { return false; } ``` ########## be/test/olap/delete_handler_test.cpp: ########## @@ -1232,4 +1232,39 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { _delete_handler.finalize(); } +// clang-format off +TEST_F(TestDeleteHandler, test_parse_delete_condition) { + auto test = [](const std::tuple<std::string, bool, TCondition>& in) { + auto& [cond_str, exp_succ, exp_cond] = in; + TCondition parsed_cond; + EXPECT_EQ(DeleteHandler::parse_delete_condition(cond_str, &parsed_cond), exp_succ) << " unexpected result, cond_str: " << cond_str; + if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result, cond_str: " << cond_str; + }; + + auto gen_cond = [](const std::string& col, const std::string& op, const std::string& val) { + TCondition cond; + cond.__set_column_name(col); + cond.__set_condition_op(op); + cond.__set_condition_values(std::vector<std::string>{val}); + return cond; + }; + + // <cond_str, parsed, expect_value>> + std::vector<std::tuple<std::string, bool, TCondition>> test_input { + {R"(abc=b)" , true, gen_cond(R"(abc)" , "=" , R"(b)" )}, // normal case + {R"(abc=)" , true, gen_cond(R"(abc)" , "=" , R"()" )}, // missing value, it means not to be parsed succefully, how every it's a ignorable bug + {R"(abc!='b')" , true, gen_cond(R"(abc)" , "!=", R"(b)" )}, // value surrounded by ' + {R"(@a*<<10086)" , true, gen_cond(R"(@a*)" , "<<", R"(10086)" )}, // column ends with * + {R"(*a=b)" , false, gen_cond(R"(*a)" , "=" , R"(b)" )}, // starts with * + {R"(a*a>>WTF(10086))" , true, gen_cond(R"(a*a)" , ">>", R"(WTF(10086))")}, // function + {R"(a-b IS NULL)" , true, gen_cond(R"(a-b)" , "IS", R"(NULL)" )}, // - in col name and test IS NULL Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion << cond_str; { } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org