This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 092d81f88a [BugFix](functions) fix multi_search_all_positions #18682 092d81f88a is described below commit 092d81f88ae086867eea3c5ff830e7bd0715d6bb Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Mon Apr 17 08:32:57 2023 +0800 [BugFix](functions) fix multi_search_all_positions #18682 --- be/src/vec/common/string_searcher.h | 66 ---------- .../functions/functions_multi_string_position.cpp | 140 +++++++++++---------- .../search/multi_search_all_positions.md | 16 +-- .../search/multi_search_all_positions.md | 16 +-- .../test_multi_string_position.out | 43 ++++--- .../test_multi_string_position.groovy | 47 +++++-- 6 files changed, 149 insertions(+), 179 deletions(-) diff --git a/be/src/vec/common/string_searcher.h b/be/src/vec/common/string_searcher.h index 97c5570a81..af76f2100d 100644 --- a/be/src/vec/common/string_searcher.h +++ b/be/src/vec/common/string_searcher.h @@ -416,70 +416,4 @@ struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase { return search(haystack, haystack + haystack_size); } }; - -template <typename StringSearcher> -class MultiStringSearcherBase { -private: - /// needles - const std::vector<StringRef>& needles; - /// searchers - std::vector<StringSearcher> searchers; - /// last index of needles that was not processed - size_t last; - -public: - explicit MultiStringSearcherBase(const std::vector<StringRef>& needles_) - : needles {needles_}, last {0} { - searchers.reserve(needles.size()); - - size_t size = needles.size(); - for (int i = 0; i < size; ++i) { - const char* cur_needle_data = needles[i].data; - const size_t cur_needle_size = needles[i].size; - - searchers.emplace_back(cur_needle_data, cur_needle_size); - } - } - - /** - * while (hasMoreToSearch()) - * { - * search inside the haystack with the known needles - * } - */ - bool hasMoreToSearch() { - if (last >= needles.size()) { - return false; - } - - return true; - } - - bool 
searchOne(const uint8_t* haystack, const uint8_t* haystack_end) { - const size_t size = needles.size(); - if (last >= size) { - return false; - } - - if (searchers[++last].search(haystack, haystack_end) != haystack_end) { - return true; - } - return false; - } - - template <typename CountCharsCallback, typename AnsType> - void searchOneAll(const uint8_t* haystack, const uint8_t* haystack_end, AnsType* answer, - const CountCharsCallback& count_chars) { - const size_t size = needles.size(); - for (; last < size; ++last) { - const uint8_t* ptr = searchers[last].search(haystack, haystack_end); - if (ptr != haystack_end) { - answer[last] = count_chars(haystack, ptr); - } - } - } -}; - -using MultiStringSearcher = MultiStringSearcherBase<ASCIICaseSensitiveStringSearcher>; - } // namespace doris diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp index a3c3420acb..5b8dd13b9b 100644 --- a/be/src/vec/functions/functions_multi_string_position.cpp +++ b/be/src/vec/functions/functions_multi_string_position.cpp @@ -18,6 +18,9 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsMultiStringPosition.h // and modified by Doris +#include <cstdint> +#include <iterator> + #include "function.h" #include "function_helpers.h" #include "vec/columns/column_array.h" @@ -103,48 +106,53 @@ public: } }; -template <typename Impl> struct FunctionMultiSearchAllPositionsImpl { +public: using ResultType = Int32; - + using SingleSearcher = ASCIICaseSensitiveStringSearcher; static constexpr auto name = "multi_search_all_positions"; static Status vector_constant(const ColumnString::Chars& haystack_data, const ColumnString::Offsets& haystack_offsets, const Array& needles_arr, PaddedPODArray<Int32>& vec_res, PaddedPODArray<UInt64>& offsets_res) { - if (needles_arr.size() > std::numeric_limits<UInt8>::max()) + if (needles_arr.size() > std::numeric_limits<UInt8>::max()) { return 
Status::InvalidArgument( "number of arguments for function {} doesn't match: " "passed {}, should be at most 255", name, needles_arr.size()); + } - std::vector<StringRef> needles; - needles.reserve(needles_arr.size()); - for (const auto& needle : needles_arr) needles.emplace_back(needle.get<StringRef>()); - - auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 { - return 1 + Impl::count_chars(reinterpret_cast<const char*>(start), - reinterpret_cast<const char*>(end)); - }; - - auto searcher = Impl::create_multi_searcher(needles); + const size_t needles_size = needles_arr.size(); + std::vector<SingleSearcher> searchers; + searchers.reserve(needles_size); + for (const auto& needle : needles_arr) { + searchers.emplace_back(needle.get<StringRef>().data, needle.get<StringRef>().size); + } const size_t haystack_size = haystack_offsets.size(); - const size_t needles_size = needles.size(); - - vec_res.resize(haystack_size * needles.size()); + vec_res.resize(haystack_size * needles_size); offsets_res.resize(haystack_size); std::fill(vec_res.begin(), vec_res.end(), 0); - while (searcher.hasMoreToSearch()) { + // we traverse to generator answer by Vector's slot of ColumnVector, not by Vector. + // TODO: check if the order of loop is best. The large data may make us writing across the line which size out of L2 cache. + for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size(); ans_slot_in_row++) { + // is i.e. 
answer slot index in one Vector(row) of answer + auto& searcher = searchers[ans_slot_in_row]; size_t prev_haystack_offset = 0; - for (size_t j = 0, from = 0; j < haystack_size; ++j, from += needles_size) { + + for (size_t haystack_index = 0, res_index = ans_slot_in_row; + haystack_index < haystack_size; ++haystack_index, res_index += needles_size) { const auto* haystack = &haystack_data[prev_haystack_offset]; - const auto* haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset; - searcher.searchOneAll(haystack, haystack_end, &vec_res[from], res_callback); - prev_haystack_offset = haystack_offsets[j]; + const auto* haystack_end = + haystack - prev_haystack_offset + haystack_offsets[haystack_index]; + + auto ans_now = searcher.search(haystack, haystack_end); + vec_res[res_index] = + ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1; + prev_haystack_offset = haystack_offsets[haystack_index]; } } @@ -166,72 +174,72 @@ struct FunctionMultiSearchAllPositionsImpl { size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; - auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 { - return 1 + Impl::count_chars(reinterpret_cast<const char*>(start), - reinterpret_cast<const char*>(end)); - }; - - offsets_res.reserve(haystack_offsets.size()); + offsets_res.reserve(haystack_data.size()); + uint64_t offset_now = 0; auto& nested_column = vectorized::check_and_get_column<vectorized::ColumnNullable>(needles_data) ->get_nested_column(); const ColumnString* needles_data_string = check_and_get_column<ColumnString>(nested_column); - std::vector<StringRef> needles; - for (size_t i = 0; i < haystack_offsets.size(); ++i) { - needles.reserve(needles_offsets[i] - prev_needles_offset); - - for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) { - needles.emplace_back(needles_data_string->get_data_at(j)); + std::vector<StringRef> needles_for_row; + // haystack first, row by row. 
+ for (size_t haystack_index = 0; haystack_index < haystack_offsets.size(); + ++haystack_index) { + // get haystack for this row. + const auto* haystack = &haystack_data[prev_haystack_offset]; + const auto* haystack_end = + haystack - prev_haystack_offset + haystack_offsets[haystack_index]; + + // build needles for this row. + needles_for_row.reserve(needles_offsets[haystack_index] - prev_needles_offset); + for (size_t j = prev_needles_offset; j < needles_offsets[haystack_index]; ++j) { + needles_for_row.emplace_back(needles_data_string->get_data_at(j)); } - - const size_t needles_size = needles.size(); - if (needles_size > std::numeric_limits<UInt8>::max()) + const size_t needles_row_size = needles_for_row.size(); + if (needles_row_size > std::numeric_limits<UInt8>::max()) { return Status::InvalidArgument( "number of arguments for function {} doesn't match: " "passed {}, should be at most 255", - name, needles_size); - - vec_res.resize(vec_res.size() + needles_size); - - auto searcher = Impl::create_multi_searcher(needles); - - std::fill(vec_res.begin() + vec_res.size() - needles_size, vec_res.end(), 0); + name, needles_row_size); + } - while (searcher.hasMoreToSearch()) { - const auto* haystack = &haystack_data[prev_haystack_offset]; - const auto* haystack_end = haystack + haystack_offsets[i] - prev_haystack_offset; - searcher.searchOneAll(haystack, haystack_end, - &vec_res[vec_res.size() - needles_size], res_callback); + // each searcher search for one needle. + std::vector<SingleSearcher> searchers; + searchers.clear(); + searchers.reserve(needles_row_size); + for (auto needle : needles_for_row) { + searchers.emplace_back(needle.data, needle.size); } - if (offsets_res.empty()) - offsets_res.push_back(needles_size); - else - offsets_res.push_back(offsets_res.back() + needles_size); + // search for first so that the ans's size is constant for each row. 
+ auto ans_row_begin = vec_res.size(); + vec_res.resize(vec_res.size() + needles_row_size); + offset_now += searchers.size(); + offsets_res.emplace_back(offset_now); + + //for now haystack, apply needle to search, generator answer by order. + for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size(); + ans_slot_in_row++) { + // is i.e. answer slot index in one Vector(row) of answer + auto& searcher = searchers[ans_slot_in_row]; + + auto ans_now = searcher.search(haystack, haystack_end); + vec_res[ans_row_begin + ans_slot_in_row] = + ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1; + } - prev_haystack_offset = haystack_offsets[i]; - prev_needles_offset = needles_offsets[i]; - needles.clear(); + prev_haystack_offset = haystack_offsets[haystack_index]; + prev_needles_offset = needles_offsets[haystack_index]; + needles_for_row.clear(); } return Status::OK(); } }; -struct MultiSearcherImpl { - using MultiSearcher = MultiStringSearcher; - - static MultiSearcher create_multi_searcher(const std::vector<StringRef>& needles) { - return MultiSearcher(needles); - } - - static size_t count_chars(const char* begin, const char* end) { return end - begin; } -}; - using FunctionMultiSearchAllPositions = - FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl<MultiSearcherImpl>>; + FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl>; void register_function_multi_string_position(SimpleFunctionFactory& factory) { factory.register_function<FunctionMultiSearchAllPositions>(); diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md index 9f5c07fdcc..db52923b6a 100644 --- a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md @@ -30,8 +30,7 @@ under the License. 
`ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)` - -Searches for the substrings `needles` in the string `haystack`, and returns array of positions of the found corresponding substrings in the string. Positions are indexed starting from 1. +Returns an `ARRAY` where the `i`-th element is the position of the `i`-th element in `needles`(i.e. `needle`)'s **first** occurrence in the string `haystack`. Positions are counted from 1, with 0 meaning the element was not found. **Case-sensitive**. ### example @@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world' | [0,13,0] | +----------------------------------------------------------------------+ -mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']); -+-----------------------------------------------------+ -| multi_search_all_positions('abc', ['a', 'bc', 'd']) | -+-----------------------------------------------------+ -| [1,2,0] | -+-----------------------------------------------------+ +select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']); ++---------------------------------------------------------------------------------------------+ +| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) | ++---------------------------------------------------------------------------------------------+ +| [0, 13, 0, 1, 8] | ++---------------------------------------------------------------------------------------------+ ``` + ### keywords MULTI_SEARCH,SEARCH,POSITIONS diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md index f93466587d..27f60f1b59 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md +++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md @@ -30,8 +30,7 @@ under the License. `ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)` - -搜索字符串 `haystack` 中的子字符串 `needles`,并返回在 `haystack` 中找到的相应 `needles` 的位置数组。位置的下标从 1 开始。 +返回一个 `ARRAY`,其中第 `i` 个元素为 `needles` 中第 `i` 个元素 `needle`,在字符串 `haystack` 中**首次**出现的位置。位置从1开始计数,0代表未找到该元素。**大小写敏感**。 ### example @@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world' | [0,13,0] | +----------------------------------------------------------------------+ -mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']); -+-----------------------------------------------------+ -| multi_search_all_positions('abc', ['a', 'bc', 'd']) | -+-----------------------------------------------------+ -| [1,2,0] | -+-----------------------------------------------------+ +select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']); ++---------------------------------------------------------------------------------------------+ +| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) | ++---------------------------------------------------------------------------------------------+ +| [0, 13, 0, 1, 8] | ++---------------------------------------------------------------------------------------------+ ``` + ### keywords MULTI_SEARCH,SEARCH,POSITIONS diff --git a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out index 165a97dea6..017fa620c4 100644 --- a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out +++ b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out @@ -1,25 +1,30 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this --- !select -- +-- !table_select1 -- +[0, 13, 0, 1, 8] +[0, 13, 0, 1, 8] +[1, 13, 8, 0, 0] +[1, 13, 8, 0, 0] +[0, 6, 0, 0, 0] + +-- !table_select2 -- +[0, 0] +[0, 0, 1, 13] +[0, 8] +[1, 8, 0, 13] +[1, 1, 4, 0] + +-- !select1 -- [4, 1, 1, 2, 6, 1, 1, 0, 4, 1, 14, 0, 10, 0, 16, 6] --- !select -- + +-- !select2 -- [0, 0, 0, 2, 3, 0, 1, 0, 5, 0, 0, 0, 11, 10, 6, 7] --- !select -- + +-- !select3 -- [1, 1, 0, 0, 0, 1, 1, 1, 4, 0, 6, 6, 0, 10, 1, 5] --- !select -- + +-- !select4 -- [1, 0, 0, 8, 6, 0, 7, 1, 3, 0, 0, 0, 0, 12] --- !select -- + +-- !select5 -- [7, 0, 0, 8, 0, 2, 0, 0, 6, 0, 2, 0, 3, 1] --- !select -- -[0, 0, 0, 19, 14, 22, 10, 0, 0, 13, 0, 8] --- !select -- -[6, 1, 1, 0, 0, 5, 1, 0, 8, 0, 5, 0, 2, 12, 0, 15, 0, 0] --- !select -- -[0, 0, 5, 1, 1, 0, 15, 1, 5, 10, 4, 0, 1, 0, 3, 0, 0, 0] --- !select -- -[0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1] --- !select -- -[0, 0, 0, 3, 0, 15, 0, 0, 12, 7, 0, 0, 0, 0, 5, 0] --- !select -- -[0, 0, 12, 4, 4, 0, 13, 23, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0] --- !select -- -[6, 8, 1, 4, 0, 10, 0, 1, 14, 0, 1, 0, 5, 0, 0, 0, 0, 15, 0, 1] \ No newline at end of file + diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy index 28cc08efb8..fa3ec92b66 100644 --- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy +++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy @@ -16,16 +16,39 @@ // under the License. 
suite("test_multi_string_position") { - qt_select "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])" - qt_select "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])" - qt_select "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])" - qt_select "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])" - qt_select "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])" - qt_select "select multi_search_all_positions('ydfgiluhyxwqdfiwtzobwzscyxhuov', ['srsoubrgghleyheujsbwwwykerzlqphgejpxvog', 'axchkyleddjwkvbuyhmekpbbbztxdlm', 'zqodzvlkmfe', 'obwz', 'fi', 'zsc', 'xwq', 'pvmurvrd', 'uulcdtexckmrsokmgdpkstlkoavyrmxeaacvydxf', 'dfi', 'mxcngttujzgtlssrmluaflmjuv', 'hyxwqdfiwtzobwzscyxhu'])" - qt_select "select multi_search_all_positions('pyepgwainvmwekwhhqxxvzdjw', ['w', '', '', 
'gvvkllofjnxvcu', 'kmwwhboplctvzazcyfpxhwtaddfnhekei', 'gwainv', 'pyepgwain', 'ekpnogkzzmbpfynsunwqp', 'invmwe', 'hrxpiplfplqjsstuybksuteoz', 'gwa', 'akfpyduqrwosxcbdemtxrxvundrgse', 'yepgwainvmw', 'wekwhhqxxvzdjw', 'fyimzvedmyriubgoznmcav', 'whhq', 'ozxowbwdqfisuupyzaqynoprgsjhkwlum', 'vpoufrofekajksdp'])" - qt_select "select multi_search_all_positions('lqwahffxurkbhhzytequotkfk', ['rwjqudpuaiufle', 'livwgbnflvy', 'hffxurkbhh', '', '', 'xcajwbqbttzfzfowjubmmgnmssat', 'zytequ', 'lq', 'h', 'rkbhh', 'a', 'immejthwgdr', '', 'llhhnlhcvnxxorzzjt', 'w', 'cvjynqxcivmmmvc', 'wexjomdcmursppjtsweybheyxzleuz', 'fzronsnddfxwlkkzidiknhpjipyrcrzel'])" - qt_select "select multi_search_all_positions('nkddriylnakicwgdwrfxpodqea', ['izwdpgrgpmjlwkanjrffgela', '', 'kicw', 'hltmfymgmrjckdiylkzjlvvyuleksikdjrg', 'yigveskrbidknjxigwilmkgyizewikh', 'xyvzhsnqmuec', 'odcgzlavzrwesjks', 'oilvfgliktoujukpgzvhmokdgkssqgqot', 'llsfsurvimbahwqtbqbp', 'nxj', 'pimydixeobdxmdkvhcyzcgnbhzsydx', 'couzmvxedobuohibgxwoxvmpote', 'driylnakicwgdwrf', 'nkddr'])" - qt_select "select multi_search_all_positions('jnckhtjqwycyihuejibqmddrdxe', ['tajzx', 'vuddoylclxatcjvinusdwt', 'spxkhxvzsljkmnzpeubszjnhqczavgtqopxn', 'ckhtjqwycyi', 'xlbfzdxspldoes', 'u', 'czosfebeznt', 'gzhabdsuyreisxvyfrfrkq', 'yihuejibqmd', 'jqwycyihuejibqm', 'cfbvprgzx', 'hxu', 'vxbhrfpzacgd', 'afoaij', 'htjqwycyihu', 'httzbskqd'])" - qt_select "select multi_search_all_positions('dzejajvpoojdkqbnayahygidyrjmb', ['khwxxvtnqhobbvwgwkpusjlhlzifiuclycml', 'nzvuhtwdaivo', 'dkqbnayahygidyr', 'jajvpoo', 'j', 'wdtbvwmeqgyvetu', 'kqbn', 'idyrjmb', 'tsnxuxevsxrxpgpfdgrkhwqpkse', '', 'efsdgzuefhdzkmquxu', 'zejajvpoojdkqbnayahyg', 'ugwfuighbygrxyctop', 'fcbxzbdugc', 'dxmzzrcplob', 'ejaj', 'wmmupyxrylvawsyfccluiiene', 'ohzmsqhpzbafvbzqwzftbvftei'])" - qt_select "select multi_search_all_positions('ffaujlverosspbzaqefjzql', ['lvero', 'erossp', 'f', 'ujlverosspbz', 'btfimgklzzxlbkbuqyrmnud', 'osspb', 'muqexvtjuaar', 'f', 'bzaq', 
'lprihswhwkdhqciqhfaowarn', 'ffaujlve', 'uhbbjrqjb', 'jlver', 'umucyhbbu', 'pjthtzmgxhvpbdphesnnztuu', 'xfqhfdfsbbazactpastzvzqudgk', 'lvovjfoatc', 'z', 'givejzhoqsd', ''])" + def table_name = "strings" + + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ CREATE TABLE IF NOT EXISTS ${table_name} + ( + `col1` INT NOT NULL, + `content` TEXT NOT NULL, + `mode` ARRAY<TEXT> NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`col1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`col1`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + + sql """ INSERT INTO ${table_name} (col1, content, mode) VALUES + (1, 'Hello, World!', ['hello', 'world'] ), + (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ), + (3, 'hello, world!', ['Hello', 'world'] ), + (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ), + (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] ); + """ + + qt_table_select1 "select multi_search_all_positions(content, ['hello', '!', 'world', 'Hello', 'World']) from ${table_name} order by col1" + qt_table_select2 "select multi_search_all_positions(content, mode) from ${table_name} order by col1" + + qt_select1 "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])" + qt_select2 "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])" + qt_select3 "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 
'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])" + qt_select4 "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])" + qt_select5 "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org