This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b627088e8c [Optimization](String) Optimize q20 q21 q22 q23 LIKE_SUBSTRING (like '%xxx%') (#18309) b627088e8c is described below commit b627088e8c89da2e8cdacb439f0608df4ee734df Author: ZhangYu0123 <67053339+zhangyu0...@users.noreply.github.com> AuthorDate: Mon Apr 3 18:09:15 2023 +0800 [Optimization](String) Optimize q20 q21 q22 q23 LIKE_SUBSTRING (like '%xxx%') (#18309) Optimize q20, q21, q22, q23 LIKE_SUBSTRING (like '%xxxx%'). Idea is from clickhouse stringsearcher: Stringsearcher is about 10%~20% faster than volnitsky algorithm when needle size is less than 10 using two chars at beginning search in SIMD . Stringsearcher is faster than volnitsky algorithm, when needle size is less than 21. The changes are as follows: Using first two chars of needle at beginning search. We can compare two chars of needle and [n:n+17) chars in haystack in SIMD in one loop. Filter efficiency will be higher. When env support SIMD, we use stringsearcher. Test result in clickbench: q20 is about 15% up. q20: SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; q21, q22 is about 1%~5% up. q21: SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; q22: SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; q23 is about 30%~40% up and not stable. q23: SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; --- be/src/vec/common/string_searcher.h | 68 +++++++++++++++++++++++++++++++------ be/src/vec/common/volnitsky.h | 4 +++ 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/be/src/vec/common/string_searcher.h b/be/src/vec/common/string_searcher.h index 895ba538a9..19fd3d2c86 100644 --- a/be/src/vec/common/string_searcher.h +++ b/be/src/vec/common/string_searcher.h @@ -77,8 +77,10 @@ private: uint8_t first {}; #ifdef __SSE4_1__ - /// vector filled `first` for determining leftmost position of the first symbol - __m128i pattern; + uint8_t second {}; + /// vector filled `first` or `second` for determining leftmost position of the first and second symbols + __m128i first_pattern; + __m128i second_pattern; /// vector of first 16 characters of `needle` __m128i cache = _mm_setzero_si128(); int cachemask {}; @@ -95,8 +97,11 @@ public: first = *needle; #ifdef __SSE4_1__ - pattern = _mm_set1_epi8(first); - + first_pattern = _mm_set1_epi8(first); + if (needle + 1 < needle_end) { + second = *(needle + 1); + second_pattern = _mm_set1_epi8(second); + } const auto* needle_pos = needle; //for (const auto i : collections::range(0, n)) @@ -155,16 +160,57 @@ public: const CharT* search(const CharT* haystack, const CharT* const haystack_end) const { if (needle == needle_end) return haystack; - while (haystack < haystack_end) { + const auto needle_size = needle_end - needle; #ifdef __SSE4_1__ - if (haystack + n <= haystack_end && page_safe(haystack)) { - /// find first character - const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack)); - const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern); + /// Here is the quick path when needle_size is 1. + if (needle_size == 1) { + while (haystack < haystack_end) { + if (haystack + n <= haystack_end && page_safe(haystack)) { + const auto v_haystack = + _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack)); + const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, first_pattern); + const auto mask = _mm_movemask_epi8(v_against_pattern); + if (mask == 0) { + haystack += n; + continue; + } + + const auto offset = __builtin_ctz(mask); + haystack += offset; + + return haystack; + } - const auto mask = _mm_movemask_epi8(v_against_pattern); + if (haystack == haystack_end) { + return haystack_end; + } - /// first character not present in 16 octets starting at `haystack` + if (*haystack == first) { + return haystack; + } + ++haystack; + } + return haystack_end; + } +#endif + + while (haystack < haystack_end && haystack_end - haystack >= needle_size) { +#ifdef __SSE4_1__ + if ((haystack + 1 + n) <= haystack_end && page_safe(haystack)) { + /// find first and second characters + const auto v_haystack_block_first = + _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack)); + const auto v_haystack_block_second = + _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack + 1)); + + const auto v_against_pattern_first = + _mm_cmpeq_epi8(v_haystack_block_first, first_pattern); + const auto v_against_pattern_second = + _mm_cmpeq_epi8(v_haystack_block_second, second_pattern); + + const auto mask = _mm_movemask_epi8( + _mm_and_si128(v_against_pattern_first, v_against_pattern_second)); + /// first and second characters not present in 16 octets starting at `haystack` if (mask == 0) { haystack += n; continue; diff --git a/be/src/vec/common/volnitsky.h b/be/src/vec/common/volnitsky.h index 91daeffc83..c3d762e277 100644 --- a/be/src/vec/common/volnitsky.h +++ b/be/src/vec/common/volnitsky.h @@ -203,6 +203,10 @@ public: const auto* haystack_end = haystack + haystack_size; +#ifdef __SSE4_1__ + return fallback_searcher.search(haystack, haystack_end); +#endif + if (fallback || haystack_size <= needle_size || fallback_searcher.force_fallback) return fallback_searcher.search(haystack, haystack_end); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org