This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b627088e8c [Optimization](String) Optimize q20 q21 q22 q23 
LIKE_SUBSTRING (like '%xxx%')  (#18309)
b627088e8c is described below

commit b627088e8c89da2e8cdacb439f0608df4ee734df
Author: ZhangYu0123 <67053339+zhangyu0...@users.noreply.github.com>
AuthorDate: Mon Apr 3 18:09:15 2023 +0800

    [Optimization](String) Optimize q20 q21 q22 q23 LIKE_SUBSTRING (like 
'%xxx%')  (#18309)
    
    Optimize q20, q21, q22, q23 LIKE_SUBSTRING (like '%xxxx%'). Idea is from 
clickhouse stringsearcher:
    
    Stringsearcher is about 10%~20% faster than volnitsky algorithm when needle 
size is less than 10 using two chars at beginning search in SIMD .
    Stringsearcher is faster than volnitsky algorithm, when needle size is less 
than 21.
    The changes are as follows:
    
    Using first two chars of needle at beginning search. We can compare two 
chars of needle and [n:n+17) chars in haystack in SIMD in one loop. Filter 
efficiency will be higher.
    When env support SIMD, we use stringsearcher.
    Test result in clickbench:
    
    q20 is about 15% up.
    q20: SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
    q21, q22 is about 1%~5% up.
    q21: SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE 
'%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 
10;
    q22: SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, 
COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE 
'%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 
10;
    q23 is about 30%~40% up and not stable.
    q23: SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 
10;
---
 be/src/vec/common/string_searcher.h | 68 +++++++++++++++++++++++++++++++------
 be/src/vec/common/volnitsky.h       |  4 +++
 2 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/be/src/vec/common/string_searcher.h 
b/be/src/vec/common/string_searcher.h
index 895ba538a9..19fd3d2c86 100644
--- a/be/src/vec/common/string_searcher.h
+++ b/be/src/vec/common/string_searcher.h
@@ -77,8 +77,10 @@ private:
     uint8_t first {};
 
 #ifdef __SSE4_1__
-    /// vector filled `first` for determining leftmost position of the first 
symbol
-    __m128i pattern;
+    uint8_t second {};
+    /// vector filled `first` or `second` for determining leftmost position of 
the first and second symbols
+    __m128i first_pattern;
+    __m128i second_pattern;
     /// vector of first 16 characters of `needle`
     __m128i cache = _mm_setzero_si128();
     int cachemask {};
@@ -95,8 +97,11 @@ public:
         first = *needle;
 
 #ifdef __SSE4_1__
-        pattern = _mm_set1_epi8(first);
-
+        first_pattern = _mm_set1_epi8(first);
+        if (needle + 1 < needle_end) {
+            second = *(needle + 1);
+            second_pattern = _mm_set1_epi8(second);
+        }
         const auto* needle_pos = needle;
 
         //for (const auto i : collections::range(0, n))
@@ -155,16 +160,57 @@ public:
     const CharT* search(const CharT* haystack, const CharT* const 
haystack_end) const {
         if (needle == needle_end) return haystack;
 
-        while (haystack < haystack_end) {
+        const auto needle_size = needle_end - needle;
 #ifdef __SSE4_1__
-            if (haystack + n <= haystack_end && page_safe(haystack)) {
-                /// find first character
-                const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const 
__m128i*>(haystack));
-                const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, 
pattern);
+        /// Here is the quick path when needle_size is 1.
+        if (needle_size == 1) {
+            while (haystack < haystack_end) {
+                if (haystack + n <= haystack_end && page_safe(haystack)) {
+                    const auto v_haystack =
+                            _mm_loadu_si128(reinterpret_cast<const 
__m128i*>(haystack));
+                    const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, 
first_pattern);
+                    const auto mask = _mm_movemask_epi8(v_against_pattern);
+                    if (mask == 0) {
+                        haystack += n;
+                        continue;
+                    }
+
+                    const auto offset = __builtin_ctz(mask);
+                    haystack += offset;
+
+                    return haystack;
+                }
 
-                const auto mask = _mm_movemask_epi8(v_against_pattern);
+                if (haystack == haystack_end) {
+                    return haystack_end;
+                }
 
-                /// first character not present in 16 octets starting at 
`haystack`
+                if (*haystack == first) {
+                    return haystack;
+                }
+                ++haystack;
+            }
+            return haystack_end;
+        }
+#endif
+
+        while (haystack < haystack_end && haystack_end - haystack >= 
needle_size) {
+#ifdef __SSE4_1__
+            if ((haystack + 1 + n) <= haystack_end && page_safe(haystack)) {
+                /// find first and second characters
+                const auto v_haystack_block_first =
+                        _mm_loadu_si128(reinterpret_cast<const 
__m128i*>(haystack));
+                const auto v_haystack_block_second =
+                        _mm_loadu_si128(reinterpret_cast<const 
__m128i*>(haystack + 1));
+
+                const auto v_against_pattern_first =
+                        _mm_cmpeq_epi8(v_haystack_block_first, first_pattern);
+                const auto v_against_pattern_second =
+                        _mm_cmpeq_epi8(v_haystack_block_second, 
second_pattern);
+
+                const auto mask = _mm_movemask_epi8(
+                        _mm_and_si128(v_against_pattern_first, 
v_against_pattern_second));
+                /// first and second characters not present in 16 octets 
starting at `haystack`
                 if (mask == 0) {
                     haystack += n;
                     continue;
diff --git a/be/src/vec/common/volnitsky.h b/be/src/vec/common/volnitsky.h
index 91daeffc83..c3d762e277 100644
--- a/be/src/vec/common/volnitsky.h
+++ b/be/src/vec/common/volnitsky.h
@@ -203,6 +203,10 @@ public:
 
         const auto* haystack_end = haystack + haystack_size;
 
+#ifdef __SSE4_1__
+        return fallback_searcher.search(haystack, haystack_end);
+#endif
+
         if (fallback || haystack_size <= needle_size || 
fallback_searcher.force_fallback)
             return fallback_searcher.search(haystack, haystack_end);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to