This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new ada39dd [improvement][vec] better memequal impl to speed up string compare (#8229) ada39dd is described below commit ada39dd9adf6108a7015c192f4878766af71c488 Author: zbtzbtzbt <35688959+zbtzbt...@users.noreply.github.com> AuthorDate: Tue Mar 1 11:25:12 2022 +0800 [improvement][vec] better memequal impl to speed up string compare (#8229) like #8214 faster string compare operator in vec engine. --- be/src/runtime/string_value.hpp | 4 ++ be/src/vec/common/string_ref.h | 107 +++++++++++++--------------------------- 2 files changed, 37 insertions(+), 74 deletions(-) diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp index c44115d..aac9e3a 100644 --- a/be/src/runtime/string_value.hpp +++ b/be/src/runtime/string_value.hpp @@ -22,6 +22,7 @@ #include "runtime/string_value.h" #include "util/cpu_info.h" +#include "vec/common/string_ref.h" #ifdef __SSE4_2__ #include "util/sse_util.hpp" #endif @@ -88,6 +89,9 @@ inline bool StringValue::eq(const StringValue& other) const { if (this->len != other.len) { return false; } +#if defined(__SSE2__) + return memequalSSE2Wide(this->ptr, other.ptr, this->len); +#endif return string_compare(this->ptr, this->len, other.ptr, other.len, this->len) == 0; } diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index 5dd146e..8ecbe07 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. // This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringRef.h +// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/StringRef.h // and modified by Doris #pragma once @@ -98,6 +98,32 @@ inline bool compareSSE2x4(const char* p1, const char* p2) { } inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { + /** The order of branches and the trick with overlapping comparisons + * are the same as in memcpy implementation. + * See the comments in + * https://github.com/ClickHouse/ClickHouse/blob/master/base/glibc-compatibility/memcpy/memcpy.h + */ + + if (size <= 16) { + if (size >= 8) { + /// Chunks of [8,16] bytes. + return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2) && + unaligned_load<uint64_t>(p1 + size - 8) == unaligned_load<uint64_t>(p2 + size - 8); + } else if (size >= 4) { + /// Chunks of [4,7] bytes. + return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2) && + unaligned_load<uint32_t>(p1 + size - 4) == unaligned_load<uint32_t>(p2 + size - 4); + } else if (size >= 2) { + /// Chunks of [2,3] bytes. + return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2) && + unaligned_load<uint16_t>(p1 + size - 2) == unaligned_load<uint16_t>(p2 + size - 2); + } else if (size >= 1) { + /// A single byte. + return *p1 == *p2; + } + return true; + } + while (size >= 64) { if (compareSSE2x4(p1, p2)) { p1 += 64; @@ -107,74 +133,14 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { return false; } - switch ((size % 64) / 16) { - case 3: - if (!compareSSE2(p1 + 32, p2 + 32)) return false; - [[fallthrough]]; - case 2: - if (!compareSSE2(p1 + 16, p2 + 16)) return false; - [[fallthrough]]; - case 1: - if (!compareSSE2(p1, p2)) return false; - [[fallthrough]]; - case 0: - break; - } - - p1 += (size % 64) / 16 * 16; - p2 += (size % 64) / 16 * 16; - - switch (size % 16) { - case 15: - if (p1[14] != p2[14]) return false; - [[fallthrough]]; - case 14: - if (p1[13] != p2[13]) return false; - [[fallthrough]]; - case 13: - if (p1[12] != p2[12]) return false; - [[fallthrough]]; - case 12: - if (unaligned_load<uint32_t>(p1 + 8) == unaligned_load<uint32_t>(p2 + 8)) - goto l8; - else - return false; - case 11: - if (p1[10] != p2[10]) return false; - [[fallthrough]]; - case 10: - if (p1[9] != p2[9]) return false; - [[fallthrough]]; - case 9: - if (p1[8] != p2[8]) return false; - l8: - [[fallthrough]]; - case 8: - return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2); - case 7: - if (p1[6] != p2[6]) return false; - [[fallthrough]]; - case 6: - if (p1[5] != p2[5]) return false; - [[fallthrough]]; - case 5: - if (p1[4] != p2[4]) return false; - [[fallthrough]]; - case 4: - return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2); - case 3: - if (p1[2] != p2[2]) return false; - [[fallthrough]]; - case 2: - return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2); - case 1: - if (p1[0] != p2[0]) return false; - [[fallthrough]]; - case 0: - break; + switch (size / 16) + { + case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]]; + case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]]; + case 1: if (!compareSSE2(p1, p2)) return false; } - return true; + return compareSSE2(p1 + size - 16, p2 + size - 16); } #endif @@ -322,13 +288,6 @@ inline void set(StringRef& x) { } } // namespace ZeroTraits -inline bool operator==(StringRef lhs, const char* rhs) { - for (size_t pos = 0; pos < lhs.size; ++pos) - if (!rhs[pos] || lhs.data[pos] != rhs[pos]) return false; - - return true; -} - inline std::ostream& operator<<(std::ostream& os, const StringRef& str) { if (str.data) os.write(str.data, str.size); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org