HappenLee commented on code in PR #12819:
URL: https://github.com/apache/doris/pull/12819#discussion_r982457454


##########
be/src/util/hash_util.hpp:
##########
@@ -65,31 +96,29 @@ class HashUtil {
     // NOTE: Any changes made to this function need to be reflected in 
Codegen::GetHashFn.
     // TODO: crc32 hashes with different seeds do not result in different hash 
functions.
     // The resulting hashes are correlated.
-    static uint32_t crc_hash(const void* data, int32_t bytes, uint32_t hash) {
-        if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
-            return zlib_crc_hash(data, bytes, hash);
-        }
-        uint32_t words = bytes / sizeof(uint32_t);
-        bytes = bytes % sizeof(uint32_t);
+    static uint32_t crc_hash(const void* data, size_t size, uint32_t hash) {
+        const char* pos = reinterpret_cast<const char*>(data);
 
-        const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
+        if (size == 0) return 0;
 
-        while (words--) {
-            hash = _mm_crc32_u32(hash, *p);
-            ++p;
+        if (size < 8) {
+            return hash_less_than8(pos, size);
         }
 
-        const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
+        const char* end = pos + size;
+        size_t res = -1ULL;
 
-        while (bytes--) {
-            hash = _mm_crc32_u8(hash, *s);
-            ++s;
-        }
+        do {
+            int64_t word = unaligned_load<int64_t>(pos);

Review Comment:
   It seems that `int64` and `int32` do not need `unaligned_load`?



##########
be/src/util/hash_util.hpp:
##########
@@ -55,6 +56,36 @@ class HashUtil {
         return crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
     }
 
+    template <typename T>
+    static inline T unaligned_load(const void* address) {
+        T res {};
+        memcpy(&res, address, sizeof(res));
+        return res;
+    }
+
+    static inline uint64_t shift_mix(uint64_t val) { return val ^ (val >> 47); 
}
+
+    static inline size_t hash_less_than8(const char* data, size_t size) {
+        static constexpr uint64_t k2 = 0x9ae16a3b2f90404fULL;
+        static constexpr uint64_t k3 = 0xc949d7c7509e6557ULL;
+
+        if (size >= 4) {

Review Comment:
   This function is only called when `size < 8`, so it only needs
   `if (size >= 4) else {}`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to