Dandandan commented on code in PR #19374:
URL: https://github.com/apache/datafusion/pull/19374#discussion_r2629716091


##########
datafusion/common/src/hash_utils.rs:
##########
@@ -269,6 +269,127 @@ fn hash_array<T>(
     }
 }
 
+/// Hash a StringView or BytesView array
+///
+/// Templated to optimize inner loop based on presence of nulls and external 
buffers.
+///
+/// HAS_NULLS: do we have to check null in the inner loop
+/// HAS_BUFFERS: if true, array has external buffers; if false, all strings 
are inlined/ less then 12 bytes
+/// REHASH: if true, combining with existing hash, otherwise initializing
+#[inline(never)]
+fn hash_string_view_array_inner<
+    T: ByteViewType,
+    const HAS_NULLS: bool,
+    const HAS_BUFFERS: bool,
+    const REHASH: bool,
+>(
+    array: &GenericByteViewArray<T>,
+    random_state: &RandomState,
+    hashes_buffer: &mut [u64],
+) {
+    assert_eq!(
+        hashes_buffer.len(),
+        array.len(),
+        "hashes_buffer and array should be of equal length"
+    );
+
+    let buffers = array.data_buffers();
+    let view_bytes = |view_len: u32, view: u128| {
+        let view = ByteView::from(view);
+        let offset = view.offset as usize;
+        // SAFETY: view is a valid view as it came from the array
+        unsafe {
+            let data = buffers.get_unchecked(view.buffer_index as usize);
+            data.get_unchecked(offset..offset + view_len as usize)
+        }
+    };
+
+    let hashes_and_views = hashes_buffer.iter_mut().zip(array.views().iter());

Review Comment:
   I think a cool next experiment could be fusing multi-column hashing (e.g. 
hash 2/3 columns at a time).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to