Copilot commented on code in PR #16434:
URL: https://github.com/apache/datafusion/pull/16434#discussion_r2156783610


##########
datafusion/physical-plan/src/joins/hash_join.rs:
##########
@@ -980,15 +981,26 @@ async fn collect_left_input(
         .await?;
 
     // Estimation of memory size, required for hashtable, prior to allocation.
-    // Final result can be verified using `RawTable.allocation_info()`
-    let fixed_size = size_of::<JoinHashMap>();
-    let estimated_hashtable_size =
-        estimate_memory_size::<(u64, u64)>(num_rows, fixed_size)?;
-
-    reservation.try_grow(estimated_hashtable_size)?;
-    metrics.build_mem_used.add(estimated_hashtable_size);
+    // Final result can be verifiedJoinHashMapTypele.allocation_info()`

Review Comment:
   Fix the typo in this comment. It should read something like: “Final result 
can be verified using `JoinHashMapType::allocation_info()`.”
   ```suggestion
       // Final result can be verified using 
`JoinHashMapType::allocation_info()`.
   ```



##########
datafusion/physical-plan/src/joins/stream_join_utils.rs:
##########
@@ -47,26 +51,45 @@ use hashbrown::HashTable;
 
 /// Implementation of `JoinHashMapType` for `PruningJoinHashMap`.
 impl JoinHashMapType for PruningJoinHashMap {
-    type NextType = VecDeque<u64>;
-
     // Extend with zero
     fn extend_zero(&mut self, len: usize) {
         self.next.resize(self.next.len() + len, 0)
     }
 
-    /// Get mutable references to the hash map and the next.
-    fn get_mut(&mut self) -> (&mut HashTable<(u64, u64)>, &mut Self::NextType) 
{
-        (&mut self.map, &mut self.next)
+    fn update_from_iter<'a>(
+        &mut self,
+        iter: Box<dyn Iterator<Item = (usize, &'a u64)> + Send + 'a>,
+        deleted_offset: usize,
+    ) {
+        let slice: &mut [u64] = self.next.make_contiguous();
+        update_from_iter::<u64>(&mut self.map, slice, iter, deleted_offset);
     }
 
-    /// Get a reference to the hash map.
-    fn get_map(&self) -> &HashTable<(u64, u64)> {
-        &self.map
+    fn get_matched_indices<'a>(
+        &self,
+        iter: Box<dyn Iterator<Item = (usize, &'a u64)> + 'a>,
+        deleted_offset: Option<usize>,
+    ) -> (Vec<u32>, Vec<u64>) {
+        // Flatten the deque
+        let next: Vec<u64> = self.next.iter().copied().collect();
+        get_matched_indices::<u64>(&self.map, &next, iter, deleted_offset)

Review Comment:
   [nitpick] Collecting the `VecDeque` into a new `Vec<u64>` on every call can 
be expensive. Consider reusing a buffer or accessing the deque’s contiguous 
slice when possible to reduce allocations.
   ```suggestion
           // Access the deque's slices directly
           let (head, tail) = self.next.as_slices();
           get_matched_indices::<u64>(&self.map, head, tail, iter, 
deleted_offset)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to