comphead commented on code in PR #17300: URL: https://github.com/apache/datafusion/pull/17300#discussion_r2299349926
########## datafusion/physical-plan/src/joins/utils.rs: ########## @@ -1633,6 +1640,112 @@ pub fn swap_join_projection( } } +/// Updates `hash_map` with new entries from `batch` evaluated against the expressions `on` +/// using `offset` as a start value for `batch` row indices. +/// +/// `fifo_hashmap` sets the order of iteration over `batch` rows while updating hashmap, +/// which allows to keep either first (if set to true) or last (if set to false) row index +/// as a chain head for rows with equal hash values. +#[allow(clippy::too_many_arguments)] +pub fn update_hash( + on: &[PhysicalExprRef], + batch: &RecordBatch, + hash_map: &mut dyn JoinHashMapType, + offset: usize, + random_state: &RandomState, + hashes_buffer: &mut Vec<u64>, + deleted_offset: usize, + fifo_hashmap: bool, +) -> Result<()> { + // evaluate the keys + let keys_values = on + .iter() + .map(|c| c.evaluate(batch)?.into_array(batch.num_rows())) + .collect::<Result<Vec<_>>>()?; + + // calculate the hash values + let hash_values = create_hashes(&keys_values, random_state, hashes_buffer)?; + + // For usual JoinHashmap, the implementation is void. + hash_map.extend_zero(batch.num_rows()); + + // Updating JoinHashMap from hash values iterator + let hash_values_iter = hash_values + .iter() + .enumerate() + .map(|(i, val)| (i + offset, val)); + + if fifo_hashmap { + hash_map.update_from_iter(Box::new(hash_values_iter.rev()), deleted_offset); + } else { + hash_map.update_from_iter(Box::new(hash_values_iter), deleted_offset); + } + + Ok(()) +} + +pub fn equal_rows_arr( Review Comment: Should it be `pub` or `pub(crate)`? If it is `pub`, can we have a doc comment describing it? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org