comphead commented on code in PR #16210: URL: https://github.com/apache/datafusion/pull/16210#discussion_r2125235875
########## datafusion/physical-plan/src/joins/nested_loop_join.rs: ########## @@ -810,6 +871,123 @@ fn build_join_indices( } } +// Find matching indices based on join `on` predicates +fn get_equijoin_match( + left_indices: UInt64Array, + right_indices: UInt32Array, + left_batch: &RecordBatch, + right_batch: &RecordBatch, + on: &Vec<(PhysicalExprRef, PhysicalExprRef)>, + null_equals_null: bool, +) -> Result<(UInt64Array, UInt32Array)> { + // Create the different `ArrayRef`s holding the values that were evaluated + // against each expression in the `on` predicate + let left_arrays: Vec<ArrayRef> = on + .iter() + .map(|(l, _)| l.evaluate(left_batch)) + .collect::<Result<Vec<ColumnarValue>>>()? + .into_iter() + .map(|cv| cv.into_array(left_batch.num_rows()).unwrap()) + .collect(); + let right_arrays: Vec<ArrayRef> = on + .iter() + .map(|(_, r)| r.evaluate(right_batch)) + .collect::<Result<Vec<ColumnarValue>>>()? + .into_iter() + .map(|cv| cv.into_array(right_batch.num_rows()).unwrap()) + .collect(); + + let mut out_l = UInt64Builder::new(); + let mut out_r = UInt32Builder::new(); + + // Goes through both left and right indices and compares the values + for (l_idx, r_idx) in left_indices.values().iter().zip(right_indices.values()) { + if compare_arrays( + &left_arrays, + *l_idx as usize, + &right_arrays, + *r_idx as usize, + null_equals_null, + )? { + out_l.append_value(*l_idx); + out_r.append_value(*r_idx); + } + } + + Ok((out_l.finish(), out_r.finish())) +} + +// Compares values in the array, returns true if match, false otherwise +fn compare_arrays( + left_arrays: &[ArrayRef], + left: usize, Review Comment: This reminds me of SMJ (sort-merge join), which uses a similar mechanism; perhaps we can factor it out? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org