rluvaton commented on code in PR #20482:
URL: https://github.com/apache/datafusion/pull/20482#discussion_r2845513653
##########
datafusion/physical-plan/src/joins/sort_merge_join/stream.rs:
##########
@@ -598,13 +600,49 @@ impl Stream for SortMergeJoinStream {
self.current_ordering = self.compare_streamed_buffered()?;
self.state = SortMergeJoinState::JoinOutput;
}
+ SortMergeJoinState::EmitReady { next_state } => {
+ // If have data to emit, emit it and if no more, change to next
+
+ // Verify metadata alignment before checking if we have batches to output
+ self.joined_record_batches
+ .filter_metadata
+ .debug_assert_metadata_aligned();
+
+ // For filtered joins, skip output and let Init state handle it
+ if needs_deferred_filtering(&self.filter, self.join_type) {
+ self.state = next_state.as_ref().clone();
+ continue;
+ }
+
+ let maybe_next = next_state.as_ref().clone();
+
+ // For non-filtered joins, only output if we have a completed batch
+ // (opportunistic output when target batch size is reached)
+ if self
+ .joined_record_batches
+ .joined_batches
+ .has_completed_batch()
+ {
+ let record_batch = self
Review Comment:
I already have this in my next refactor.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]