alamb commented on code in PR #15017: URL: https://github.com/apache/datafusion/pull/15017#discussion_r1981241095
########## datafusion/core/tests/memory_limit/mod.rs: ########## @@ -468,6 +468,31 @@ async fn test_stringview_external_sort() { let _ = df.collect().await.expect("Query execution failed"); } +/// This test case is for the regression case: Review Comment: I don't understand the reference here to "regression" (which refers normally to something that stopped working when it worked before) Maybe a better description would be "test_in_mem_buffer_almost_full" or something 🤔 ########## datafusion/physical-plan/src/sorts/sort.rs: ########## @@ -332,18 +332,20 @@ impl ExternalSorter { /// /// 2. A combined streaming merge incorporating both in-memory /// batches and data from spill files on disk. - fn sort(&mut self) -> Result<SendableRecordBatchStream> { + async fn sort(&mut self) -> Result<SendableRecordBatchStream> { // Release the memory reserved for merge back to the pool so // there is some left when `in_mem_sort_stream` requests an // allocation. self.merge_reservation.free(); if self.spilled_before() { let mut streams = vec![]; + + // Sort `in_mem_batches` and spill it first. If there are many Review Comment: 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org