Kontinuation commented on code in PR #14644: URL: https://github.com/apache/datafusion/pull/14644#discussion_r1955829354
########## datafusion/physical-plan/src/sorts/sort.rs: ########## @@ -408,50 +395,114 @@ impl ExternalSorter { debug!("Spilling sort data of ExternalSorter to disk whilst inserting"); - self.in_mem_sort().await?; - let spill_file = self.runtime.disk_manager.create_tmp_file("Sorting")?; let batches = std::mem::take(&mut self.in_mem_batches); - let spilled_rows = spill_record_batches( + let (spilled_rows, spilled_bytes) = spill_record_batches( batches, spill_file.path().into(), Arc::clone(&self.schema), )?; let used = self.reservation.free(); self.metrics.spill_count.add(1); - self.metrics.spilled_bytes.add(used); + self.metrics.spilled_bytes.add(spilled_bytes); self.metrics.spilled_rows.add(spilled_rows); self.spills.push(spill_file); Ok(used) } /// Sorts the in_mem_batches in place - async fn in_mem_sort(&mut self) -> Result<()> { - if self.in_mem_batches_sorted { - return Ok(()); - } - + /// + /// Sorting may have freed memory, especially if fetch is `Some`. If + /// the memory usage has dropped by a factor of 2, then we don't have + /// to spill. Otherwise, we spill to free up memory for inserting + /// more batches. + /// + /// The factor of 2 aims to avoid a degenerate case where the + /// memory required for `fetch` is just under the memory available, + // causing repeated re-sorting of data + async fn sort_or_spill_in_mem_batches(&mut self) -> Result<()> { // Release the memory reserved for merge back to the pool so - // there is some left when `in_memo_sort_stream` requests an + // there is some left when `in_mem_sort_stream` requests an // allocation. self.merge_reservation.free(); - self.in_mem_batches = self - .in_mem_sort_stream(self.metrics.baseline.intermediate())? - .try_collect() - .await?; + let before = self.reservation.size(); + + let mut sorted_stream = + self.in_mem_sort_stream(self.metrics.baseline.intermediate())?; + + // `self.in_mem_batches` is already taken away by the sort_stream, now it is empty. 
+ // We'll gradually collect the sorted stream into self.in_mem_batches, or directly + // write sorted batches to disk when the memory is insufficient. + let mut spill_writer: Option<IPCWriter> = None; + // Leave at least 1/3 of spill reservation for sorting/merging the next batch. Here the + // 1/3 is simply an arbitrarily chosen number. Review Comment: I found that `SortPreservingMergeStream` will never reserve memory after producing the first merged batch when merging in-memory batches, so there's no need to reserve additional space for merging here. I have removed the additional memory reservation in a new commit. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org