jayzhan211 commented on code in PR #13133:
URL: https://github.com/apache/datafusion/pull/13133#discussion_r1818610267


##########
datafusion/physical-plan/src/sorts/sort_preserving_merge.rs:
##########
@@ -326,18 +343,87 @@ mod tests {
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema};
     use arrow::record_batch::RecordBatch;
+    use arrow_array::Int64Array;
     use arrow_schema::SchemaRef;
     use datafusion_common::{assert_batches_eq, assert_contains, DataFusionError};
     use datafusion_common_runtime::SpawnedTask;
     use datafusion_execution::config::SessionConfig;
+    use datafusion_execution::runtime_env::RuntimeEnvBuilder;
     use datafusion_execution::RecordBatchStream;
     use datafusion_physical_expr::expressions::Column;
     use datafusion_physical_expr::EquivalenceProperties;
     use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
 
     use futures::{FutureExt, Stream, StreamExt};
+    use hashbrown::HashMap;
     use tokio::time::timeout;
 
+    fn generate_task_ctx_for_round_robin_tie_breaker() -> Result<Arc<TaskContext>> {
+        let mut pool_per_consumer = HashMap::new();
+        // Bytes from 660_000 to 30_000_000 (or even more) are all valid limits
+        pool_per_consumer.insert("RepartitionExec[0]".to_string(), 10_000_000);
+        pool_per_consumer.insert("RepartitionExec[1]".to_string(), 10_000_000);
+
+        let runtime = RuntimeEnvBuilder::new()
+            // Arbitrarily large total memory limit; only the per-consumer limits for RepartitionExec matter here
+            .with_memory_limit_per_consumer(2_000_000_000, 1.0, pool_per_consumer)
+            .build_arc()?;
+        let config = SessionConfig::new();
+        let task_ctx = TaskContext::default()
+            .with_runtime(runtime)
+            .with_session_config(config);
+        Ok(Arc::new(task_ctx))
+    }
+
+    fn generate_spm_for_round_robin_tie_breaker(
+        enable_round_robin_repartition: bool,
+    ) -> Result<Arc<SortPreservingMergeExec>> {
+        let target_batch_size = 12500;
+        let row_size = 12500;
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![1; row_size]));
+        let b: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size]));
+        let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size]));
+        let rb = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap();
+
+        let rbs = (0..1024).map(|_| rb.clone()).collect::<Vec<_>>();
+
+        let schema = rb.schema();
+        let sort = vec![
+            PhysicalSortExpr {
+                expr: col("b", &schema).unwrap(),
+                options: Default::default(),
+            },
+            PhysicalSortExpr {
+                expr: col("c", &schema).unwrap(),
+                options: Default::default(),
+            },
+        ];
+
+        let exec = MemoryExec::try_new(&[rbs], schema, None).unwrap();
+        let repartition_exec =
+            RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(2))?;
+        let coalesce_batches_exec =
+            CoalesceBatchesExec::new(Arc::new(repartition_exec), target_batch_size);
+        let spm = SortPreservingMergeExec::new(sort, Arc::new(coalesce_batches_exec))
+            .with_round_robin_repartition(enable_round_robin_repartition);
+        Ok(Arc::new(spm))
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_round_robin_tie_breaker_success() -> Result<()> {
+        let task_ctx = generate_task_ctx_for_round_robin_tie_breaker()?;
+        let spm = generate_spm_for_round_robin_tie_breaker(true)?;
+        let _collected = collect(spm, task_ctx).await.unwrap();
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_round_robin_tie_breaker_fail() -> Result<()> {

Review Comment:
   It shrinks properly, but since repartition usually runs faster than SPM, data accumulates on the SPM side and its footprint is often larger than repartition's. Even though it shrinks properly at the end, the peak memory consumer is usually SPM, not repartition.
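   
   A minimal sketch (not from this PR; the consumer names, pool size, batch size, and drain rate are made up) of the dynamic described above, using DataFusion's memory-pool primitives: the repartition side releases each batch right after handing it off, while the SPM side drains slower than it receives, so the reservation that peaks belongs to SPM.
   
   ```rust
   use std::sync::Arc;
   
   use datafusion_common::Result;
   use datafusion_execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};
   
   fn main() -> Result<()> {
       // Shared pool with an arbitrary 10 MB limit, plus one consumer per operator.
       let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(10_000_000));
       let mut repartition = MemoryConsumer::new("RepartitionExec[0]").register(&pool);
       let mut spm = MemoryConsumer::new("SortPreservingMergeExec").register(&pool);
   
       let batch = 100_000; // pretend each buffered batch is ~100 KB
       for i in 0..20 {
           // Repartition buffers a batch, hands it off, and releases it right away.
           repartition.try_grow(batch)?;
           repartition.shrink(batch);
           // SPM holds on to the batch but only drains one every other
           // iteration, so its reservation (and the pool peak) keeps growing.
           spm.try_grow(batch)?;
           if i % 2 == 1 {
               spm.shrink(batch);
           }
       }
       // Repartition is back to zero; whatever is still reserved is SPM's.
       println!("still reserved: {} bytes", pool.reserved());
       Ok(())
   }
   ```
   
   With a per-consumer limit set only on the RepartitionExec consumers, a run like this never trips the limit even though SPM is the one holding most of the memory.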
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
