NGA-TRAN commented on code in PR #18776:
URL: https://github.com/apache/datafusion/pull/18776#discussion_r2582786224
##########
datafusion/sqllogictest/test_files/tpch/answers/q1.slt.part:
##########
@@ -39,7 +39,3 @@ order by
l_returnflag,
l_linestatus;
----
-A F 3774200 5320753880.69 5054096266.6828 5256751331.449234 25.537587 36002.123829 0.050144 147790
-N F 95257 133737795.84 127132372.6512 132286291.229445 25.300664 35521.326916 0.049394 3765
-N O 7459297 10512270008.9 9986238338.3847 10385578376.585467 25.545537 36000.924688 0.050095 292000
-R F 3785523 5337950526.47 5071818532.942 5274405503.049367 25.525943 35994.029214 0.049989 148301
Review Comment:
Can you explain this change?
##########
datafusion/core/src/physical_planner.rs:
##########
@@ -1579,6 +1583,25 @@ impl DefaultPhysicalPlanner {
}
}
+fn has_sufficient_rows_for_repartition(
+ input: &Arc<dyn ExecutionPlan>,
+ session_state: &SessionState,
+) -> Result<bool> {
+ // Get partition statistics, default to repartitioning if unavailable
+ let stats = match input.partition_statistics(None) {
+ Ok(s) => s,
+ Err(_) => return Ok(true),
+ };
+
+ if let Some(num_rows) = stats.num_rows.get_value().copied() {
+ let batch_size = session_state.config().batch_size();
+
+ return Ok(num_rows >= batch_size);
Review Comment:
Nice
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]