cloud-fan commented on code in PR #50402:
URL: https://github.com/apache/spark/pull/50402#discussion_r2065848650


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala:
##########
@@ -107,9 +110,33 @@ case class UnionLoopExec(
       plan
     }
     val df = Dataset.ofRows(session, planWithLimit)
-    val materializedDF = df.repartition()
-    val count = materializedDF.queryExecution.toRdd.count()
-    (materializedDF, count)
+
+    df.queryExecution.optimizedPlan match {
+      case l: LocalRelation =>
+        (df, l.data.length.toLong)
+      case Project(_, _: OneRowRelation) =>
+        if (localRelationLimit != 0) {
+          val local = LocalRelation.fromExternalRows(anchor.output, df.collect().toIndexedSeq)
+          (Dataset.ofRows(session, local), 1.toLong)
+        } else {
+          (df, 1.toLong)
+        }
+      case _ =>
+        val materializedDF = df.repartition()
+        val count = materializedDF.queryExecution.toRdd.count()
+
+        // In the case we return a sufficiently small number of rows when executing any step of the
+        // recursion we convert the result into a LocalRelation, so that, if the recursion doesn't
+        // reference any external tables, we are able to calculate everything in the optimizer,
+        // using the ConvertToLocalRelation rule, which significantly improves runtime.
+        if (count <= localRelationLimit) {
+          val local = LocalRelation.fromExternalRows(anchor.output, df.collect().toIndexedSeq)
+          (Dataset.ofRows(session, local), count)
+        }
+        else {

Review Comment:
```suggestion
        } else {
```
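For readers following the review, below is a minimal, hedged sketch of the fallback branch (`case _ =>`) that the suggestion touches. It is not the actual UnionLoopExec code: it assumes it compiles inside Spark's own sql module (where the private[sql] `Dataset.ofRows` helper and Catalyst's `LocalRelation` are visible) and is written against the Spark 3.x-era internal API; on recent master these internals live under `org.apache.spark.sql.classic`. The object and method names are hypothetical.

```scala
// A minimal sketch, not the actual UnionLoopExec implementation. Assumes it lives in
// Spark's own sql module so the private[sql] Dataset.ofRows helper is accessible
// (on recent master that helper is on org.apache.spark.sql.classic.Dataset instead).
// RecursionStepSketch, materializeAndMaybeLocalize, and the parameter names are hypothetical.
package org.apache.spark.sql.execution

import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

object RecursionStepSketch {

  /** Materialize one recursion step and, if it is small enough, turn it into a LocalRelation. */
  def materializeAndMaybeLocalize(
      session: SparkSession,
      df: DataFrame,
      anchorOutput: Seq[Attribute],
      localRelationLimit: Long): (DataFrame, Long) = {
    // Force execution of the current step and count its rows.
    val materializedDF = df.repartition()
    val count = materializedDF.queryExecution.toRdd.count()
    if (count <= localRelationLimit) {
      // Small result: collect it and rebuild the step as a LocalRelation so that a recursion
      // with no external table references can be folded by the ConvertToLocalRelation rule.
      val local = LocalRelation.fromExternalRows(anchorOutput, df.collect().toIndexedSeq)
      (Dataset.ofRows(session, local), count)
    } else {
      (materializedDF, count)
    }
  }
}
```

The design point the in-code comment makes is that once a step's result is rebuilt as a `LocalRelation`, a recursion that references no external tables can be evaluated entirely in the optimizer by `ConvertToLocalRelation`, instead of launching a distributed job per iteration.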