allisonwang-db commented on code in PR #49471: URL: https://github.com/apache/spark/pull/49471#discussion_r1984402797
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -2655,6 +2656,93 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
     }
   }
 
+  /*
+   * This rule resolves SQL table functions.
+   */
+  object ResolveSQLTableFunctions extends Rule[LogicalPlan] with AliasHelper {
+
+    /**
+     * Check if a subquery plan is subject to the COUNT bug that can cause wrong results.
+     * A lateral correlation can only be removed if the lateral subquery is not subject to
+     * the COUNT bug. Currently only lateral correlation can handle it correctly.
+     */
+    private def hasCountBug(sub: LogicalPlan): Boolean = sub.find {
+      // The COUNT bug occurs when there is an Aggregate that satisfies all the following
+      // conditions:
+      // 1) is on the correlation path
+      // 2) has non-empty group by expressions
+      // 3) has one or more output columns that evaluate to non-null values with empty input.
+      //    E.g: COUNT(empty row) = 0.
+      // For simplicity, we use a stricter criteria (1 and 2 only) to determine if a query
+      // is subject to the COUNT bug.
+      case a: Aggregate if a.groupingExpressions.nonEmpty => hasOuterReferences(a.child)

Review Comment:
   https://github.com/apache/spark/blob/1c14b06f8224d0f5db072bdeabb1f36d7359b05a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ValidateSubqueryExpression.scala#L434-L439

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org