milanisvet commented on code in PR #49518: URL: https://github.com/apache/spark/pull/49518#discussion_r1922678467
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala: ########## @@ -1043,6 +1044,75 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB if (Utils.isTesting) scrubOutIds(result) else result } + /** + * Recursion, according to SQL standard, comes with several limitations: + * 1. Recursive term can contain one recursive reference only. + * 2. Recursive reference can't be used in some kinds of joins and aggregations. + * This rule checks that these restrictions are not violated. + */ + private def checkRecursion( + plan: LogicalPlan, + references: mutable.Map[Long, (Int, Seq[DataType])] = mutable.Map.empty): Unit = { + plan match { + // The map is filled with UnionLoop id as key and 0 (number of Ref occasions) and datatype + // as value + case UnionLoop(id, anchor, recursion, _) => + checkRecursion(anchor, references) + checkRecursion(recursion, references += id -> (0, anchor.output.map(_.dataType))) + references -= id + case r @ UnionLoopRef(loopId, output, false) => + // If we encounter a recursive reference, it has to be present in the map + if (!references.contains(loopId)) { + r.failAnalysis( + errorClass = "INVALID_RECURSIVE_REFERENCE.PLACE", + messageParameters = Map.empty + ) + } + val (count, dataType) = references(loopId) + if (count > 0) { + r.failAnalysis( + errorClass = "INVALID_RECURSIVE_REFERENCE.NUMBER", + messageParameters = Map.empty + ) + } + val originalDataType = r.output.map(_.dataType) + if (!originalDataType.zip(dataType).forall { + case (odt, dt) => DataType.equalsStructurally(odt, dt, true) Review Comment: Now thinking about this, I am not sure which equality check should actually be used. This one is taken from Peter's initial code. I will try to consult somebody else to hear their opinion. If you have an opinion, let me know. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org