milanisvet commented on code in PR #49518:
URL: https://github.com/apache/spark/pull/49518#discussion_r1925339756


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala:
##########
@@ -183,4 +184,52 @@ object ResolveWithCTE extends Rule[LogicalPlan] {
         columnNames.map(UnresolvedSubqueryColumnAliases(_, ref)).getOrElse(ref)
     }
   }
+
+  /**
+   * Checks if data types of anchor and recursive terms of a recursive CTE 
definition match.
+   */
+  def checkDataTypesAnchorAndRecursiveTerm(unionLoop: UnionLoop): Unit = {
+    val anchorOutputDatatypes = unionLoop.anchor.output.map(_.dataType)
+    val recursiveTermOutputDatatypes = 
unionLoop.recursion.output.map(_.dataType)
+
+    if (!anchorOutputDatatypes.zip(recursiveTermOutputDatatypes).forall {
+      case (anchorDT, recursionDT) => DataType.equalsStructurally(anchorDT, 
recursionDT, true)
+    }) {
+      throw new AnalysisException(
+        errorClass = "INVALID_RECURSIVE_REFERENCE.DATA_TYPE",
+        messageParameters = Map.empty)
+    }
+  }
+
+  /**
+   * Throws error if self-reference is placed in places which are not allowed:
+   * right side of left outer/semi/anti joins, left side of right outer joins,
+   * in full outer joins and in aggregates
+   */
+  def checkIfSelfReferenceIsPlacedCorrectly(unionLoop: UnionLoop): Unit = {
+    def unionLoopRefNotAllowedUnderCurrentNode(currentNode: LogicalPlan) : 
Unit =
+      currentNode.foreach {
+        case UnionLoopRef(unionLoop.id, _, _) =>
+          throw new AnalysisException(
+            errorClass = "INVALID_RECURSIVE_REFERENCE.PLACE",
+            messageParameters = Map.empty)
+        case other => ()
+      }
+    unionLoop.foreach {
+      case Join(left, right, LeftOuter, _, _) =>

Review Comment:
   Hm, but I think the SQL standard also specifies where recursion is not 
allowed rather than where it is allowed. 
   To me it seems that there are many more nodes which are allowed than not, 
especially with joins, where only a specific side is not allowed depending 
on the join type.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala:
##########
@@ -183,4 +184,52 @@ object ResolveWithCTE extends Rule[LogicalPlan] {
         columnNames.map(UnresolvedSubqueryColumnAliases(_, ref)).getOrElse(ref)
     }
   }
+
+  /**
+   * Checks if data types of anchor and recursive terms of a recursive CTE 
definition match.
+   */
+  def checkDataTypesAnchorAndRecursiveTerm(unionLoop: UnionLoop): Unit = {
+    val anchorOutputDatatypes = unionLoop.anchor.output.map(_.dataType)
+    val recursiveTermOutputDatatypes = 
unionLoop.recursion.output.map(_.dataType)
+
+    if (!anchorOutputDatatypes.zip(recursiveTermOutputDatatypes).forall {
+      case (anchorDT, recursionDT) => DataType.equalsStructurally(anchorDT, 
recursionDT, true)
+    }) {
+      throw new AnalysisException(
+        errorClass = "INVALID_RECURSIVE_REFERENCE.DATA_TYPE",
+        messageParameters = Map.empty)
+    }
+  }
+
+  /**
+   * Throws error if self-reference is placed in places which are not allowed:
+   * right side of left outer/semi/anti joins, left side of right outer joins,
+   * in full outer joins and in aggregates
+   */
+  def checkIfSelfReferenceIsPlacedCorrectly(unionLoop: UnionLoop): Unit = {
+    def unionLoopRefNotAllowedUnderCurrentNode(currentNode: LogicalPlan) : 
Unit =
+      currentNode.foreach {
+        case UnionLoopRef(unionLoop.id, _, _) =>
+          throw new AnalysisException(
+            errorClass = "INVALID_RECURSIVE_REFERENCE.PLACE",
+            messageParameters = Map.empty)
+        case other => ()
+      }
+    unionLoop.foreach {
+      case Join(left, right, LeftOuter, _, _) =>

Review Comment:
   Hm, but I think the SQL standard also specifies where recursion is not 
allowed rather than where it is allowed. 
   To me it seems that there are many more nodes which are allowed than not, 
especially with joins, where only a specific side is not allowed depending 
on the join type.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to