dejankrak-db commented on code in PR #49103:
URL: https://github.com/apache/spark/pull/49103#discussion_r1899564426


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala:
##########
@@ -461,6 +421,66 @@ object CollationTypeCoercion {
         else right
     }
   }
+
+  /**
+   * Throws an analysis exception if the new data type has indeterminate 
collation,
+   * and the expression is not allowed to have inputs with indeterminate 
collations.
+   */
+  private def checkIndeterminateCollation(expression: Expression, newDataType: 
DataType): Unit = {
+    if (shouldFailWithIndeterminateCollation(expression, newDataType)) {
+      expression.failAnalysis(
+        errorClass = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        messageParameters = Map("expr" -> toSQLExpr(expression)))
+    }
+  }
+
+  /**
+   * Returns whether the given expression which isn't allowed to have inputs 
with indeterminate
+   * collations has indeterminate collation.
+   */
+  private def shouldFailWithIndeterminateCollation(expression: Expression): 
Boolean = {
+    def getDataTypeSafe(e: Expression): DataType = try {
+      e.dataType
+    } catch {
+      case _: Throwable => NullType
+    }
+
+    expression.children.exists(child => expression.resolved &&
+      shouldFailWithIndeterminateCollation(expression, getDataTypeSafe(child)))
+  }
+
+  /**
+   * Returns whether the given expression should fail with indeterminate 
collation if it is cast
+   * to the given data type.
+   */
+  private def shouldFailWithIndeterminateCollation(
+      expression: Expression,
+      dataType: DataType): Boolean = {
+    !canContainIndeterminateCollation(expression) && 
hasIndeterminateCollation(dataType)
+  }
+
+  /**
+   * Returns whether the given data type has indeterminate collation.
+   */
+  private def hasIndeterminateCollation(dataType: DataType): Boolean = {
+    dataType.existsRecursively {
+      case IndeterminateStringType | StringTypeWithContext(_, Indeterminate) 
=> true
+      case _ => false
+    }
+  }
+
+  /**
+   * Returns whether the given expression can contain indeterminate collation.
+   */
+  private def canContainIndeterminateCollation(expr: Expression): Boolean = 
expr match {
+    // This is not an exhaustive list, and it's fine to miss some expressions. 
The only difference

Review Comment:
   Well, I wouldn't use the wording that 'it's fine to miss some expressions', 
as I assume that we want to encourage engineer adding a new expression that 
cannot contain indeterminate collation to add it to the list below. An 
additional explanation can be provided that otherwise, if the expression is not 
added to the list but cannot contain indeterminate collation, it will still 
fail at runtime, as already pointed out.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to