dejankrak-db commented on code in PR #49103: URL: https://github.com/apache/spark/pull/49103#discussion_r1899564426
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala: ########## @@ -461,6 +421,66 @@ object CollationTypeCoercion { else right } } + + /** + * Throws an analysis exception if the new data type has indeterminate collation, + * and the expression is not allowed to have inputs with indeterminate collations. + */ + private def checkIndeterminateCollation(expression: Expression, newDataType: DataType): Unit = { + if (shouldFailWithIndeterminateCollation(expression, newDataType)) { + expression.failAnalysis( + errorClass = "INDETERMINATE_COLLATION_IN_EXPRESSION", + messageParameters = Map("expr" -> toSQLExpr(expression))) + } + } + + /** + * Returns whether the given expression which isn't allowed to have inputs with indeterminate + * collations has indeterminate collation. + */ + private def shouldFailWithIndeterminateCollation(expression: Expression): Boolean = { + def getDataTypeSafe(e: Expression): DataType = try { + e.dataType + } catch { + case _: Throwable => NullType + } + + expression.children.exists(child => expression.resolved && + shouldFailWithIndeterminateCollation(expression, getDataTypeSafe(child))) + } + + /** + * Returns whether the given expression should fail with indeterminate collation if it is cast + * to the given data type. + */ + private def shouldFailWithIndeterminateCollation( + expression: Expression, + dataType: DataType): Boolean = { + !canContainIndeterminateCollation(expression) && hasIndeterminateCollation(dataType) + } + + /** + * Returns whether the given data type has indeterminate collation. + */ + private def hasIndeterminateCollation(dataType: DataType): Boolean = { + dataType.existsRecursively { + case IndeterminateStringType | StringTypeWithContext(_, Indeterminate) => true + case _ => false + } + } + + /** + * Returns whether the given expression can contain indeterminate collation. + */ + private def canContainIndeterminateCollation(expr: Expression): Boolean = expr match { + // This is not an exhaustive list, and it's fine to miss some expressions. The only difference Review Comment: Well, I wouldn't use the wording that 'it's fine to miss some expressions', as I assume that we want to encourage engineer adding a new expression that cannot contain indeterminate collation to add it to the list below. An additional explanation can be provided that otherwise, if the expression is not added to the list but cannot contain indeterminate collation, it will still fail at runtime, as already pointed out. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org