mihailom-db commented on code in PR #46003:
URL: https://github.com/apache/spark/pull/46003#discussion_r1568431472
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -804,21 +804,26 @@ case class Overlay(input: Expression, replace:
Expression, pos: Expression, len:
override def dataType: DataType = input.dataType
- override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(StringType, BinaryType),
- TypeCollection(StringType, BinaryType), IntegerType, IntegerType)
+ override def inputTypes: Seq[AbstractDataType] = Seq(
Review Comment:
Should we add this one to CollationTypeCasts?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -804,21 +804,26 @@ case class Overlay(input: Expression, replace:
Expression, pos: Expression, len:
override def dataType: DataType = input.dataType
- override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(StringType, BinaryType),
- TypeCollection(StringType, BinaryType), IntegerType, IntegerType)
+ override def inputTypes: Seq[AbstractDataType] = Seq(
+ TypeCollection(StringTypeAnyCollation, BinaryType),
+ TypeCollection(StringTypeAnyCollation, BinaryType), IntegerType,
IntegerType)
override def checkInputDataTypes(): TypeCheckResult = {
val inputTypeCheck = super.checkInputDataTypes()
if (inputTypeCheck.isSuccess) {
- TypeUtils.checkForSameTypeInputExpr(
- input.dataType :: replace.dataType :: Nil, prettyName)
+ if ((input.dataType.isInstanceOf[StringType] &&
replace.dataType.isInstanceOf[StringType]) ||
Review Comment:
This should be fixed with addition to CollationTypeCasts.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -1698,10 +1703,10 @@ case class FormatString(children: Expression*) extends
Expression with ImplicitC
override def foldable: Boolean = children.forall(_.foldable)
override def nullable: Boolean = children(0).nullable
- override def dataType: DataType = StringType
+ override def dataType: DataType = SQLConf.get.defaultStringType
Review Comment:
I would argue this should take the collation of children(0).dataType. Also
this might need to go to casting rules as well, as you can have multiple
strings in the format replacements.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -1698,10 +1703,10 @@ case class FormatString(children: Expression*) extends
Expression with ImplicitC
override def foldable: Boolean = children.forall(_.foldable)
override def nullable: Boolean = children(0).nullable
- override def dataType: DataType = StringType
+ override def dataType: DataType = SQLConf.get.defaultStringType
Review Comment:
Also I am not sure if we should do any casting here in the end, as I would
argue only first parameter dictates the collation.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2320,9 +2328,9 @@ case class Levenshtein(
case class SoundEx(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
- override def dataType: DataType = StringType
+ override def dataType: DataType = SQLConf.get.defaultStringType
Review Comment:
Again not sure if this should return default or pass through. Maybe some
more input from others could help us resolve this discussion on when we want to
do pass through and when we want to return default.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]