mihailom-db commented on code in PR #49340:
URL: https://github.com/apache/spark/pull/49340#discussion_r1900198425
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala:
##########
@@ -91,7 +91,7 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("Char as pretty strings") {
     checkEvaluation(ToPrettyString(Literal.create('a', CharType(5))), "a")
     withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") {
-      checkEvaluation(ToPrettyString(Literal.create('a', CharType(5))), "a ")
+      checkEvaluation(ToPrettyString(Literal.create('a', CharType(5))), "a")

Review Comment:
   Why this change? Shouldn't CharType stay padded, as before?

##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkCharVarcharUtils.scala:
##########
@@ -54,7 +54,8 @@ trait SparkCharVarcharUtils {
       StructType(fields.map { field =>
         field.copy(dataType = replaceCharVarcharWithString(field.dataType))
       })
-    case CharType(_) | VarcharType(_) if !SqlApiConf.get.preserveCharVarcharTypeInfo => StringType
+    case st: StringType if !SqlApiConf.get.preserveCharVarcharTypeInfo =>
+      new StringType(st.collationId, st.constraint)

Review Comment:
   Please, could you explain why this change is necessary? It is not intuitive why we do this, and it is also a risky change: we are changing Char/Varchar from the StringType object to something that previously returned the StringType object and had no relation to the constraint. In general, we should avoid changes like this if we can.

##########
sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala:
##########
@@ -695,6 +695,42 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test(s"insert string literal into char/varchar column when " +
+    s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") {
+    withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") {
+      withTable("t") {
+        sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format")
+        sql("INSERT INTO t VALUES ('1234', '1234')")
+        checkAnswer(spark.table("t"), Row("1234 ", "1234"))
+        assertLengthCheckFailure("INSERT INTO t VALUES ('123456', '1')")
+        assertLengthCheckFailure("INSERT INTO t VALUES ('1', '123456')")
+      }
+    }
+  }
+
+  test(s"insert from string column into char/varchar column when " +
+    s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") {
+    withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") {
+      withTable("a", "b") {
+        sql(s"CREATE TABLE a AS SELECT '1234' as c1, '1234' as c2")
+        sql(s"CREATE TABLE b(c1 CHAR(5), c2 VARCHAR(5)) USING $format")
+        sql("INSERT INTO b SELECT * FROM a")
+        checkAnswer(spark.table("b"), Row("1234 ", "1234"))
+        spark.table("b").show()
+      }
+    }
+  }
+
+  test("implicitly cast char/varchar into atomics") {

Review Comment:
   If you say "atomics", could you also cover atomic types other than boolean here?
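   A rough sketch of what that could look like, purely as an illustration -- the existing test body is not visible in this hunk, so the table shape, the inserted literals, and the expected rows below are assumptions for the author to adjust, not the PR's code:

```scala
// Illustrative sketch only: the comparisons force implicit casts of the varchar
// column to a few more atomic types; values and expected rows are assumptions.
test("implicitly cast char/varchar into atomics") {
  withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") {
    withTable("t") {
      sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format")
      sql("INSERT INTO t VALUES ('1234', '1234')")
      // boolean is presumably already covered by the existing test body
      checkAnswer(sql("SELECT c2 = 1234 FROM t"), Row(true))                          // int
      checkAnswer(sql("SELECT c2 = 1234L FROM t"), Row(true))                         // long
      checkAnswer(sql("SELECT c2 = 1234.0D FROM t"), Row(true))                       // double
      checkAnswer(sql("SELECT c2 = CAST(1234 AS DECIMAL(10, 0)) FROM t"), Row(true))  // decimal
    }
  }
}
```

   Date/timestamp targets could be added as well, but the exact expected values there depend on the coercion rules this PR touches, so I leave those to you.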
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PredicateResolver.scala:
##########
@@ -17,18 +17,7 @@ package org.apache.spark.sql.catalyst.analysis.resolver
-import org.apache.spark.sql.catalyst.analysis.{
-  AnsiStringPromotionTypeCoercion,
-  AnsiTypeCoercion,
-  ApplyCharTypePaddingHelper,
-  BooleanEqualityTypeCoercion,
-  CollationTypeCoercion,
-  DecimalPrecisionTypeCoercion,
-  DivisionTypeCoercion,
-  IntegralDivisionTypeCoercion,
-  StringPromotionTypeCoercion,
-  TypeCoercion
-}
+import org.apache.spark.sql.catalyst.analysis.{AnsiStringPromotionTypeCoercion, AnsiTypeCoercion, ApplyCharTypePaddingHelper, BooleanEqualityTypeCoercion, CollationTypeCoercion, DecimalPrecisionTypeCoercion, DivisionTypeCoercion, IntegralDivisionTypeCoercion, StringPromotionTypeCoercion, TypeCoercion}

Review Comment:
   Please revert this change.