scovich commented on code in PR #49559:
URL: https://github.com/apache/spark/pull/49559#discussion_r1924223680
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala:
##########
@@ -1622,60 +1622,84 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
       case RenameColumn(table: ResolvedTable, col: ResolvedFieldName, newName) =>
         checkColumnNotExists("rename", col.path :+ newName, table.schema)

-      case a @ AlterColumn(table: ResolvedTable, col: ResolvedFieldName, _, _, _, _, _) =>
-        val fieldName = col.name.quoted
-        if (a.dataType.isDefined) {
-          val field = CharVarcharUtils.getRawType(col.field.metadata)
-            .map(dt => col.field.copy(dataType = dt))
-            .getOrElse(col.field)
-          val newDataType = a.dataType.get
-          newDataType match {
-            case _: StructType => alter.failAnalysis(
-              "CANNOT_UPDATE_FIELD.STRUCT_TYPE",
-              Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
-            case _: MapType => alter.failAnalysis(
-              "CANNOT_UPDATE_FIELD.MAP_TYPE",
-              Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
-            case _: ArrayType => alter.failAnalysis(
-              "CANNOT_UPDATE_FIELD.ARRAY_TYPE",
-              Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
-            case u: UserDefinedType[_] => alter.failAnalysis(
-              "CANNOT_UPDATE_FIELD.USER_DEFINED_TYPE",
-              Map(
-                "table" -> toSQLId(table.name),
-                "fieldName" -> toSQLId(fieldName),
-                "udtSql" -> toSQLType(u)))
-            case _: CalendarIntervalType | _: AnsiIntervalType => alter.failAnalysis(
-              "CANNOT_UPDATE_FIELD.INTERVAL_TYPE",
-              Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
-            case _ => // update is okay
-          }
-
-          // We don't need to handle nested types here which shall fail before.
-          def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
-            case (CharType(l1), CharType(l2)) => l1 == l2
-            case (CharType(l1), VarcharType(l2)) => l1 <= l2
-            case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
-            case _ => Cast.canUpCast(from, to)
-          }
-          if (!canAlterColumnType(field.dataType, newDataType)) {
+      case AlterColumns(table: ResolvedTable, columns, specs) =>
+        val groupedColumns = columns.groupBy(_.name)
+        groupedColumns.collect {
+          case (name, occurrences) if occurrences.length > 1 =>
             alter.failAnalysis(
-              errorClass = "NOT_SUPPORTED_CHANGE_COLUMN",
+              errorClass = "NOT_SUPPORTED_CHANGE_SAME_COLUMN",
               messageParameters = Map(
                 "table" -> toSQLId(table.name),
-                "originName" -> toSQLId(fieldName),
-                "originType" -> toSQLType(field.dataType),
-                "newName" -> toSQLId(fieldName),
-                "newType" -> toSQLType(newDataType)))
-          }
+                "fieldName" -> toSQLId(name)))
         }
-        if (a.nullable.isDefined) {
-          if (!a.nullable.get && col.field.nullable) {
+        groupedColumns.keys.foreach { name =>
+          val child = groupedColumns.keys.find(child => child != name && child.startsWith(name))

Review Comment:
   I think this is trying to handle e.g. `x.y.z` vs. `x.y`, but wouldn't this check also flag two (non-nested) fields where one happens to be a prefix of the other? E.g. `customer` and `customer_status`.
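A minimal standalone sketch of the concern (hypothetical field names, not the PR's actual `FieldName` type): a string-based `startsWith` flags `customer_status` as a child of `customer`, while an element-wise check over name parts does not.

```scala
object PrefixCheckSketch {
  def main(args: Array[String]): Unit = {
    // String prefix: falsely treats `customer_status` as nested under `customer`.
    val names = Seq("customer", "customer_status")
    println(names.find(n => n != "customer" && n.startsWith("customer")))
    // => Some(customer_status) -- a false positive

    // Comparing name *parts* avoids this: Seq.startsWith is element-wise,
    // so only a genuine path prefix (x.y of x.y.z) matches.
    val paths = Seq(Seq("customer"), Seq("customer_status"), Seq("x", "y"), Seq("x", "y", "z"))
    val hits = for {
      a <- paths
      b <- paths
      if a != b && b.startsWith(a)
    } yield (a, b)
    println(hits) // => List((List(x, y),List(x, y, z)))
  }
}
```

Whether the check in the diff hits this case depends on whether the `groupBy(_.name)` keys are joined strings or sequences of name parts.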
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala:
##########
@@ -201,49 +201,55 @@ case class RenameColumn(
     copy(table = newChild)
 }

-/**
- * The logical plan of the ALTER TABLE ... ALTER COLUMN command.
- */
-case class AlterColumn(
-    table: LogicalPlan,
-    column: FieldName,
+case class AlterColumnSpec(
     dataType: Option[DataType],
     nullable: Option[Boolean],
     comment: Option[String],
     position: Option[FieldPosition],
-    setDefaultExpression: Option[String]) extends AlterTableCommand {
+    setDefaultExpression: Option[String])
+
+/**
+ * The logical plan of the ALTER TABLE ... ALTER COLUMN command.
+ */
+case class AlterColumns(
+    table: LogicalPlan,
+    columns: Seq[FieldName],
+    specs: Seq[AlterColumnSpec]) extends AlterTableCommand {

   override def changes: Seq[TableChange] = {
-    require(column.resolved, "FieldName should be resolved before it's converted to TableChange.")
-    val colName = column.name.toArray
-    val typeChange = dataType.map { newDataType =>
-      TableChange.updateColumnType(colName, newDataType)
-    }
-    val nullabilityChange = nullable.map { nullable =>
-      TableChange.updateColumnNullability(colName, nullable)
-    }
-    val commentChange = comment.map { newComment =>
-      TableChange.updateColumnComment(colName, newComment)
-    }
-    val positionChange = position.map { newPosition =>
-      require(newPosition.resolved,
-        "FieldPosition should be resolved before it's converted to TableChange.")
-      TableChange.updateColumnPosition(colName, newPosition.position)
-    }
-    val defaultValueChange = setDefaultExpression.map { newDefaultExpression =>
-      if (newDefaultExpression.nonEmpty) {
-        // SPARK-45075: We call 'ResolveDefaultColumns.analyze' here to make sure that the default
-        // value parses successfully, and return an error otherwise
-        val newDataType = dataType.getOrElse(column.asInstanceOf[ResolvedFieldName].field.dataType)
-        ResolveDefaultColumns.analyze(column.name.last, newDataType, newDefaultExpression,
-          "ALTER TABLE ALTER COLUMN")
+    assert(columns.size == specs.size)
+    columns.zip(specs).flatMap { case (column, spec) =>
+      require(column.resolved, "FieldName should be resolved before it's converted to TableChange.")
+      val colName = column.name.toArray
+      val typeChange = spec.dataType.map { newDataType =>
+        TableChange.updateColumnType(colName, newDataType)
+      }
+      val nullabilityChange = spec.nullable.map { nullable =>
+        TableChange.updateColumnNullability(colName, nullable)
       }
-      TableChange.updateColumnDefaultValue(colName, newDefaultExpression)
+      val commentChange = spec.comment.map { newComment =>
+        TableChange.updateColumnComment(colName, newComment)
+      }
+      val positionChange = spec.position.map { newPosition =>
+        require(newPosition.resolved,
+          "FieldPosition should be resolved before it's converted to TableChange.")
+        TableChange.updateColumnPosition(colName, newPosition.position)
+      }
+      val defaultValueChange = spec.setDefaultExpression.map { newDefaultExpression =>
+        if (newDefaultExpression.nonEmpty) {
+          // SPARK-45075: We call 'ResolveDefaultColumns.analyze' here to make sure that the default
+          // value parses successfully, and return an error otherwise
+          val newDataType = spec.dataType.getOrElse(
+            column.asInstanceOf[ResolvedFieldName].field.dataType)
+          ResolveDefaultColumns.analyze(column.name.last, newDataType, newDefaultExpression,
+            "ALTER TABLE ALTER COLUMN")
+        }
+        TableChange.updateColumnDefaultValue(colName, newDefaultExpression)
+      }
+      typeChange.toSeq ++ nullabilityChange ++ commentChange ++ positionChange ++ defaultValueChange
     }
-    typeChange.toSeq ++ nullabilityChange ++ commentChange ++ positionChange ++ defaultValueChange
   }

-  override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan =
-    copy(table = newChild)
+  override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(newChild)

Review Comment:
   How does this change work? Is there some overload of `copy` available that only takes a table?
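For context on the question: a Scala case class does not get extra `copy` overloads. It gets a single synthesized `copy` whose parameters all default to the current field values, so one positional argument binds to the first constructor parameter (`table` here). A minimal sketch with a hypothetical `Node` class (not Spark's `AlterColumns`):

```scala
// Hypothetical stand-in for a plan node whose first field is `table`.
case class Node(table: String, columns: Seq[String], specs: Seq[Int])

object CopySketch {
  def main(args: Array[String]): Unit = {
    val n = Node("t1", Seq("c1"), Seq(1))

    // The compiler synthesizes:
    //   def copy(table: String = table, columns: Seq[String] = columns,
    //            specs: Seq[Int] = specs): Node
    // so a single positional argument rebinds only `table`.
    println(n.copy("t2"))         // Node(t2,List(c1),List(1))

    // The named form is equivalent and arguably clearer:
    println(n.copy(table = "t2")) // Node(t2,List(c1),List(1))
  }
}
```

So `copy(newChild)` silently relies on `table` being the first constructor parameter; the named `copy(table = newChild)` form would not break if the parameter order ever changed.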