davidm-db commented on code in PR #54223: URL: https://github.com/apache/spark/pull/54223#discussion_r2871943279
########## sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala: ########## @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types.ops + +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder +import org.apache.spark.sql.internal.SqlApiConf +import org.apache.spark.sql.types.{DataType, TimeType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Client-side (spark-api) type operations for the Types Framework. + * + * This trait consolidates all client-side operations that a data type must implement to be usable + * in the Spark SQL API layer. All methods are mandatory because a type cannot function correctly + * without string formatting (needed for CAST to STRING, EXPLAIN, SHOW) or encoding (needed for + * Dataset[T] operations). + * + * This single-interface design was chosen over separate FormatTypeOps/EncodeTypeOps traits to + * make it clear what a new type must implement - there is one mandatory interface, and it + * contains everything required. Optional capabilities (e.g., proto, Arrow, JDBC) are defined as + * separate traits that can be mixed in incrementally. 
+ * + * RELATIONSHIP TO TypeOps: + * - TypeOps (catalyst): Server-side operations - physical types, literals, conversions + * - TypeApiOps (spark-api): Client-side operations - formatting, encoding + * + * The split exists because sql/api cannot depend on sql/catalyst. For TimeType, TimeTypeOps + * (catalyst) extends TimeTypeApiOps (sql-api) to inherit both sets of operations. + * + * @see + * TimeTypeApiOps for reference implementation + * @since 4.2.0 + */ +trait TypeApiOps extends Serializable { + + /** The DataType this Ops instance handles. */ + def dataType: DataType + + // ==================== String Formatting ==================== + + /** + * Formats an internal value as a display string. + * + * Used by CAST to STRING, EXPLAIN output, SHOW commands. + * + * @param v + * the internal value (e.g., Long nanoseconds for TimeType) + * @return + * formatted string (e.g., "10:30:45.123456") + */ + def format(v: Any): String Review Comment: At the moment, `format` is only called from `formatUTF8` below, but in one of the later phases it will be called from `Row.scala` (client/api) and `HiveResult.scala` (core). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
