srielau commented on code in PR #53570:
URL: https://github.com/apache/spark/pull/53570#discussion_r2870569034


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:
##########
@@ -84,6 +94,224 @@ class SessionCatalog(
   import SessionCatalog._
   import CatalogTypes.TablePartitionSpec
 
+  // Marker database name for temporary functions to distinguish from builtin 
functions
+  /**
+   * Database qualifier used to store temporary functions in the function 
registry.
+   * Temporary functions use composite keys to coexist with builtin functions 
of the same name:
+   * - Builtin functions: FunctionIdentifier(name, None)
+   * - Extension functions: FunctionIdentifier(name, None, None) (Treated as 
builtin)
+   * - Temp functions: FunctionIdentifier(name, 
Some(CatalogManager.SESSION_NAMESPACE))
+   * This allows all three to exist in the same registry without conflicts.
+   */
+  private val TEMP_FUNCTION_DB = CatalogManager.SESSION_NAMESPACE
+  private val EXTENSION_FUNCTION_DB = CatalogManager.EXTENSION_NAMESPACE
+
+  /**
+   * Creates a FunctionIdentifier for an extension function.
+   * Extension functions are treated as builtins and stored unqualified.
+   * This allows them to overwrite existing builtins if registered by power 
users.
+   *
+   * @param name The function name (unqualified)
+   * @return FunctionIdentifier without qualification (same as builtins)
+   */
+  private def extensionFunctionIdentifier(name: String): FunctionIdentifier =
+    FunctionIdentifier(format(name))
+
+  /**
+   * Creates a FunctionIdentifier for a temporary function with the 
TEMP_FUNCTION_DB database
+   * qualifier. This enables temporary functions to coexist with builtin 
functions of the same name.
+   *
+   * @param name The function name (unqualified)
+   * @return FunctionIdentifier with database = TEMP_FUNCTION_DB
+   */
+  private def tempFunctionIdentifier(name: String): FunctionIdentifier =
+    FunctionIdentifier(
+      format(name),
+      Some(TEMP_FUNCTION_DB),
+      Some(CatalogManager.SYSTEM_CATALOG_NAME))
+
+  /**
+   * Checks if a FunctionIdentifier represents an extension function by 
checking for the
+   * EXTENSION_FUNCTION_DB database qualifier.
+   *
+   * @param identifier The FunctionIdentifier to check
+   * @return true if this is an extension function identifier
+   */
+  private def isExtensionFunctionIdentifier(identifier: FunctionIdentifier): 
Boolean =
+    identifier.database.contains(EXTENSION_FUNCTION_DB)
+
+  /**
+   * Checks if a FunctionIdentifier represents a temporary function by 
checking for the
+   * TEMP_FUNCTION_DB database qualifier.
+   *
+   * @param identifier The FunctionIdentifier to check
+   * @return true if this is a temporary function identifier
+   */
+  private def isTempFunctionIdentifier(identifier: FunctionIdentifier): 
Boolean =
+    identifier.database.contains(TEMP_FUNCTION_DB)
+
+  // --------------------------------
+  // | PATH-Based Resolution        |
+  // --------------------------------
+  // Functions are resolved by searching through an ordered PATH of namespaces.
+  // This provides a unified, data-driven resolution mechanism instead of 
hardcoded checks.
+
+  /**
+   * Function resolution PATH - ordered list of namespaces to search for 
unqualified functions.
+   * Each entry is a FunctionIdentifier representing a namespace (catalog + 
database).
+   * The funcName field is unused (empty string) as these represent namespace 
templates.
+   *
+   * Resolution order follows the configured path: builtin then session 
(default "second"),
+   * or session then builtin (legacy "first"), or builtin only ("last" - 
session tried after
+   * persistent in FunctionResolution). Extension functions are stored and 
resolved in the
+   * builtin namespace; they are not a separate path step.
+   *
+   * When resolving a view, the system.session namespace is included in the 
path, but
+   * handleViewContext filters to only return temporary functions that were 
referred to
+   * when the view was created.
+   *
+   * These identifiers are cached for performance since they're accessed 
frequently.
+   */
+  private val SESSION_NAMESPACE_TEMPLATE = FunctionIdentifier(
+    funcName = "",
+    database = Some(CatalogManager.SESSION_NAMESPACE),
+    catalog = Some(CatalogManager.SYSTEM_CATALOG_NAME))
+
+  private val BUILTIN_NAMESPACE_TEMPLATE = FunctionIdentifier(
+    funcName = "",
+    database = Some(CatalogManager.BUILTIN_NAMESPACE),
+    catalog = Some(CatalogManager.SYSTEM_CATALOG_NAME))
+
+  // The resolution path: builtin -> session (session "second")
+  private val RESOLUTION_PATH = Seq(
+    BUILTIN_NAMESPACE_TEMPLATE,
+    SESSION_NAMESPACE_TEMPLATE
+  )
+
+  // Legacy resolution path: session -> builtin (session "first")
+  private val LEGACY_RESOLUTION_PATH = Seq(
+    SESSION_NAMESPACE_TEMPLATE,
+    BUILTIN_NAMESPACE_TEMPLATE
+  )
+
+  // Session last: only builtin (session tried after persistent in 
FunctionResolution)
+  private val SESSION_LAST_RESOLUTION_PATH = Seq(BUILTIN_NAMESPACE_TEMPLATE)
+
+  /**
+   * Returns the resolution path for function lookup.
+   * @return Ordered sequence of namespace identifiers.
+   */
+  private def resolutionPath(): Seq[FunctionIdentifier] = {
+    conf.sessionFunctionResolutionOrder match {
+      case "first" => LEGACY_RESOLUTION_PATH
+      case "last" => SESSION_LAST_RESOLUTION_PATH
+      case _ => RESOLUTION_PATH // "second" (default)
+    }
+  }
+
+  /**
+   * Maps a namespace template to an actual storage identifier for a specific 
function.
+   * This handles the asymmetry between how builtins, extensions, and temp 
functions are stored.
+   *
+   * Storage conventions:
+   * - Builtin functions: FunctionIdentifier(name, None, None)
+   * - Extension functions: FunctionIdentifier(name, None, None) (Treated as 
builtin)
+   * - Temp functions: FunctionIdentifier(name, Some("session"), 
Some("system"))
+   * - Other: FunctionIdentifier(name, namespace.database, namespace.catalog)
+   *
+   * @param namespace The namespace template
+   * @param name The function name
+   * @return The actual identifier to use for registry lookup
+   */
+  private def namespaceToIdentifier(
+      namespace: FunctionIdentifier,
+      name: String): FunctionIdentifier = {
+    namespace.database match {
+      case Some(CatalogManager.SESSION_NAMESPACE) =>

Review Comment:
   Non needed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to