szehon-ho commented on code in PR #49962:
URL: https://github.com/apache/spark/pull/49962#discussion_r1964194484
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala:
##########
@@ -340,12 +341,44 @@ object ResolveDefaultColumns extends QueryErrorsBase
       throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions(
         "", field.name, defaultSQL)
     }
-    if (!expr.resolved) {
-      throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
-        "", field.name, defaultSQL, null)
+
+    val resolvedExpr = expr match {
+      case _: ExprLiteral => expr
+      case c: Cast if c.resolved => expr
+      case _ =>
+        fallbackResolveExistenceDefaultValue(field)
     }
-    coerceDefaultValue(expr, field.dataType, "", field.name, defaultSQL)
+    coerceDefaultValue(resolvedExpr, field.dataType, "", field.name, defaultSQL)
+  }
+
+  // In most cases, a column's existsDefault should already be persisted as resolved,
+  // constant-folded literal SQL, but because these values are fetched from an
+  // external catalog, that assumption may not hold, so we fall back to full
+  // analysis when we encounter an unresolved existsDefault.
+  private def fallbackResolveExistenceDefaultValue(
+      field: StructField): Expression = {
+    field.getExistenceDefaultValue().map { defaultSQL: String =>
+
+      logWarning(log"Encountered unresolved exists default value: " +
+        log"'${MDC(COLUMN_DEFAULT_VALUE, defaultSQL)}' " +
+        log"for column ${MDC(COLUMN_NAME, field.name)} " +
+        log"with ${MDC(COLUMN_DATA_TYPE_SOURCE, field.dataType)}, " +
+        log"falling back to full analysis.")
+
+      val expr = analyze(field, "", EXISTS_DEFAULT_COLUMN_METADATA_KEY)
+      val literal = expr match {
+        case _: ExprLiteral | _: Cast => expr
+        case _ => throw SparkException.internalError(
+          s"Failed to parse existence default as literal, " +
+          s"field name: ${field.name}, value: $defaultSQL")
+      }
+      // sanity check
+      if (!literal.resolved) {

Review Comment:
   Yea, the original code for resolving existenceDefaultValues, which called analyze(), went through this check. It's even called out as an extra check in the comments: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala#L310

   So I was just keeping the original behavior; do you suggest removing it?
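To make the fallback pattern under discussion easier to follow, here is a minimal, self-contained Scala sketch. `Expression`, `Literal`, `Cast`, `Unresolved`, and `analyze` below are simplified stand-ins for the real Catalyst types and analyzer, not Spark's actual API; the sketch only illustrates the shape of the logic: pass already-resolved literals and casts through, fall back to full analysis otherwise, then sanity-check that the result is resolved.

```scala
// Minimal sketch only: these types are hypothetical stand-ins for the
// Catalyst Expression hierarchy, not Spark's real classes.
sealed trait Expression { def resolved: Boolean }
case class Literal(value: Any) extends Expression { val resolved = true }
case class Cast(child: Expression, targetType: String) extends Expression {
  val resolved: Boolean = child.resolved
}
case class Unresolved(sql: String) extends Expression { val resolved = false }

object ExistenceDefaultSketch {
  // Stand-in for the full analyzer pass, assumed to constant-fold the SQL
  // text down to a literal.
  def analyze(sql: String): Expression = Literal(sql)

  def resolveExistenceDefault(expr: Expression, defaultSQL: String): Expression = {
    val resolvedExpr = expr match {
      case l: Literal => l             // already a constant-folded literal
      case c: Cast if c.resolved => c  // a resolved cast is acceptable as-is
      case _ => analyze(defaultSQL)    // otherwise fall back to full analysis
    }
    // The sanity check being debated: analysis should always produce a
    // resolved expression, so a failure here would indicate an internal bug.
    require(resolvedExpr.resolved, s"still unresolved after analysis: $defaultSQL")
    resolvedExpr
  }

  def main(args: Array[String]): Unit = {
    // A persisted, constant-folded default passes through untouched.
    println(resolveExistenceDefault(Literal(42), "42"))
    // An unresolved default (e.g. from an external catalog) takes the fallback path.
    println(resolveExistenceDefault(Unresolved("CAST(1 AS INT)"), "CAST(1 AS INT)"))
  }
}
```

Under those assumptions, the `require` plays the same role as the `if (!literal.resolved)` check in the diff above: it is unreachable unless analysis itself misbehaves.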