shehabgamin commented on code in PR #14440:
URL: https://github.com/apache/datafusion/pull/14440#discussion_r1948008922
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -225,6 +229,45 @@ impl Display for TypeSignatureClass {
}
}
+impl TypeSignatureClass {
+ /// Returns the default cast type for the given `TypeSignatureClass`.
+ /// Be cautious to avoid adding specialized logic here, as this function
is public and intended for general use.
+ pub fn default_casted_type(
+ &self,
+ logical_type: &NativeType,
+ origin_type: &DataType,
+ ) -> Result<DataType> {
+ match self {
+ TypeSignatureClass::Native(logical_type) => {
+ logical_type.native().default_cast_for(origin_type)
+ }
+ // If the given type is already a timestamp, we don't change the
unit and timezone
+ TypeSignatureClass::Timestamp if logical_type.is_timestamp() => {
+ Ok(origin_type.to_owned())
+ }
+ // This is an existing use case for casting string to timestamp,
since we don't have specific unit and timezone from string,
+ // so we use the default timestamp type with nanosecond precision
and no timezone
+ // TODO: Consider allowing the user to specify the default
timestamp type instead of having it predefined in DataFusion when we have more
use cases
Review Comment:
The user should be allowed to specify the default timestamp type.
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
Review Comment:
The rest of the `TimeUnit`s are missing. It would be great if
`TIMEZONE_WILDCARD` encompassed `None` and it would also be great if there was
a wildcard for `TimeUnit`.
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into())),
+ ]
+ }
+ TypeSignatureClass::Date => {
+ vec![DataType::Date64]
+ }
+ TypeSignatureClass::Time => {
+ vec![DataType::Time64(TimeUnit::Nanosecond)]
+ }
+ TypeSignatureClass::Interval => {
+ vec![DataType::Interval(IntervalUnit::DayTime)]
+ }
+ TypeSignatureClass::Duration => {
+ vec![DataType::Duration(TimeUnit::Nanosecond)]
Review Comment:
This should encompass all possible `DataType::Duration`. Also same comment
as above regarding `TimeUnit` wildcard.
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -460,6 +521,44 @@ fn get_data_types(native_type: &NativeType) ->
Vec<DataType> {
}
}
+#[derive(Debug, Clone, Eq, PartialOrd)]
+pub struct Coercion {
Review Comment:
+1 this would be really great to have!
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into())),
+ ]
+ }
+ TypeSignatureClass::Date => {
+ vec![DataType::Date64]
Review Comment:
This should be `DataType::Date32` and `DataType::Date64`
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into())),
+ ]
+ }
+ TypeSignatureClass::Date => {
+ vec![DataType::Date64]
+ }
+ TypeSignatureClass::Time => {
+ vec![DataType::Time64(TimeUnit::Nanosecond)]
Review Comment:
This should encompass all possible `DataType::Time32` and
`DataType::Time64`. Also same comment as above regarding `TimeUnit` wildcard.
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -460,6 +521,44 @@ fn get_data_types(native_type: &NativeType) ->
Vec<DataType> {
}
}
+#[derive(Debug, Clone, Eq, PartialOrd)]
+pub struct Coercion {
+ pub desired_type: TypeSignatureClass,
+ pub allowed_casts: Vec<TypeSignatureClass>,
Review Comment:
+1 for `allowed_source_types`
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into())),
+ ]
+ }
+ TypeSignatureClass::Date => {
+ vec![DataType::Date64]
+ }
+ TypeSignatureClass::Time => {
+ vec![DataType::Time64(TimeUnit::Nanosecond)]
+ }
+ TypeSignatureClass::Interval => {
+ vec![DataType::Interval(IntervalUnit::DayTime)]
Review Comment:
This should encompass all possible `DataType::Interval`
##########
datafusion/expr-common/src/signature.rs:
##########
@@ -431,6 +463,35 @@ impl TypeSignature {
}
}
+fn get_possible_types_from_signature_classes(
+ signature_classes: &TypeSignatureClass,
+) -> Vec<DataType> {
+ match signature_classes {
+ TypeSignatureClass::Native(l) => get_data_types(l.native()),
+ TypeSignatureClass::Timestamp => {
+ vec![
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into())),
+ ]
+ }
+ TypeSignatureClass::Date => {
+ vec![DataType::Date64]
+ }
+ TypeSignatureClass::Time => {
+ vec![DataType::Time64(TimeUnit::Nanosecond)]
+ }
+ TypeSignatureClass::Interval => {
+ vec![DataType::Interval(IntervalUnit::DayTime)]
+ }
+ TypeSignatureClass::Duration => {
+ vec![DataType::Duration(TimeUnit::Nanosecond)]
+ }
+ TypeSignatureClass::Integer => {
+ vec![DataType::Int64]
Review Comment:
This should encompass all possible ints.
##########
datafusion/common/src/types/builtin.rs:
##########
@@ -47,3 +49,11 @@ singleton!(LOGICAL_FLOAT64, logical_float64, Float64);
singleton!(LOGICAL_DATE, logical_date, Date);
singleton!(LOGICAL_BINARY, logical_binary, Binary);
singleton!(LOGICAL_STRING, logical_string, String);
+
+// TODO: Extend macro
+// TODO: Should we use LOGICAL_TIMESTAMP_NANO to distinguish unit and timzeone?
Review Comment:
It would be great if there was a wildcard to match all the units and
timezones.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]