paleolimbot commented on code in PR #21071:
URL: https://github.com/apache/datafusion/pull/21071#discussion_r2966405526


##########
datafusion/physical-expr/src/planner.rs:
##########
@@ -299,6 +325,31 @@ pub fn create_physical_expr(
                     ),
                     format_type_and_metadata(field.data_type(), 
Some(field.metadata()))
                 );
+            } else if let Some(registry) = &execution_props.extension_types
+                && let Some(extension_type) =
+                    registry.create_extension_type_for_field(&src_field)?
+            {
+                let cast_extension = extension_type.cast_to()?;
+                if cast_extension.can_cast(&src_field, &field, 
&DEFAULT_CAST_OPTIONS)? {
+                    return expressions::cast_with_extension(

Review Comment:
   This is where the cast from something else to an extension type is resolved 
to a physical expression.
   
   (I didn't handle the case where an extension type is getting cast to another 
extension type, but that should get handled if this is ever going to merge 
because we have to make sure the interface can handle either the right OR the 
left side defining this cast without erroring if the other side doesn't handle 
it)



##########
datafusion/core/tests/extension_types/pretty_printing.rs:
##########
@@ -73,3 +77,88 @@ async fn test_pretty_print_logical_plan() -> Result<()> {
 
     Ok(())
 }
+
+#[tokio::test]
+async fn create_cast_uuid_to_char() -> Result<()> {
+    let schema = test_schema();
+
+    // define data.
+    let batch = RecordBatch::try_new(
+        schema,
+        vec![Arc::new(FixedSizeBinaryArray::from(vec![
+            &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 5, 6],
+        ]))],
+    )?;
+
+    let state = SessionStateBuilder::default()
+        .with_canonical_extension_types()?
+        .with_type_planner(Arc::new(CustomTypePlanner {}))
+        .build();
+    let ctx = SessionContext::new_with_state(state);
+
+    ctx.register_batch("test", batch)?;
+
+    let df = ctx.sql("SELECT my_uuids::VARCHAR FROM test").await?;
+    let batches = df.collect().await?;
+
+    assert_batches_eq!(
+        vec![
+            "+--------------------------------------+",
+            "| test.my_uuids                        |",
+            "+--------------------------------------+",
+            "| 00000000-0000-0000-0000-000000000000 |",
+            "| 00010203-0405-0607-0809-000102030506 |",
+            "+--------------------------------------+",
+        ],
+        &batches
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn create_cast_char_to_uuid() -> Result<()> {
+    let state = SessionStateBuilder::default()
+        .with_canonical_extension_types()?
+        .with_type_planner(Arc::new(CustomTypePlanner {}))
+        .build();
+    let ctx = SessionContext::new_with_state(state);
+
+    let df = ctx
+        .sql("SELECT '00010203-0405-0607-0809-000102030506'::UUID AS uuid")
+        .await?;
+    let batches = df.collect().await?;
+    assert_batches_eq!(
+        vec![
+            "+----------------------------------+",
+            "| uuid                             |",
+            "+----------------------------------+",
+            "| 00010203040506070809000102030506 |",
+            "+----------------------------------+",
+        ],
+        &batches
+    );

Review Comment:
   We can also go the other direction!



##########
datafusion/common/src/types/extension.rs:
##########
@@ -68,4 +70,44 @@ pub trait DFExtensionType: Debug + Send + Sync {
     ) -> Result<Option<ArrayFormatter<'fmt>>> {
         Ok(None)
     }
+
+    fn cast_from(&self) -> Result<Arc<dyn CastExtension>> {
+        Ok(Arc::new(DefaultExtensionCast {}))
+    }
+
+    fn cast_to(&self) -> Result<Arc<dyn CastExtension>> {
+        Ok(Arc::new(DefaultExtensionCast {}))
+    }
+}
+
+pub trait CastExtension: Debug + Send + Sync {
+    fn can_cast(&self, from: &Field, to: &Field, options: &CastOptions) -> 
Result<bool>;
+
+    // None for fallback
+    fn cast(
+        &self,
+        value: ArrayRef,
+        from: &Field,
+        to: &Field,
+        options: &CastOptions,
+    ) -> Result<ArrayRef>;
+}

Review Comment:
   This is the interface an extension type can implement to define its 
interactions with other types



##########
datafusion/physical-expr/src/expressions/cast.rs:
##########
@@ -255,7 +272,35 @@ impl PhysicalExpr for CastExpr {
 
     fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
         let value = self.expr.evaluate(batch)?;
-        value.cast_to(self.cast_type(), Some(&self.cast_options))
+        if let Some(cast_extension) = &self.cast_extension {
+            let from_field = self.expr.return_field(&batch.schema())?;
+            let to_field = self.return_field(&batch.schema())?;
+            match value {
+                ColumnarValue::Array(array) => {
+                    Ok(ColumnarValue::Array(cast_extension.cast(
+                        array,
+                        &from_field,
+                        &to_field,
+                        &self.cast_options,
+                    )?))
+                }

Review Comment:
   This is where the cast is executed. I chose to tack on the `CastExtension` 
to the `CastExpr` but it could also be its own PhysicalExpr (maybe safer in 
case I haven't considered some of the things that could happen during physical 
optimizer passes)



##########
datafusion/core/tests/extension_types/pretty_printing.rs:
##########
@@ -73,3 +77,88 @@ async fn test_pretty_print_logical_plan() -> Result<()> {
 
     Ok(())
 }
+
+#[tokio::test]
+async fn create_cast_uuid_to_char() -> Result<()> {
+    let schema = test_schema();
+
+    // define data.
+    let batch = RecordBatch::try_new(
+        schema,
+        vec![Arc::new(FixedSizeBinaryArray::from(vec![
+            &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 5, 6],
+        ]))],
+    )?;
+
+    let state = SessionStateBuilder::default()
+        .with_canonical_extension_types()?
+        .with_type_planner(Arc::new(CustomTypePlanner {}))
+        .build();
+    let ctx = SessionContext::new_with_state(state);
+
+    ctx.register_batch("test", batch)?;
+
+    let df = ctx.sql("SELECT my_uuids::VARCHAR FROM test").await?;
+    let batches = df.collect().await?;
+
+    assert_batches_eq!(
+        vec![
+            "+--------------------------------------+",
+            "| test.my_uuids                        |",
+            "+--------------------------------------+",
+            "| 00000000-0000-0000-0000-000000000000 |",
+            "| 00010203-0405-0607-0809-000102030506 |",
+            "+--------------------------------------+",
+        ],
+        &batches
+    );

Review Comment:
   Casting from a UUID to something else works!



##########
datafusion/physical-expr/src/planner.rs:
##########
@@ -289,8 +291,32 @@ pub fn create_physical_expr(
             Ok(expressions::case(expr, when_then_expr, else_expr)?)
         }
         Expr::Cast(Cast { expr, field }) => {
+            let (_, src_field) = expr.to_field(input_dfschema)?;
+            const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions {
+                safe: false,
+                format_options: DEFAULT_FORMAT_OPTIONS,
+            };
+
             if !field.metadata().is_empty() {
-                let (_, src_field) = expr.to_field(input_dfschema)?;
+                if let Some(registry) = &execution_props.extension_types
+                    && let Some(extension_type) =
+                        registry.create_extension_type_for_field(&field)?
+                {
+                    let cast_extension = extension_type.cast_from()?;

Review Comment:
   This is where the cast (to an extension type from something else) gets 
planned into a physical expression



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to