paleolimbot commented on code in PR #16170:
URL: https://github.com/apache/datafusion/pull/16170#discussion_r2126762149


##########
datafusion/expr/src/expr.rs:
##########
@@ -274,16 +275,16 @@ use sqlparser::ast::{
 /// assert!(rewritten.transformed);
 /// // to 42 = 5 AND b = 6
 /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
-#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
+#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
 pub enum Expr {
     /// An expression with a specific name.
     Alias(Alias),
     /// A named reference to a qualified field in a schema.
     Column(Column),
     /// A named reference to a variable in a registry.
     ScalarVariable(DataType, Vec<String>),
-    /// A constant value.
-    Literal(ScalarValue),
+    /// A constant value along with associated metadata
+    Literal(ScalarValue, Option<BTreeMap<String, String>>),

Review Comment:
   Just curious about the choice of a `BTreeMap` here (isn't `Field` using a 
`HashMap`?)



##########
datafusion/physical-expr/src/expressions/dynamic_filters.rs:
##########
@@ -342,15 +342,15 @@ mod test {
         )
         .unwrap();
         let snap = dynamic_filter_1.snapshot().unwrap().unwrap();
-        insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: 
Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42) }, 
fail_on_overflow: false }"#);
+        insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: 
Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), 
field: Field { name: "42", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#);

Review Comment:
   Not sure if this is `Display` or `Debug`, but would it be possible to omit 
the `Field` if it isn't carrying extra metadata?



##########
datafusion/physical-expr/src/planner.rs:
##########
@@ -111,14 +112,42 @@ pub fn create_physical_expr(
     let input_schema: &Schema = &input_dfschema.into();
 
     match e {
-        Expr::Alias(Alias { expr, .. }) => {
-            Ok(create_physical_expr(expr, input_dfschema, execution_props)?)
+        Expr::Alias(Alias { expr, metadata, .. }) => {
+            if let Expr::Literal(v, prior_metadata) = expr.as_ref() {
+                let mut new_metadata = prior_metadata
+                    .as_ref()
+                    .map(|m| {
+                        m.iter()
+                            .map(|(k, v)| (k.clone(), v.clone()))
+                            .collect::<HashMap<String, String>>()
+                    })
+                    .unwrap_or_default();

Review Comment:
   I assume there's some reason that `.cloned()` doesn't work here?



##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -6061,7 +6061,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278") }, Literal { 
value: Utf8View("a") }, Literal { value: Utf8View("b") }, Literal { value: 
Utf8View("c") }])
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: "b", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("c"), field: Field { name: "c", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }])

Review Comment:
   For those of us looking at physical plans all the time, I do think it might 
be worth the effort to make the `Literal` output more succinct (perhaps showing 
just the metadata part?)



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -420,11 +420,18 @@ impl ExprSchemable for Expr {
             Expr::ScalarVariable(ty, _) => {
                 Ok(Arc::new(Field::new(&schema_name, ty.clone(), true)))
             }
-            Expr::Literal(l) => Ok(Arc::new(Field::new(
-                &schema_name,
-                l.data_type(),
-                l.is_null(),
-            ))),
+            Expr::Literal(l, metadata) => {
+                let mut field = Field::new(&schema_name, l.data_type(), 
l.is_null());
+                if let Some(metadata) = metadata {
+                    field = field.with_metadata(
+                        metadata
+                            .iter()
+                            .map(|(k, v)| (k.clone(), v.clone()))
+                            .collect(),

Review Comment:
   Does `metadata.clone()` or `metadata.into()` not work here?



##########
datafusion/expr/src/literal.rs:
##########
@@ -19,12 +19,37 @@
 
 use crate::Expr;
 use datafusion_common::ScalarValue;
+use std::collections::HashMap;
 
 /// Create a literal expression
 pub fn lit<T: Literal>(n: T) -> Expr {
     n.lit()
 }
 
+pub fn lit_with_metadata<T: Literal>(
+    n: T,
+    metadata: impl Into<Option<HashMap<String, String>>>,
+) -> Expr {

Review Comment:
   🙏 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to