alamb commented on code in PR #17459:
URL: https://github.com/apache/datafusion/pull/17459#discussion_r2330714653


##########
datafusion/common/src/dfschema.rs:
##########
@@ -863,6 +863,208 @@ impl DFSchema {
             .zip(self.inner.fields().iter())
             .map(|(qualifier, field)| (qualifier.as_ref(), field))
     }
+    /// Print schema in tree format
+    ///
+    /// This method formats the schema
+    /// with a tree-like structure showing field names, types, and nullability.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use datafusion_common::DFSchema;
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use std::collections::HashMap;
+    ///
+    /// let schema = DFSchema::from_unqualified_fields(
+    ///     vec![
+    ///         Field::new("id", DataType::Int32, false),
+    ///         Field::new("name", DataType::Utf8, true),
+    ///     ].into(),
+    ///     HashMap::new()
+    /// ).unwrap();
+    ///
+    /// println!("{}", schema.print_schema_tree());
+    /// ```
+    ///
+    /// Output:
+    /// ```text
+    /// root
+    ///  |-- id: int32 (nullable = false)
+    ///  |-- name: utf8 (nullable = true)

Review Comment:
   I recommend we change this to be an assert so we ensure the documentation is 
kept in sync, something like (untested):
   
   ```suggestion
       /// assert_eq!(schema.print_schema_tree().to_string(),
       /// r#"
       /// root
       ///  |-- id: int32 (nullable = false)
       ///  |-- name: utf8 (nullable = true)
       /// "
   ```



##########
datafusion/common/src/dfschema.rs:
##########
@@ -1734,4 +1936,471 @@ mod tests {
     fn test_metadata_n(n: usize) -> HashMap<String, String> {
         (0..n).map(|i| (format!("k{i}"), format!("v{i}"))).collect()
     }
+
+    #[test]
+    fn test_print_schema_unqualified() {
+        let schema = DFSchema::from_unqualified_fields(
+            vec![
+                Field::new("id", DataType::Int32, false),
+                Field::new("name", DataType::Utf8, true),
+                Field::new("age", DataType::Int64, true),
+                Field::new("active", DataType::Boolean, false),
+            ]
+            .into(),
+            HashMap::new(),
+        )
+        .unwrap();
+
+        let output = schema.print_schema_tree();
+        let expected = "root\n |-- id: int32 (nullable = false)\n |-- name: 
string (nullable = true)\n |-- age: int64 (nullable = true)\n |-- active: 
boolean (nullable = false)";
+
+        assert_eq!(output, expected);
+    }
+
+    #[test]
+    fn test_print_schema_qualified() {
+        let schema = DFSchema::try_from_qualified_schema(
+            "table1",
+            &Schema::new(vec![
+                Field::new("id", DataType::Int32, false),
+                Field::new("name", DataType::Utf8, true),
+            ]),
+        )
+        .unwrap();
+
+        let output = schema.print_schema_tree();
+        let expected = "root\n |-- table1.id: int32 (nullable = false)\n |-- 
table1.name: string (nullable = true)";
+
+        assert_eq!(output, expected);
+    }
+
+    #[test]
+    fn test_print_schema_complex_types() {
+        let struct_field = Field::new(
+            "address",
+            DataType::Struct(Fields::from(vec![
+                Field::new("street", DataType::Utf8, true),
+                Field::new("city", DataType::Utf8, true),
+            ])),
+            true,
+        );
+
+        let list_field = Field::new(
+            "tags",
+            DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
+            true,
+        );
+
+        let schema = DFSchema::from_unqualified_fields(
+            vec![
+                Field::new("id", DataType::Int32, false),
+                struct_field,
+                list_field,
+                Field::new("score", DataType::Decimal128(10, 2), true),
+            ]
+            .into(),
+            HashMap::new(),
+        )
+        .unwrap();
+
+        let output = schema.print_schema_tree();
+        let expected = "root\n |-- id: int32 (nullable = false)\n |-- address: 
struct (nullable = true)\n |    |-- street: string (nullable = true)\n |    |-- 
city: string (nullable = true)\n |-- tags: list (nullable = true)\n |    |-- 
item: string (nullable = true)\n |-- score: decimal (nullable = true)";
+
+        assert_eq!(output, expected);
+    }
+
+    #[test]
+    fn test_print_schema_empty() {
+        let schema = DFSchema::empty();
+        let output = schema.print_schema_tree();
+        let expected = "root";
+
+        assert_eq!(output, expected);
+    }
+
+    #[test]
+    fn test_print_schema_deeply_nested_types() {
+        // Create a deeply nested structure to test indentation and complex 
type formatting
+        let inner_struct = Field::new(
+            "inner",
+            DataType::Struct(Fields::from(vec![
+                Field::new("level1", DataType::Utf8, true),
+                Field::new("level2", DataType::Int32, false),
+            ])),
+            true,
+        );
+
+        let nested_list = Field::new(
+            "nested_list",
+            DataType::List(Arc::new(Field::new(
+                "item",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("id", DataType::Int64, false),
+                    Field::new("value", DataType::Float64, true),
+                ])),
+                true,
+            ))),
+            true,
+        );
+
+        let map_field = Field::new(
+            "map_data",
+            DataType::Map(
+                Arc::new(Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Utf8, false),
+                        Field::new(
+                            "value",
+                            DataType::List(Arc::new(Field::new(
+                                "item",
+                                DataType::Int32,
+                                true,
+                            ))),
+                            true,
+                        ),
+                    ])),
+                    false,
+                )),
+                false,
+            ),
+            true,
+        );
+
+        let schema = DFSchema::from_unqualified_fields(
+            vec![
+                Field::new("simple_field", DataType::Utf8, true),
+                inner_struct,
+                nested_list,
+                map_field,
+                Field::new(
+                    "timestamp_field",
+                    DataType::Timestamp(
+                        arrow::datatypes::TimeUnit::Microsecond,
+                        Some("UTC".into()),
+                    ),
+                    false,
+                ),
+            ]
+            .into(),
+            HashMap::new(),
+        )
+        .unwrap();
+
+        let output = schema.print_schema_tree();

Review Comment:
   i think an insta snapshot would be better here



##########
datafusion/common/src/dfschema.rs:
##########
@@ -1734,4 +1936,471 @@ mod tests {
     fn test_metadata_n(n: usize) -> HashMap<String, String> {
         (0..n).map(|i| (format!("k{i}"), format!("v{i}"))).collect()
     }
+
+    #[test]
+    fn test_print_schema_unqualified() {
+        let schema = DFSchema::from_unqualified_fields(
+            vec![
+                Field::new("id", DataType::Int32, false),
+                Field::new("name", DataType::Utf8, true),
+                Field::new("age", DataType::Int64, true),
+                Field::new("active", DataType::Boolean, false),
+            ]
+            .into(),
+            HashMap::new(),
+        )
+        .unwrap();
+
+        let output = schema.print_schema_tree();
+        let expected = "root\n |-- id: int32 (nullable = false)\n |-- name: 
string (nullable = true)\n |-- age: int64 (nullable = true)\n |-- active: 
boolean (nullable = false)";

Review Comment:
   using `insta` might make these cases easier to update / easier to see. 
Something like this perhaps
   
   
https://github.com/apache/datafusion/blob/b8bf7c5bf1ae46e57b23a9728b2d66b525a999bd/datafusion/core/tests/user_defined/user_defined_aggregates.rs#L214-L220



##########
datafusion/sql/src/statement.rs:
##########
@@ -2024,9 +2024,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             let mut value_indices = vec![None; table_schema.fields().len()];
             let fields = columns
                 .into_iter()
-                .map(|c| self.ident_normalizer.normalize(c))
                 .enumerate()
                 .map(|(i, c)| {
+                    let c = self.ident_normalizer.normalize(c);

Review Comment:
   It seems to me like this change just moves the call to normalize the 
identifier into the map function (I don't think there are any loops removed 🤔 )



##########
datafusion/common/src/dfschema.rs:
##########
@@ -863,6 +863,208 @@ impl DFSchema {
             .zip(self.inner.fields().iter())
             .map(|(qualifier, field)| (qualifier.as_ref(), field))
     }
+    /// Print schema in tree format
+    ///
+    /// This method formats the schema
+    /// with a tree-like structure showing field names, types, and nullability.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use datafusion_common::DFSchema;
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use std::collections::HashMap;
+    ///
+    /// let schema = DFSchema::from_unqualified_fields(
+    ///     vec![
+    ///         Field::new("id", DataType::Int32, false),
+    ///         Field::new("name", DataType::Utf8, true),
+    ///     ].into(),
+    ///     HashMap::new()
+    /// ).unwrap();
+    ///
+    /// println!("{}", schema.print_schema_tree());
+    /// ```
+    ///
+    /// Output:
+    /// ```text
+    /// root
+    ///  |-- id: int32 (nullable = false)
+    ///  |-- name: utf8 (nullable = true)
+    /// ```
+    pub fn print_schema_tree(&self) -> String {

Review Comment:
   As written this will require copying to an intermediate string, even when 
that could be avoided.
   
   ```suggestion
       pub fn print_schema_tree(&self) -> impl Display + '_ {
   ```
   
   Somethings similar to 
https://github.com/apache/datafusion/blob/3f422a1746a243d13f37c229c7b774af6d4552b1/datafusion/expr/src/expr.rs#L1491-L1490
 perhaps
   
   Even if you just return a String in this PR (which also implements Display), 
keeping the API as `impl Display` would allow improvements in the future



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to