westonpace commented on code in PR #16345:
URL: https://github.com/apache/datafusion/pull/16345#discussion_r2138834078


##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(
     field: &Field,
     dfs_names: &Vec<String>,
     unnamed_field_suffix: usize, // If Substrait doesn't provide a name, we'll 
use this "c{unnamed_field_suffix}"
     name_idx: &mut usize,        // Index into dfs_names
-    rename_self: bool, // Some fields (e.g. list items) don't have names in 
Substrait and this will be false to keep old name
 ) -> datafusion::common::Result<Field> {
-    let name = if rename_self {
-        next_struct_field_name(unnamed_field_suffix, dfs_names, name_idx)?
-    } else {
-        field.name().to_string()
-    };
-    match field.data_type() {
+    let name = next_struct_field_name(unnamed_field_suffix, dfs_names, 
name_idx)?;
+    rename_fields_data_type(field.clone().with_name(name), dfs_names, name_idx)
+}
+
+/// Rename the field's data type but not the field itself.
+pub fn rename_fields_data_type(
+    field: Field,
+    dfs_names: &Vec<String>,
+    name_idx: &mut usize, // Index into dfs_names
+) -> datafusion::common::Result<Field> {
+    let dt = rename_data_type(field.data_type(), dfs_names, name_idx)?;
+    Ok(field.with_data_type(dt))
+}
+
+/// Traverse through the data type (incl. lists/maps/etc), renaming all inner 
struct fields.
+pub fn rename_data_type(
+    data_type: &DataType,
+    dfs_names: &Vec<String>,
+    name_idx: &mut usize, // Index into dfs_names
+) -> datafusion::common::Result<DataType> {
+    match data_type {
         DataType::Struct(children) => {
             let children = children
                 .iter()
                 .enumerate()
-                .map(|(child_idx, f)| {
-                    rename_field(
-                        f.as_ref(),
-                        dfs_names,
-                        child_idx,
-                        name_idx,
-                        /*rename_self=*/ true,
-                    )
+                .map(|(field_idx, f)| {
+                    rename_field(f.as_ref(), dfs_names, field_idx, name_idx)
                 })
                 .collect::<datafusion::common::Result<_>>()?;
-            Ok(field
-                .to_owned()
-                .with_name(name)
-                .with_data_type(DataType::Struct(children)))
+            Ok(DataType::Struct(children))
         }
-        DataType::List(inner) => {
-            let renamed_inner = rename_field(
-                inner.as_ref(),
+        DataType::List(inner) => 
Ok(DataType::List(Arc::new(rename_fields_data_type(
+            inner.as_ref().to_owned(),
+            dfs_names,
+            name_idx,
+        )?))),
+        DataType::LargeList(inner) => Ok(DataType::LargeList(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::ListView(inner) => Ok(DataType::ListView(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::LargeListView(inner) => Ok(DataType::LargeListView(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::FixedSizeList(inner, len) => Ok(DataType::FixedSizeList(
+            Arc::new(rename_fields_data_type(
+                inner.as_ref().to_owned(),
                 dfs_names,
-                0,
                 name_idx,
-                /*rename_self=*/ false,
-            )?;
-            Ok(field
-                .to_owned()
-                .with_data_type(DataType::List(FieldRef::new(renamed_inner)))
-                .with_name(name))
+            )?),
+            *len,
+        )),
+        DataType::Map(entries, sorted) => {
+            let entries_data_type = match entries.data_type() {
+                DataType::Struct(fields) => {
+                    // This should be two fields, normally "key" and "value", 
but not guaranteed
+                    let fields = fields
+                        .iter()
+                        .map(|f| {
+                            rename_fields_data_type(
+                                f.as_ref().to_owned(),
+                                dfs_names,
+                                name_idx,
+                            )
+                        })
+                        .collect::<datafusion::common::Result<_>>()?;
+                    Ok(DataType::Struct(fields))
+                }
+                _ => exec_err!("Expected map type to contain an inner struct 
type"),
+            }?;
+            Ok(DataType::Map(
+                Arc::new(
+                    entries
+                        .as_ref()
+                        .to_owned()
+                        .with_data_type(entries_data_type),
+                ),
+                *sorted,
+            ))
         }
-        DataType::LargeList(inner) => {
-            let renamed_inner = rename_field(
-                inner.as_ref(),
+        DataType::Dictionary(key_type, value_type) => {
+            // Dicts probably shouldn't contain structs, but support them just 
in case one does
+            Ok(DataType::Dictionary(
+                Box::new(rename_data_type(key_type, dfs_names, name_idx)?),
+                Box::new(rename_data_type(value_type, dfs_names, name_idx)?),
+            ))
+        }
+        DataType::RunEndEncoded(run_ends_field, values_field) => {

Review Comment:
   Dictionary, REE, ListView, LargeList, LargeListView, are all "encodings" and 
not "types" as far as Substrait is concerned.  There is some discussion of this 
in 
https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml
   
   There's nothing wrong with what you have here, and I think it's probably 
fine to keep it, but if I'm being pedantic I don't think a producer should ever 
produce a plan with these types.
   
   Encodings can be expressed as variations however, in which case they will 
use the base type plus a variation identifier.  So maybe it is still possible 
to hit this path if you are parsing one of those.



##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(
     field: &Field,
     dfs_names: &Vec<String>,
     unnamed_field_suffix: usize, // If Substrait doesn't provide a name, we'll 
use this "c{unnamed_field_suffix}"
     name_idx: &mut usize,        // Index into dfs_names
-    rename_self: bool, // Some fields (e.g. list items) don't have names in 
Substrait and this will be false to keep old name
 ) -> datafusion::common::Result<Field> {
-    let name = if rename_self {
-        next_struct_field_name(unnamed_field_suffix, dfs_names, name_idx)?
-    } else {
-        field.name().to_string()
-    };
-    match field.data_type() {
+    let name = next_struct_field_name(unnamed_field_suffix, dfs_names, 
name_idx)?;
+    rename_fields_data_type(field.clone().with_name(name), dfs_names, name_idx)
+}
+
+/// Rename the field's data type but not the field itself.
+pub fn rename_fields_data_type(
+    field: Field,
+    dfs_names: &Vec<String>,
+    name_idx: &mut usize, // Index into dfs_names
+) -> datafusion::common::Result<Field> {
+    let dt = rename_data_type(field.data_type(), dfs_names, name_idx)?;
+    Ok(field.with_data_type(dt))
+}
+
+/// Traverse through the data type (incl. lists/maps/etc), renaming all inner 
struct fields.
+pub fn rename_data_type(

Review Comment:
   Do these two functions need to be `pub` (not a problem if yes, just curious)?



##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(
     field: &Field,
     dfs_names: &Vec<String>,
     unnamed_field_suffix: usize, // If Substrait doesn't provide a name, we'll 
use this "c{unnamed_field_suffix}"
     name_idx: &mut usize,        // Index into dfs_names
-    rename_self: bool, // Some fields (e.g. list items) don't have names in 
Substrait and this will be false to keep old name
 ) -> datafusion::common::Result<Field> {
-    let name = if rename_self {
-        next_struct_field_name(unnamed_field_suffix, dfs_names, name_idx)?
-    } else {
-        field.name().to_string()
-    };
-    match field.data_type() {
+    let name = next_struct_field_name(unnamed_field_suffix, dfs_names, 
name_idx)?;
+    rename_fields_data_type(field.clone().with_name(name), dfs_names, name_idx)
+}
+
+/// Rename the field's data type but not the field itself.
+pub fn rename_fields_data_type(
+    field: Field,
+    dfs_names: &Vec<String>,
+    name_idx: &mut usize, // Index into dfs_names
+) -> datafusion::common::Result<Field> {
+    let dt = rename_data_type(field.data_type(), dfs_names, name_idx)?;
+    Ok(field.with_data_type(dt))
+}
+
+/// Traverse through the data type (incl. lists/maps/etc), renaming all inner 
struct fields.
+pub fn rename_data_type(
+    data_type: &DataType,
+    dfs_names: &Vec<String>,
+    name_idx: &mut usize, // Index into dfs_names
+) -> datafusion::common::Result<DataType> {
+    match data_type {
         DataType::Struct(children) => {
             let children = children
                 .iter()
                 .enumerate()
-                .map(|(child_idx, f)| {
-                    rename_field(
-                        f.as_ref(),
-                        dfs_names,
-                        child_idx,
-                        name_idx,
-                        /*rename_self=*/ true,
-                    )
+                .map(|(field_idx, f)| {
+                    rename_field(f.as_ref(), dfs_names, field_idx, name_idx)
                 })
                 .collect::<datafusion::common::Result<_>>()?;
-            Ok(field
-                .to_owned()
-                .with_name(name)
-                .with_data_type(DataType::Struct(children)))
+            Ok(DataType::Struct(children))
         }
-        DataType::List(inner) => {
-            let renamed_inner = rename_field(
-                inner.as_ref(),
+        DataType::List(inner) => 
Ok(DataType::List(Arc::new(rename_fields_data_type(
+            inner.as_ref().to_owned(),
+            dfs_names,
+            name_idx,
+        )?))),
+        DataType::LargeList(inner) => Ok(DataType::LargeList(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::ListView(inner) => Ok(DataType::ListView(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::LargeListView(inner) => Ok(DataType::LargeListView(Arc::new(
+            rename_fields_data_type(inner.as_ref().to_owned(), dfs_names, 
name_idx)?,
+        ))),
+        DataType::FixedSizeList(inner, len) => Ok(DataType::FixedSizeList(
+            Arc::new(rename_fields_data_type(
+                inner.as_ref().to_owned(),
                 dfs_names,
-                0,
                 name_idx,
-                /*rename_self=*/ false,
-            )?;
-            Ok(field
-                .to_owned()
-                .with_data_type(DataType::List(FieldRef::new(renamed_inner)))
-                .with_name(name))
+            )?),
+            *len,
+        )),
+        DataType::Map(entries, sorted) => {
+            let entries_data_type = match entries.data_type() {
+                DataType::Struct(fields) => {
+                    // This should be two fields, normally "key" and "value", 
but not guaranteed
+                    let fields = fields

Review Comment:
   Ah, I got a bit confused here but I think you're doing the right thing.  You 
are _not_ pulling an item from `dfs_names` for the `key` or `value` entry which 
is correct.  You will simply reuse whatever names those fields happen to have 
which is good.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to