gabotechs commented on code in PR #16345:
URL: https://github.com/apache/datafusion/pull/16345#discussion_r2139335258


##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(

Review Comment:
   For 2) Given that these are going to be public, maybe there's a way of 
generalizing the function signatures so that they are less coupled to 
specifically how are they used here?
   
   <details><summary>Here's a draft idea of a potential way of making them more 
flexible</summary>
   
   ```rust
   pub fn rename_field(
       field: &Field,
   +   new_name: &mut impl FnMut() -> datafusion::common::Result<String>,
   ) -> datafusion::common::Result<Field> {
       rename_fields_data_type(field.clone().with_name(new_name()?), new_name)
   }
   
   pub fn rename_fields_data_type(
       field: Field,
   +   new_name: &mut impl FnMut() -> datafusion::common::Result<String>,
   ) -> datafusion::common::Result<Field> {
   +   let dt = rename_data_type(field.data_type(), new_name)?;
       Ok(field.with_data_type(dt))
   }
   
   pub fn rename_data_type(
       data_type: &DataType,
   +   new_name: &mut impl FnMut() -> datafusion::common::Result<String>,
   ) -> datafusion::common::Result<DataType> {
       match data_type {
           DataType::Struct(children) => {
               let children = children
                   .iter()
   +               .map(|f| rename_field(f.as_ref(), new_name))
                   .collect::<datafusion::common::Result<_>>()?;
               Ok(DataType::Struct(children))
           }
           DataType::List(inner) => 
Ok(DataType::List(Arc::new(rename_fields_data_type(
               inner.as_ref().to_owned(),
   +           new_name,
           )?))),
           DataType::LargeList(inner) => Ok(DataType::LargeList(Arc::new(
   +           rename_fields_data_type(inner.as_ref().to_owned(), new_name)?,
           ))),
           DataType::ListView(inner) => Ok(DataType::ListView(Arc::new(
   +           rename_fields_data_type(inner.as_ref().to_owned(), new_name)?,
           ))),
           DataType::LargeListView(inner) => 
Ok(DataType::LargeListView(Arc::new(
   +           rename_fields_data_type(inner.as_ref().to_owned(), new_name)?,
           ))),
           DataType::FixedSizeList(inner, len) => Ok(DataType::FixedSizeList(
               Arc::new(rename_fields_data_type(
                   inner.as_ref().to_owned(),
   +               new_name,
               )?),
   ...
                   DataType::Struct(fields) => {
                       // This should be two fields, normally "key" and 
"value", but not guaranteed
                       let fields = fields
                           .iter()
   +                       .map(|f| 
rename_fields_data_type(f.as_ref().to_owned(), new_name))
                           .collect::<datafusion::common::Result<_>>()?;
                       Ok(DataType::Struct(fields))
   ...
           }
           DataType::Dictionary(key_type, value_type) => {
               // Dicts probably shouldn't contain structs, but support them 
just in case one does
               Ok(DataType::Dictionary(
   +               Box::new(rename_data_type(key_type, new_name)?),
   +               Box::new(rename_data_type(value_type, new_name)?),
               ))
           }
           DataType::RunEndEncoded(run_ends_field, values_field) => {
               // At least the run_ends_field shouldn't contain names (since it 
should be i16/i32/i64),
               // but we'll try renaming its datatype just in case.
               let run_ends_field =
   +               rename_fields_data_type(run_ends_field.as_ref().clone(), 
new_name)?;
               let values_field =
   +               rename_fields_data_type(values_field.as_ref().clone(), 
new_name)?;
   
   ...
           DataType::Union(fields, mode) => {
               let fields = fields
                   .iter()
                   .map(|(i, f)| {
                       Ok((
                           i,
   +                       Arc::new(rename_fields_data_type(f.as_ref().clone(), 
new_name)?),
                       ))
                   })
   
   ```
   
   And then wired up to the still internal `next_struct_field_name` method like 
this
   
   ```rust
           let output_field = rename_field(&output_field, &mut || {
   +           next_struct_field_name(expr_idx, &substrait_expr.output_names, 
&mut names_idx)
           })?;
   
   ```
   
   </details>



##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(

Review Comment:
   IMO neither 1) or 2) are a big deal, just something to at least keep in mind



##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(

Review Comment:
   A couple of thoughts about these functions being public:
   1) Isn't it a bit weird that these functions, which are unrelated to 
Substrait and are just pure Arrow, are publicly exposed through the 
`datafusion_substrait` crate?
   2) Isn't the current function signature a bit too coupled to the specific 
way they are used here for them to be served to the public as is?



##########
datafusion/substrait/src/logical_plan/consumer/utils.rs:
##########
@@ -81,98 +81,167 @@ pub(super) fn next_struct_field_name(
     }
 }
 
-pub(super) fn rename_field(
+/// Traverse through the field, renaming the provided field itself and all its 
inner struct fields.
+pub fn rename_field(

Review Comment:
   For 1) I think it's not a big deal, but I wonder if there's any other place 
within DataFusion that's more like a "collection of utilities for working with 
arrow" that could better host these functions. But again, not a big deal IMO



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to