alamb commented on code in PR #15438: URL: https://github.com/apache/datafusion/pull/15438#discussion_r2023216031
########## datafusion/physical-expr/src/equivalence/projection.rs: ########## @@ -66,9 +66,9 @@ impl ProjectionMapping { let idx = col.index(); let matching_input_field = input_schema.field(idx); if col.name() != matching_input_field.name() { - return internal_err!("Input field name {} does not match with the projection expression {}", - matching_input_field.name(),col.name()) - } + let fixed_col = Column::new(col.name(), idx); Review Comment: FYI @berkaysynnada and @akurmustafa ########## datafusion/expr/src/logical_plan/builder.rs: ########## @@ -1468,19 +1468,37 @@ impl ValuesFields { } } +// `name_map` tracks a mapping between a field name and the number of appearances of that field. +// +// Some field names might already come to this function with the count (number of times it appeared) +// as a suffix e.g. id:1, so there's still a chance of name collisions, for example, +// if these three fields are passed to this function: "col:1", "col" and "col", the function +// would rename them to -> col:1, col, col:1 causing a later error when building the DFSchema. +// that's why we need the `seen` set, so the fields are always unique. 
+// pub fn change_redundant_column(fields: &Fields) -> Vec<Field> { let mut name_map = HashMap::new(); + let mut seen: HashSet<String> = HashSet::new(); + fields .into_iter() .map(|field| { - let counter = name_map.entry(field.name().to_string()).or_insert(0); - *counter += 1; - if *counter > 1 { - let new_name = format!("{}:{}", field.name(), *counter - 1); - Field::new(new_name, field.data_type().clone(), field.is_nullable()) - } else { - field.as_ref().clone() + let base_name = field.name(); + let count = name_map.entry(base_name.clone()).or_insert(0); + let mut new_name = base_name.clone(); Review Comment: I played around with trying to avoid this `clone`, but I could not come up with anything that was reasonable. Since this function is only called when creating subqueries, I think it is fine. https://github.com/search?q=repo%3Aapache%2Fdatafusion%20change_redundant_column&type=code -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org