jayzhan211 commented on code in PR #15268: URL: https://github.com/apache/datafusion/pull/15268#discussion_r2000156746
########## datafusion/datasource-parquet/src/file_format.rs: ########## @@ -465,7 +465,116 @@ impl FileFormat for ParquetFormat { } } +/// Apply necessary schema type coercions to make file schema match table schema. +/// +/// This function performs two main types of transformations in a single pass: +/// 1. Binary types to string types conversion - Converts binary data types to their +/// corresponding string types when the table schema expects string data +/// 2. Regular to view types conversion - Converts standard string/binary types to +/// view types when the table schema uses view types +/// +/// # Arguments +/// * `table_schema` - The table schema containing the desired types +/// * `file_schema` - The file schema to be transformed +/// +/// # Returns +/// * `Some(Schema)` - If any transformations were applied, returns the transformed schema +/// * `None` - If no transformations were needed +pub fn apply_file_schema_type_coercions( + table_schema: &Schema, + file_schema: &Schema, +) -> Option<Schema> { + let mut needs_view_transform = false; + let mut needs_string_transform = false; + + // Create a mapping of table field names to their data types for fast lookup + // and simultaneously check if we need any transformations + let table_fields: HashMap<_, _> = table_schema + .fields() + .iter() + .map(|f| { + let dt = f.data_type(); + // Check if we need view type transformation + if dt.equals_datatype(&DataType::Utf8View) Review Comment: `equals_datatype` is used for nested types; in this case `matches!()` is enough. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org