zhuqi-lucas commented on code in PR #15239: URL: https://github.com/apache/datafusion/pull/15239#discussion_r2032837806
########## datafusion/common/src/dfschema.rs: ########## @@ -604,26 +605,57 @@ impl DFSchema { }) } - /// Returns true if the two schemas have the same qualified named - /// fields with the same data types. Returns false otherwise. + #[deprecated(since = "47.0.0", note = "Use has_equivalent_names_and_types` instead")] + pub fn equivalent_names_and_types(&self, other: &Self) -> bool { + self.has_equivalent_names_and_types(other).is_ok() + } + + /// Returns Ok if the two schemas have the same qualified named + /// fields with the compatible data types. /// - /// This is a specialized version of Eq that ignores differences - /// in nullability and metadata. + /// Returns an `Err` with a message otherwise. + /// + /// This is a specialized version of Eq that ignores differences in + /// nullability and metadata. /// /// Use [DFSchema]::logically_equivalent_names_and_types for a weaker /// logical type checking, which for example would consider a dictionary /// encoded UTF8 array to be equivalent to a plain UTF8 array. - pub fn equivalent_names_and_types(&self, other: &Self) -> bool { + pub fn has_equivalent_names_and_types(&self, other: &Self) -> Result<()> { + // case 1 : schema length mismatch if self.fields().len() != other.fields().len() { - return false; + _plan_err!( + "Schema mismatch: the schema length are not same \ + Expected schema length: {}, got: {}", + self.fields().len(), + other.fields().len() + ) + } else { + // case 2 : schema length match, but fields mismatch + // check if the fields name are the same and have the same data types + self.fields() + .iter() + .zip(other.fields().iter()) + .try_for_each(|(f1, f2)| { + if f1.name() != f2.name() + || (!DFSchema::datatype_is_semantically_equal( + f1.data_type(), + f2.data_type(), + ) && !can_cast_types(f2.data_type(), f1.data_type())) Review Comment: Thank you @Blizzara for testing and feedback, i am sorry for the broken Substrait consumer which i was not aware. I will help review when you submit the fix. > @zhuqi-lucas this addition of "can_cast_types" broken Substrait consumer (since we use this logic to decide if we _should_ cast things to get names to match - this now always passes). > > But also overall using `can_cast_types` here doesn't feel right. Can_cast_types is very lenient. I think what you'd want to do is to allow Utf8 -> Utf8View in `logically_equivalent_names_and_types` and switch the call site you're interested to use that one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org