alamb commented on code in PR #14439: URL: https://github.com/apache/datafusion/pull/14439#discussion_r1943361006
########## datafusion/common/src/error.rs: ########## @@ -334,6 +343,14 @@ impl Error for DataFusionError { DataFusionError::Context(_, e) => Some(e.as_ref()), DataFusionError::Substrait(_) => None, DataFusionError::Diagnostic(_, e) => Some(e.as_ref()), + // Can't really make a Collection fit into the mold of "an error has Review Comment: 👍 ########## datafusion/sql/src/select.rs: ########## @@ -574,10 +574,19 @@ impl<S: ContextProvider> SqlToRel<'_, S> { empty_from: bool, planner_context: &mut PlannerContext, ) -> Result<Vec<Expr>> { - projection - .into_iter() - .map(|expr| self.sql_select_to_rex(expr, plan, empty_from, planner_context)) - .collect::<Result<Vec<Expr>>>() + let mut prepared_select_exprs = vec![]; + let mut errors = vec![]; + for expr in projection { + match self.sql_select_to_rex(expr, plan, empty_from, planner_context) { + Ok(expr) => prepared_select_exprs.push(expr), + Err(err) => errors.push(err), + } + } + if !errors.is_empty() { Review Comment: It would be nice to only wrap with a DataFusionError::Collection if this actually more than one error Maybe you could make some sort of builder like ```rust let mut builder = DataFusionErrorBuilder::new(); ... let builder = builder.add_error(err) ... builder.build()?; // returns Err(DataFusionError if any have been pushed) Ok(prepared_select_exprs) ``` And handle the unwapping there ########## datafusion/sql/tests/cases/collection.rs: ########## @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::DataFusionError; +use datafusion_sql::planner::SqlToRel; +use sqlparser::{dialect::GenericDialect, parser::Parser}; + +use crate::{MockContextProvider, MockSessionState}; + +fn do_query(sql: &'static str) -> DataFusionError { + let dialect = GenericDialect {}; + let statement = Parser::new(&dialect) + .try_with_sql(sql) + .expect("unable to create parser") + .parse_statement() + .expect("unable to parse query"); + let state = MockSessionState::default(); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new(&context); + sql_to_rel + .sql_statement_to_plan(statement) + .expect_err("expected error") +} + +#[test] +fn test_collect_select_items() { + let query = "SELECT first_namex, last_namex FROM person"; + let error = do_query(query); + let errors = error.iter().collect::<Vec<_>>(); + assert_eq!(errors.len(), 2); + assert!(errors[0] Review Comment: DFWIW there is also `assert_contains!` (that makes a nicer error message on failure) ########## datafusion/common/src/error.rs: ########## @@ -540,6 +570,37 @@ impl DataFusionError { DiagnosticsIterator { head: self }.next() } + + /// Sometimes DataFusion is able to collect multiple errors in a SQL query + /// before terminating, e.g. across different expressions in a SELECT + /// statements or different sides of a UNION. This method returns an + /// iterator over all the errors in the collection. + /// + /// For this to work, the top-level error must be a + /// `DataFusionError::Collection`, not something that contains it. + pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> { + struct ErrorIterator<'a> { + queue: VecDeque<&'a DataFusionError>, + } + + impl<'a> Iterator for ErrorIterator<'a> { + type Item = &'a DataFusionError; + + fn next(&mut self) -> Option<Self::Item> { + loop { + let popped = self.queue.pop_front()?; + match popped { + DataFusionError::Collection(errs) => self.queue.extend(errs), + _ => return Some(popped), + } + } + } + } + + let mut queue = VecDeque::new(); + queue.push_back(self); + ErrorIterator { queue } + } Review Comment: I think you can implement the same thing more simply via ```rust pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> { let mut current_err = self; let errors: Vec<_> = match self { DataFusionError::Collection(errs) => errs.iter().collect(), _ => vec![self], }; errors.into_iter() } ``` That doesn't handle recursive Collections but I think that is ok ########## datafusion/common/src/error.rs: ########## @@ -425,29 +442,38 @@ impl DataFusionError { "".to_owned() } - fn error_prefix(&self) -> &'static str { + fn error_prefix(&self) -> Cow<str> { match self { - DataFusionError::ArrowError(_, _) => "Arrow error: ", + DataFusionError::ArrowError(_, _) => Cow::Borrowed("Arrow error: "), #[cfg(feature = "parquet")] - DataFusionError::ParquetError(_) => "Parquet error: ", + DataFusionError::ParquetError(_) => Cow::Borrowed("Parquet error: "), #[cfg(feature = "avro")] - DataFusionError::AvroError(_) => "Avro error: ", + DataFusionError::AvroError(_) => Cow::Borrowed("Avro error: "), #[cfg(feature = "object_store")] - DataFusionError::ObjectStore(_) => "Object Store error: ", - DataFusionError::IoError(_) => "IO error: ", - DataFusionError::SQL(_, _) => "SQL error: ", - DataFusionError::NotImplemented(_) => "This feature is not implemented: ", - DataFusionError::Internal(_) => "Internal error: ", - DataFusionError::Plan(_) => "Error during planning: ", - DataFusionError::Configuration(_) => "Invalid or Unsupported Configuration: ", - DataFusionError::SchemaError(_, _) => "Schema error: ", - DataFusionError::Execution(_) => "Execution error: ", - DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ", - DataFusionError::ResourcesExhausted(_) => "Resources exhausted: ", - DataFusionError::External(_) => "External error: ", - DataFusionError::Context(_, _) => "", - DataFusionError::Substrait(_) => "Substrait error: ", - DataFusionError::Diagnostic(_, _) => "", + DataFusionError::ObjectStore(_) => Cow::Borrowed("Object Store error: "), + DataFusionError::IoError(_) => Cow::Borrowed("IO error: "), + DataFusionError::SQL(_, _) => Cow::Borrowed("SQL error: "), + DataFusionError::NotImplemented(_) => { + Cow::Borrowed("This feature is not implemented: ") + } + DataFusionError::Internal(_) => Cow::Borrowed("Internal error: "), + DataFusionError::Plan(_) => Cow::Borrowed("Error during planning: "), + DataFusionError::Configuration(_) => { + Cow::Borrowed("Invalid or Unsupported Configuration: ") + } + DataFusionError::SchemaError(_, _) => Cow::Borrowed("Schema error: "), + DataFusionError::Execution(_) => Cow::Borrowed("Execution error: "), + DataFusionError::ExecutionJoin(_) => Cow::Borrowed("ExecutionJoin error: "), + DataFusionError::ResourcesExhausted(_) => { + Cow::Borrowed("Resources exhausted: ") + } + DataFusionError::External(_) => Cow::Borrowed("External error: "), + DataFusionError::Context(_, _) => Cow::Borrowed(""), + DataFusionError::Substrait(_) => Cow::Borrowed("Substrait error: "), + DataFusionError::Diagnostic(_, _) => Cow::Borrowed(""), + DataFusionError::Collection(errs) => { + Cow::Owned(format!("{} errors, first error: {}", errs.len(), errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix())) Review Comment: We could reduce the size of this diff by simply returning the first error's prefix here too (as done below). I am not sure how much benefit a user would get also seeing the number of errors in addition to the first one -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org