alamb commented on code in PR #14877: URL: https://github.com/apache/datafusion/pull/14877#discussion_r1970315221
########## datafusion-cli/src/exec.rs: ########## @@ -250,37 +245,43 @@ pub(super) async fn exec_and_print( } // As the input stream comes, we can generate results. // However, memory safety is not guaranteed. Review Comment: not related to this PR, but I don't understand this comment about Memory safety ########## datafusion-cli/src/print_format.rs: ########## @@ -539,6 +680,329 @@ mod tests { .run(); } + #[test] + fn test_compute_column_widths() { + let schema = three_column_schema(); + let batches = vec![three_column_batch()]; + let format = PrintFormat::Table; + let widths = format.compute_column_widths(&batches, schema).unwrap(); + assert_eq!(widths, vec![1, 1, 1]); + + let schema = one_column_schema(); + let batches = vec![one_column_batch()]; + let format = PrintFormat::Table; + let widths = format.compute_column_widths(&batches, schema).unwrap(); + assert_eq!(widths, vec![1]); + + let schema = three_column_schema(); + let batches = vec![three_column_batch_with_widths()]; + let format = PrintFormat::Table; + let widths = format.compute_column_widths(&batches, schema).unwrap(); + assert_eq!(widths, [7, 5, 6]); + } + + #[test] + fn test_print_header() { + let schema = three_column_schema(); + let widths = vec![1, 1, 1]; + let mut writer = Vec::new(); + let format = PrintFormat::Table; + format.print_header(&schema, &widths, &mut writer).unwrap(); + let expected = &["+---+---+---+", "| a | b | c |", "+---+---+---+"]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + #[test] + fn test_print_batch_with_same_widths() { + let batch = three_column_batch(); + let widths = vec![1, 1, 1]; + let mut writer = Vec::new(); + let format = PrintFormat::Table; + format + .print_batch_with_widths(&batch, &widths, &mut writer) + .unwrap(); + let expected = &["| 1 | 4 | 7 |", "| 2 | 5 | 8 |", "| 3 | 6 | 9 |"]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + #[test] + fn test_print_batch_with_different_widths() { + let batch = three_column_batch_with_widths(); + let widths = vec![7, 5, 6]; + let mut writer = Vec::new(); + let format = PrintFormat::Table; + format + .print_batch_with_widths(&batch, &widths, &mut writer) + .unwrap(); + let expected = &[ + "| 1 | 42222 | 7 |", + "| 2222222 | 5 | 8 |", + "| 3 | 6 | 922222 |", + ]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + #[test] + fn test_print_dotted_line() { + let widths = vec![1, 1, 1]; + let mut writer = Vec::new(); + let format = PrintFormat::Table; + format.print_dotted_line(&widths, &mut writer).unwrap(); + let expected = &["| . | . | . |"]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + #[test] + fn test_print_bottom_border() { + let widths = vec![1, 1, 1]; + let mut writer = Vec::new(); + let format = PrintFormat::Table; + format.print_bottom_border(&widths, &mut writer).unwrap(); + let expected = &["+---+---+---+"]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + #[test] + fn test_print_batches_with_maxrows() { + let batch = one_column_batch(); + let schema = one_column_schema(); + let format = PrintFormat::Table; + + // should print out entire output with no truncation if unlimited or + // limit greater than number of batches or equal to the number of batches + for max_rows in [MaxRows::Unlimited, MaxRows::Limited(5), MaxRows::Limited(3)] { + let mut writer = Vec::new(); + format + .print_batches( + &mut writer, + schema.clone(), + &[batch.clone()], + max_rows, + true, + ) + .unwrap(); + let expected = &[ + "+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+", + ]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + // should truncate output if limit is less than number of batches + let mut writer = Vec::new(); + format + .print_batches( + &mut writer, + schema.clone(), + &[batch.clone()], + MaxRows::Limited(1), + true, + ) + .unwrap(); + let expected = &[ + "+---+", "| a |", "+---+", "| 1 |", "| . |", "| . |", "| . |", "+---+", + ]; + let binding = String::from_utf8(writer.clone()).unwrap(); + let actual: Vec<_> = binding.trim_end().split('\n').collect(); + assert_eq!(actual, expected); + } + + // test print_batch with different batch widths + // and preview count is less than the first batch Review Comment: 👍 ########## datafusion-cli/src/print_format.rs: ########## @@ -209,6 +211,145 @@ impl PrintFormat { } Ok(()) } + + #[allow(clippy::too_many_arguments)] Review Comment: most of the newly added functions don't use fields on `PrintFormat` I think we could probably avoid the ```rust #[allow(clippy::too_many_arguments)] ``` By encapsulating all the `mut` variables here in a new struct Maybe something like ```rust struct OutputStreamState { preview_batches: Vec<RecordBatch>, preview_row_count: usize, preview_limit: usize, precomputed_widths: Option<Vec<usize>>, header_printed: bool, writer: &mut dyn std::io::Write, } impl OutputStreamState { fn process_batch(&mut self, batch: RecordBatch) { ... } ... fn compute_column_widths(&self, batches: &Vec<RecordBatch>, schema: SchemaRef, ) {...} } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org