BlakeOrth commented on code in PR #18085:
URL: https://github.com/apache/datafusion/pull/18085#discussion_r2434171476
##########
datafusion-cli/src/object_storage/instrumented.rs:
##########
@@ -252,21 +265,109 @@ impl fmt::Display for RequestDetails {
}
}
-/// Summary statistics for an [`InstrumentedObjectStore`]'s [`RequestDetails`]
+/// Summary statistics for all requests recorded in an
[`InstrumentedObjectStore`]
#[derive(Default)]
-pub struct RequestSummary {
- count: usize,
- duration_stats: Option<Stats<Duration>>,
- size_stats: Option<Stats<usize>>,
+pub struct RequestSummaries {
+ summaries: Vec<RequestSummary>,
}
-impl RequestSummary {
- /// Generates a set of [RequestSummaries](RequestSummary) from the input
[`RequestDetails`]
- /// grouped by the input's [`Operation`]
- pub fn summarize_by_operation(
- requests: &[RequestDetails],
- ) -> HashMap<Operation, Self> {
- let mut summaries: HashMap<Operation, Self> = HashMap::new();
+/// Display the summary as a table
+impl Display for RequestSummaries {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
+ // Format it like
+ //
+------------+-----------+-----------+-----------+-----------+---------+
+ // | Operation | Metric | min | max | avg |
sum |
+ //
+------------+-----------+-----------+-----------+-----------+---------+
+ // | GET | duration | 0.030742s | 0.033060s | 0.031901s | N/A
|
+ // | GET | size | 8 B | 34322 B | 17165 B |
34330 B |
+ //
+------------+-----------+-----------+-----------+-----------+---------+
+
+ let operations: StringArray = self
+ .summaries
+ .iter()
+ .flat_map(|s| std::iter::repeat_n(Some(s.operation.to_string()),
2))
+ .collect();
+
+ let metrics: StringArray = std::iter::repeat([Some("duration"),
Some("size")])
+ .flatten()
+ .take(self.summaries.len() * 2)
+ .collect();
+ let mins: StringArray = self
+ .summaries
+ .iter()
+ .flat_map(|s| {
+ let dur_min =
s.duration_stats.as_ref().map_or("N/A".to_string(), |d| {
+ format!("{:.6}s", d.min.as_secs_f32())
+ });
+ let size_min = s
+ .size_stats
+ .as_ref()
+ .map_or("N/A".to_string(), |s| format!("{} B", s.min));
+ vec![Some(dur_min), Some(size_min)]
+ })
+ .collect();
+ let maxs: StringArray = self
+ .summaries
+ .iter()
+ .flat_map(|s| {
+ let dur_max =
s.duration_stats.as_ref().map_or("N/A".to_string(), |d| {
+ format!("{:.6}s", d.max.as_secs_f32())
+ });
+ let size_max = s
+ .size_stats
+ .as_ref()
+ .map_or("N/A".to_string(), |s| format!("{} B", s.max));
+ vec![Some(dur_max), Some(size_max)]
+ })
+ .collect();
+ let avgs: StringArray = self
+ .summaries
+ .iter()
+ .flat_map(|s| {
+ let count = s.count as f32;
+ let dur_avg =
s.duration_stats.as_ref().map_or("N/A".to_string(), |d| {
+ let avg = d.sum.as_secs_f32() / count;
+ format!("{:.6}s", avg)
+ });
+ let size_avg = s.size_stats.as_ref().map_or("N/A".to_string(),
|s| {
+ let avg = s.sum as f32 / count;
+ format!("{} B", avg)
+ });
+ vec![Some(dur_avg), Some(size_avg)]
+ })
+ .collect();
+ let sums: StringArray = self
+ .summaries
+ .iter()
+ .flat_map(|s| {
+ let dur_sum =
s.duration_stats.as_ref().map_or("N/A".to_string(), |d| {
+ format!("{:.6}s", d.sum.as_secs_f32())
Review Comment:
I deliberately omitted a sum stat for `duration` in the initial
implementation because I'm afraid it can be a bit misleading (at least at first
glance). In my early testing I found that, for particularly large queries, the
sum of the durations was often larger than the total time of the query itself,
which doesn't seem to make sense at first. The number for the sum is technically
accurate, in the sense that it's the total "real" time across all CPU cores
that participated in the IO operations (the requests overlap in time, so their
durations add up to more than the elapsed wall-clock time), but communicating
that technicality in a concise output like this is difficult.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]