Chen-Yuan-Lai commented on code in PR #16324:
URL: https://github.com/apache/datafusion/pull/16324#discussion_r2140683717


##########
datafusion/core/tests/sql/explain_analyze.rs:
##########
@@ -52,43 +54,45 @@ async fn explain_analyze_baseline_metrics() {
     let formatted = arrow::util::pretty::pretty_format_batches(&results)
         .unwrap()
         .to_string();
+
     println!("Query Output:\n\n{formatted}");
 
-    assert_metrics!(
-        &formatted,
-        "AggregateExec: mode=Partial, gby=[]",
-        "metrics=[output_rows=3, elapsed_compute="
-    );
-    assert_metrics!(
-        &formatted,
-        "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]",
-        "metrics=[output_rows=5, elapsed_compute="
-    );
-    assert_metrics!(
-        &formatted,
-        "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434",
-        "metrics=[output_rows=99, elapsed_compute="
-    );
-    assert_metrics!(
-        &formatted,
-        "ProjectionExec: expr=[]",
-        "metrics=[output_rows=5, elapsed_compute="
-    );
-    assert_metrics!(
-        &formatted,
-        "CoalesceBatchesExec: target_batch_size=4096",
-        "metrics=[output_rows=5, elapsed_compute"
-    );
-    assert_metrics!(
-        &formatted,
-        "UnionExec",
-        "metrics=[output_rows=3, elapsed_compute="
-    );
-    assert_metrics!(
-        &formatted,
-        "WindowAggExec",
-        "metrics=[output_rows=1, elapsed_compute="
-    );
+    let re = Regex::new(r"\|[^|]*\|([^|]*)\|").unwrap();
+    let actual = formatted
+        .lines()
+        .map(|line| re.replace_all(line, "$1").trim_end().to_string())
+        .filter(|line| !line.is_empty() && !line.starts_with('+'))
+        .collect::<Vec<_>>()
+        .join("\n");
+    insta::with_settings!({filters => vec![
+    (r"\d+\.?\d*[µmn]?s", "[TIME]"),

Review Comment:
   Agree, current pattern can't match the longer time (ex. min, h, d). However, 
a broader pattern might match irrelevant strings. For example, match on `[ ]`:  
metrics=[output_rows=1, elapsed_compute=110.947µs] -> metrics=[TIME] 
   
   I'm currently considering to add more time units in the regex pattern:
   ```regex
   r"\d+\.?\d*(?:µs|ms|ns|s|min|h)\b"
   ```
   
   Or, do you have any suggestions for a more precise way to match time-related 
strings? many thanks.
   
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to