alamb commented on code in PR #15367: URL: https://github.com/apache/datafusion/pull/15367#discussion_r2012933341
########## datafusion/core/benches/distinct_query_sql.rs: ########## @@ -144,59 +141,50 @@ pub async fn create_context_sampled_data( } fn criterion_benchmark_limited_distinct_sampled(c: &mut Criterion) { - let rt = Runtime::new().unwrap(); - let limit = 10; let partitions = 100; let samples = 100_000; let sql = format!("select DISTINCT trace_id from traces group by trace_id limit {limit};"); - - let distinct_trace_id_100_partitions_100_000_samples_limit_100 = rt.block_on(async { - create_context_sampled_data(sql.as_str(), partitions, samples) - .await - .unwrap() - }); - c.bench_function( format!("distinct query with {} partitions and {} samples per partition with limit {}", partitions, samples, limit).as_str(), - |b| b.iter(|| run(distinct_trace_id_100_partitions_100_000_samples_limit_100.0.clone(), - distinct_trace_id_100_partitions_100_000_samples_limit_100.1.clone())), + |b| b.iter(|| { + let rt = Runtime::new().unwrap(); Review Comment: I think this means that the benchmark will include the time to create each tokio runtime (with a bunch of threads, etc.). To avoid this, I think you can create the runtime once, and then use it for each iteration: ```rust let rt = Runtime::new().unwrap(); c.bench_function( format!("distinct query with {} partitions and {} samples per partition with limit {}", partitions, samples, limit).as_str(), |b| b.iter(|| { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org