(datafusion) branch main updated: chore: Add microbenchmark (compared to ExprOrExpr) (#20076)

github-bot Fri, 30 Jan 2026 11:08:06 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new b50e1f755b chore: Add microbenchmark (compared to ExprOrExpr) (#20076)
b50e1f755b is described below

commit b50e1f755b729495a98c24af0d23e5582976f46d
Author: Yu-Chuan Hung <[email protected]>
AuthorDate: Sat Jan 31 03:07:37 2026 +0800

    chore: Add microbenchmark (compared to ExprOrExpr) (#20076)
    
    ## Which issue does this PR close?
    
    Related to #19994 - This PR extracts the benchmark code to allow
    performance comparison.
    
    ## Rationale for this change
    
    As pointed out by @alamb in #19994, this separates the microbenchmark
    code so that the benchmarking scripts can compare the optimization PR
    against main with the benchmark already in place.
    
    ## What changes are included in this PR?
    
    Adds a microbenchmark for the divide-by-zero protection pattern in
    `case_when.rs`:
    - Benchmarks with varying percentages of zeros (0%, 10%, 50%, 90%)
    - Compares `DivideByZeroProtection` pattern (where checked column
    matches divisor) vs `ExpressionOrExpression` fallback (where they don't
    match)
    
    ## Are these changes tested?
    
    benchmark code only.
    
    ## Are there any user-facing changes?
    
    No.
---
 datafusion/physical-expr/benches/case_when.rs | 109 ++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/datafusion/physical-expr/benches/case_when.rs 
b/datafusion/physical-expr/benches/case_when.rs
index eb0886a31e..d9b1b56572 100644
--- a/datafusion/physical-expr/benches/case_when.rs
+++ b/datafusion/physical-expr/benches/case_when.rs
@@ -20,6 +20,7 @@ use arrow::datatypes::{ArrowNativeTypeOp, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use arrow::util::test_util::seedable_rng;
 use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use datafusion_common::ScalarValue;
 use datafusion_expr::Operator;
 use datafusion_physical_expr::expressions::{BinaryExpr, case, col, lit};
 use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
@@ -93,6 +94,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     run_benchmarks(c, &make_batch(8192, 100));
 
     benchmark_lookup_table_case_when(c, 8192);
+    benchmark_divide_by_zero_protection(c, 8192);
 }
 
 fn run_benchmarks(c: &mut Criterion, batch: &RecordBatch) {
@@ -517,5 +519,112 @@ fn benchmark_lookup_table_case_when(c: &mut Criterion, 
batch_size: usize) {
     }
 }
 
+fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) {
+    let mut group = c.benchmark_group("divide_by_zero_protection");
+
+    for zero_percentage in [0.0, 0.1, 0.5, 0.9] {
+        let rng = &mut seedable_rng();
+
+        let numerator: Int32Array =
+            (0..batch_size).map(|_| Some(rng.random::<i32>())).collect();
+
+        let divisor_values: Vec<Option<i32>> = (0..batch_size)
+            .map(|_| {
+                let roll: f32 = rng.random();
+                if roll < zero_percentage {
+                    Some(0)
+                } else {
+                    let mut val = rng.random::<i32>();
+                    while val == 0 {
+                        val = rng.random::<i32>();
+                    }
+                    Some(val)
+                }
+            })
+            .collect();
+
+        let divisor: Int32Array = divisor_values.iter().cloned().collect();
+        let divisor_copy: Int32Array = 
divisor_values.iter().cloned().collect();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("numerator", numerator.data_type().clone(), true),
+            Field::new("divisor", divisor.data_type().clone(), true),
+            Field::new("divisor_copy", divisor_copy.data_type().clone(), true),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(numerator),
+                Arc::new(divisor),
+                Arc::new(divisor_copy),
+            ],
+        )
+        .unwrap();
+
+        let numerator_col = col("numerator", &batch.schema()).unwrap();
+        let divisor_col = col("divisor", &batch.schema()).unwrap();
+        let divisor_copy_col = col("divisor_copy", &batch.schema()).unwrap();
+
+        // DivideByZeroProtection: WHEN condition checks `divisor_col > 0` and 
division
+        // uses `divisor_col` as divisor. Since the checked column matches the 
divisor,
+        // this triggers the DivideByZeroProtection optimization.
+        group.bench_function(
+            format!(
+                "{} rows, {}% zeros: DivideByZeroProtection",
+                batch_size,
+                (zero_percentage * 100.0) as i32
+            ),
+            |b| {
+                let when = Arc::new(BinaryExpr::new(
+                    Arc::clone(&divisor_col),
+                    Operator::Gt,
+                    lit(0i32),
+                ));
+                let then = Arc::new(BinaryExpr::new(
+                    Arc::clone(&numerator_col),
+                    Operator::Divide,
+                    Arc::clone(&divisor_col),
+                ));
+                let else_null: Arc<dyn PhysicalExpr> = 
lit(ScalarValue::Int32(None));
+                let expr =
+                    Arc::new(case(None, vec![(when, then)], 
Some(else_null)).unwrap());
+
+                b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
+            },
+        );
+
+        // ExpressionOrExpression: WHEN condition checks `divisor_copy_col > 
0` but
+        // division uses `divisor_col` as divisor. Since the checked column 
does NOT
+        // match the divisor, this falls back to ExpressionOrExpression 
evaluation.
+        group.bench_function(
+            format!(
+                "{} rows, {}% zeros: ExpressionOrExpression",
+                batch_size,
+                (zero_percentage * 100.0) as i32
+            ),
+            |b| {
+                let when = Arc::new(BinaryExpr::new(
+                    Arc::clone(&divisor_copy_col),
+                    Operator::Gt,
+                    lit(0i32),
+                ));
+                let then = Arc::new(BinaryExpr::new(
+                    Arc::clone(&numerator_col),
+                    Operator::Divide,
+                    Arc::clone(&divisor_col),
+                ));
+                let else_null: Arc<dyn PhysicalExpr> = 
lit(ScalarValue::Int32(None));
+                let expr =
+                    Arc::new(case(None, vec![(when, then)], 
Some(else_null)).unwrap());
+
+                b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
+            },
+        );
+    }
+
+    group.finish();
+}
+
 criterion_group!(benches, criterion_benchmark);
 criterion_main!(benches);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: chore: Add microbenchmark (compared to ExprOrExpr) (#20076)

Reply via email to