Re: [PR] Move Covariance (Population) covar_pop to be a User Defined Aggregate Function [datafusion]

via GitHub Thu, 09 May 2024 21:33:23 -0700


yyy1000 commented on code in PR #10418:
URL: https://github.com/apache/datafusion/pull/10418#discussion_r1596242627



##########
datafusion/physical-expr/src/aggregate/covariance.rs:
##########
@@ -319,281 +225,3 @@ impl Accumulator for CovarianceAccumulator {
         std::mem::size_of_val(self)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::aggregate::utils::get_accum_scalar_values_as_arrays;
-    use crate::expressions::col;
-    use crate::expressions::tests::aggregate;
-    use crate::generic_test_op2;
-    use arrow::{array::*, datatypes::*};
-
-    #[test]
-    fn covariance_f64_1() -> Result<()> {
-        let a: ArrayRef = Arc::new(Float64Array::from(vec![1_f64, 2_f64, 
3_f64]));
-        let b: ArrayRef = Arc::new(Float64Array::from(vec![4_f64, 5_f64, 
6_f64]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Float64,
-            DataType::Float64,
-            CovariancePop,
-            ScalarValue::from(0.6666666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_f64_5() -> Result<()> {
-        let a: ArrayRef = Arc::new(Float64Array::from(vec![1.1_f64, 2_f64, 
3_f64]));
-        let b: ArrayRef = Arc::new(Float64Array::from(vec![4.1_f64, 5_f64, 
6_f64]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Float64,
-            DataType::Float64,
-            CovariancePop,
-            ScalarValue::from(0.6022222222222223_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_f64_6() -> Result<()> {
-        let a = Arc::new(Float64Array::from(vec![
-            1_f64, 2_f64, 3_f64, 1.1_f64, 2.2_f64, 3.3_f64,
-        ]));
-        let b = Arc::new(Float64Array::from(vec![
-            4_f64, 5_f64, 6_f64, 4.4_f64, 5.5_f64, 6.6_f64,
-        ]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Float64,
-            DataType::Float64,
-            CovariancePop,
-            ScalarValue::from(0.7616666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_i32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
-        let b: ArrayRef = Arc::new(Int32Array::from(vec![4, 5, 6]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Int32,
-            DataType::Int32,
-            CovariancePop,
-            ScalarValue::from(0.6666666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_u32() -> Result<()> {
-        let a: ArrayRef = Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 
3_u32]));
-        let b: ArrayRef = Arc::new(UInt32Array::from(vec![4_u32, 5_u32, 
6_u32]));
-        generic_test_op2!(
-            a,
-            b,
-            DataType::UInt32,
-            DataType::UInt32,
-            CovariancePop,
-            ScalarValue::from(0.6666666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_f32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Float32Array::from(vec![1_f32, 2_f32, 
3_f32]));
-        let b: ArrayRef = Arc::new(Float32Array::from(vec![4_f32, 5_f32, 
6_f32]));
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Float32,
-            DataType::Float32,
-            CovariancePop,
-            ScalarValue::from(0.6666666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_i32_with_nulls_1() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, 
Some(3)]));
-        let b: ArrayRef = Arc::new(Int32Array::from(vec![Some(4), None, 
Some(6)]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Int32,
-            DataType::Int32,
-            CovariancePop,
-            ScalarValue::from(1_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_i32_with_nulls_2() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            Some(2),
-            None,
-            Some(3),
-            None,
-        ]));
-        let b: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(4),
-            Some(9),
-            Some(5),
-            Some(8),
-            Some(6),
-            None,
-        ]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Int32,
-            DataType::Int32,
-            CovariancePop,
-            ScalarValue::from(0.6666666666666666_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_pop_i32_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-        let b: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Int32,
-            DataType::Int32,
-            CovariancePop,
-            ScalarValue::Float64(None)
-        )
-    }
-
-    #[test]
-    fn covariance_pop_1_input() -> Result<()> {
-        let a: ArrayRef = Arc::new(Float64Array::from(vec![1_f64]));
-        let b: ArrayRef = Arc::new(Float64Array::from(vec![2_f64]));
-
-        generic_test_op2!(
-            a,
-            b,
-            DataType::Float64,
-            DataType::Float64,
-            CovariancePop,
-            ScalarValue::from(0_f64)
-        )
-    }
-
-    #[test]
-    fn covariance_f64_merge_1() -> Result<()> {
-        let a = Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64]));
-        let b = Arc::new(Float64Array::from(vec![4_f64, 5_f64, 6_f64]));
-        let c = Arc::new(Float64Array::from(vec![1.1_f64, 2.2_f64, 3.3_f64]));
-        let d = Arc::new(Float64Array::from(vec![4.4_f64, 5.5_f64, 6.6_f64]));
-
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Float64, true),
-            Field::new("b", DataType::Float64, true),
-        ]);
-
-        let batch1 = RecordBatch::try_new(Arc::new(schema.clone()), vec![a, 
b])?;
-        let batch2 = RecordBatch::try_new(Arc::new(schema.clone()), vec![c, 
d])?;
-
-        let agg1 = Arc::new(CovariancePop::new(
-            col("a", &schema)?,
-            col("b", &schema)?,
-            "bla".to_string(),
-            DataType::Float64,
-        ));
-
-        let agg2 = Arc::new(CovariancePop::new(
-            col("a", &schema)?,
-            col("b", &schema)?,
-            "bla".to_string(),
-            DataType::Float64,
-        ));
-
-        let actual = merge(&batch1, &batch2, agg1, agg2)?;
-        assert!(actual == ScalarValue::from(0.7616666666666666));
-
-        Ok(())
-    }
-
-    #[test]
-    fn covariance_f64_merge_2() -> Result<()> {
-        let a = Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64]));
-        let b = Arc::new(Float64Array::from(vec![4_f64, 5_f64, 6_f64]));
-        let c = Arc::new(Float64Array::from(vec![None]));
-        let d = Arc::new(Float64Array::from(vec![None]));
-
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Float64, true),
-            Field::new("b", DataType::Float64, true),
-        ]);
-
-        let batch1 = RecordBatch::try_new(Arc::new(schema.clone()), vec![a, 
b])?;
-        let batch2 = RecordBatch::try_new(Arc::new(schema.clone()), vec![c, 
d])?;
-
-        let agg1 = Arc::new(CovariancePop::new(
-            col("a", &schema)?,
-            col("b", &schema)?,
-            "bla".to_string(),
-            DataType::Float64,
-        ));
-
-        let agg2 = Arc::new(CovariancePop::new(
-            col("a", &schema)?,
-            col("b", &schema)?,
-            "bla".to_string(),
-            DataType::Float64,
-        ));
-
-        let actual = merge(&batch1, &batch2, agg1, agg2)?;
-        assert!(actual == ScalarValue::from(0.6666666666666666));
-
-        Ok(())
-    }
-
-    fn merge(
-        batch1: &RecordBatch,
-        batch2: &RecordBatch,
-        agg1: Arc<dyn AggregateExpr>,
-        agg2: Arc<dyn AggregateExpr>,
-    ) -> Result<ScalarValue> {
-        let mut accum1 = agg1.create_accumulator()?;
-        let mut accum2 = agg2.create_accumulator()?;
-        let expr1 = agg1.expressions();
-        let expr2 = agg2.expressions();
-
-        let values1 = expr1
-            .iter()
-            .map(|e| {
-                e.evaluate(batch1)
-                    .and_then(|v| v.into_array(batch1.num_rows()))
-            })
-            .collect::<Result<Vec<_>>>()?;
-        let values2 = expr2
-            .iter()
-            .map(|e| {
-                e.evaluate(batch2)
-                    .and_then(|v| v.into_array(batch2.num_rows()))
-            })
-            .collect::<Result<Vec<_>>>()?;
-        accum1.update_batch(&values1)?;
-        accum2.update_batch(&values2)?;
-        let state2 = get_accum_scalar_values_as_arrays(accum2.as_mut())?;
-        accum1.merge_batch(&state2)?;
-        accum1.evaluate()

Review Comment:
   I added all of the unit tests to sqllogictest, except for 
`covariance_f64_merge_1` and `covariance_f64_merge_2`.
   I don't know how to mock these merge operations. 🥲



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Move Covariance (Population) covar_pop to be a User Defined Aggregate Function [datafusion]

Reply via email to