alamb commented on code in PR #12667:
URL: https://github.com/apache/datafusion/pull/12667#discussion_r1792503168


##########
datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs:
##########
@@ -44,6 +44,307 @@ use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tokio::task::JoinSet;
 
+use crate::fuzz_cases::aggregation_fuzzer::{
+    AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig,
+};
+
+// ========================================================================
+//  The new aggregation fuzz tests based on [`AggregationFuzzer`]
+// ========================================================================
+
+// TODO: write more test cases to cover more `group by`s and `aggregation function`s
+// TODO: maybe we can use a macro to simplify test case creation
+
+/// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `no group by`
+#[tokio::test(flavor = "multi_thread")]
+async fn test_basic_prim_aggr_no_group() {
+    let builder = AggregationFuzzerBuilder::default();
+
+    // Define data generator config
+    let columns = vec![ColumnDescr::new("a", DataType::Int32)];
+
+    let data_gen_config = DatasetGeneratorConfig {
+        columns,
+        rows_num_range: (512, 1024),
+        sort_keys_set: Vec::new(),
+    };
+
+    // Build fuzzer
+    let fuzzer = builder
+        .data_gen_config(data_gen_config)
+        .data_gen_rounds(16)
+        .add_sql("SELECT sum(a) FROM fuzz_table")
+        .add_sql("SELECT sum(distinct a) FROM fuzz_table")
+        .add_sql("SELECT max(a) FROM fuzz_table")
+        .add_sql("SELECT min(a) FROM fuzz_table")
+        .add_sql("SELECT count(a) FROM fuzz_table")
+        .add_sql("SELECT count(distinct a) FROM fuzz_table")
+        .add_sql("SELECT avg(a) FROM fuzz_table")
+        .table_name("fuzz_table")
+        .build();
+
+    fuzzer.run().await;
+}
+
+/// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single int64`
+#[tokio::test(flavor = "multi_thread")]
+async fn test_basic_prim_aggr_group_by_single_int64() {
+    let builder = AggregationFuzzerBuilder::default();
+
+    // Define data generator config
+    let columns = vec![
+        ColumnDescr::new("a", DataType::Int32),
+        ColumnDescr::new("b", DataType::Int64),
+        ColumnDescr::new("c", DataType::Int64),
+    ];
+    let sort_keys_set = vec![
+        vec!["b".to_string()],
+        vec!["c".to_string(), "b".to_string()],
+    ];
+    let data_gen_config = DatasetGeneratorConfig {
+        columns,
+        rows_num_range: (512, 1024),
+        sort_keys_set,
+    };
+
+    // Build fuzzer
+    let fuzzer = builder

Review Comment:
   this is so great



##########
datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs:
##########
@@ -44,6 +44,118 @@ use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tokio::task::JoinSet;
 
+use crate::fuzz_cases::aggregation_fuzzer::{
+    AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig,
+};
+
+// ========================================================================
+//  The new aggregation fuzz tests based on [`AggregationFuzzer`]
+// ========================================================================
+
+// TODO: write more test cases to cover more `group by`s and `aggregation function`s
+// TODO: maybe we can use a macro to simplify test case creation
+
+/// Fuzz test for group by `single int64`
+#[tokio::test(flavor = "multi_thread")]
+async fn test_group_by_single_int64() {
+    let builder = AggregationFuzzerBuilder::default();
+
+    // Define data generator config
+    let columns = vec![
+        ColumnDescr::new("a", DataType::Int64),
+        ColumnDescr::new("b", DataType::Int64),
+        ColumnDescr::new("c", DataType::Int64),
+    ];
+    let sort_keys_set = vec![
+        vec!["b".to_string()],
+        vec!["c".to_string(), "b".to_string()],
+    ];
+    let data_gen_config = DatasetGeneratorConfig {
+        columns,
+        rows_num_range: (512, 1024),
+        sort_keys_set,
+    };
+
+    // Build fuzzer
+    let fuzzer = builder
+        .data_gen_config(data_gen_config)
+        .data_gen_rounds(20)
+        .sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b")
+        .table_name("fuzz_table")
+        .build();
+
+    fuzzer.run().await;
+}
+
+/// Fuzz test for group by `single string`
+#[tokio::test(flavor = "multi_thread")]
+async fn test_group_by_single_string() {
+    let builder = AggregationFuzzerBuilder::default();
+
+    // Define data generator config
+    let columns = vec![
+        ColumnDescr::new("a", DataType::Int64),
+        ColumnDescr::new("b", DataType::Utf8),
+        ColumnDescr::new("c", DataType::Int64),
+    ];
+    let sort_keys_set = vec![
+        vec!["b".to_string()],
+        vec!["c".to_string(), "b".to_string()],
+    ];
+    let data_gen_config = DatasetGeneratorConfig {
+        columns,
+        rows_num_range: (512, 1024),
+        sort_keys_set,
+    };
+
+    // Build fuzzer
+    let fuzzer = builder
+        .data_gen_config(data_gen_config)
+        .data_gen_rounds(20)
+        .sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b")
+        .table_name("fuzz_table")
+        .build();
+
+    fuzzer.run().await;
+}
+
+/// Fuzz test for group by `string + int64`
+#[tokio::test(flavor = "multi_thread")]

Review Comment:
   I meant something like this:
   
   ```
   #[tokio::test]
   async fn test_basic_string_aggr_group_by_mixed_string_int64_1() {
   ...
   }
   
   #[tokio::test]
   async fn test_basic_string_aggr_group_by_mixed_string_int64_2() {
   ...
   }
   ```
   
   Rather than a single multi-threaded test.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to