alamb commented on code in PR #16232:
URL: https://github.com/apache/datafusion/pull/16232#discussion_r2126344160


##########
datafusion/core/tests/fuzz_cases/record_batch_generator.rs:
##########
@@ -656,11 +713,110 @@ impl RecordBatchGenerator {
                     self,
                     num_rows,
                     max_num_distinct,
+                    null_pct,
                     batch_gen_rng,
                     array_gen_rng,
                     BooleanType
                 }
             }
+            DataType::Dictionary(ref key_type, ref value_type)
+                if key_type.is_dictionary_key_type() =>
+            {
+                // We generate just num_distinct values because they will be reused by different keys
+                let mut array_gen_rng = array_gen_rng;
+
+                let values = Self::generate_array_of_type_inner(
+                    &ColumnDescr::new("values", *value_type.clone()),
+                    num_distinct,
+                    batch_gen_rng,
+                    array_gen_rng.clone(),
+                    0.0, // null values are generated on the key level
+                );
+
+                match key_type.as_ref() {
+                    DataType::Int8 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        Int8Type,
+                        values
+                    ),
+                    DataType::Int16 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        Int16Type,
+                        values
+                    ),
+                    DataType::Int32 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        Int32Type,
+                        values
+                    ),
+                    DataType::Int64 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        Int64Type,
+                        values
+                    ),
+                    DataType::UInt8 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        UInt8Type,
+                        values
+                    ),
+                    DataType::UInt16 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        UInt16Type,
+                        values
+                    ),
+                    DataType::UInt32 => generate_dict!(
+                        self,
+                        num_rows,
+                        num_distinct,
+                        null_pct,
+                        batch_gen_rng,
+                        array_gen_rng,
+                        UInt32Type,
+                        values
+                    ),
+                    DataType::UInt64 => generate_dict!(

Review Comment:
   I think that, technically, since only `Dictionary(UInt64, Utf8)` is ever
generated, we can drop all the other cases in this `match` statement and save on codegen.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to