alamb commented on code in PR #16232: URL: https://github.com/apache/datafusion/pull/16232#discussion_r2126344160
########## datafusion/core/tests/fuzz_cases/record_batch_generator.rs: ########## @@ -656,11 +713,110 @@ impl RecordBatchGenerator { self, num_rows, max_num_distinct, + null_pct, batch_gen_rng, array_gen_rng, BooleanType } } + DataType::Dictionary(ref key_type, ref value_type) + if key_type.is_dictionary_key_type() => + { + // We generate just num_distinct values because they will be reused by different keys + let mut array_gen_rng = array_gen_rng; + + let values = Self::generate_array_of_type_inner( + &ColumnDescr::new("values", *value_type.clone()), + num_distinct, + batch_gen_rng, + array_gen_rng.clone(), + 0.0, // null values are generated on the key level + ); + + match key_type.as_ref() { + DataType::Int8 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + Int8Type, + values + ), + DataType::Int16 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + Int16Type, + values + ), + DataType::Int32 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + Int32Type, + values + ), + DataType::Int64 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + Int64Type, + values + ), + DataType::UInt8 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + UInt8Type, + values + ), + DataType::UInt16 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + UInt16Type, + values + ), + DataType::UInt32 => generate_dict!( + self, + num_rows, + num_distinct, + null_pct, + batch_gen_rng, + array_gen_rng, + UInt32Type, + values + ), + DataType::UInt64 => generate_dict!( Review Comment: I think technically since only `Dictionary(UInt64, Utf8)` is made, we can avoid all the other cases in this `match` statement and save on codegen -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org