Weijun-H commented on code in PR #12153:
URL: https://github.com/apache/datafusion/pull/12153#discussion_r1742259991
##########
datafusion/functions-nested/src/map.rs:
##########
@@ -51,24 +52,65 @@ fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool {
.all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
}
-fn make_map_batch(args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.len() != 2 {
return exec_err!(
"make_map requires exactly 2 arguments, got {} instead",
args.len()
);
}
- let data_type = args[0].data_type();
let can_evaluate_to_const = can_evaluate_to_const(args);
- let key = get_first_array_ref(&args[0])?;
- let value = get_first_array_ref(&args[1])?;
- make_map_batch_internal(key, value, can_evaluate_to_const, data_type)
+
+ // check the keys array is unique
+ let keys = get_first_array_ref(&args[0])?;
+ let key_array = keys.as_ref();
+
+ match &args[0] {
+ ColumnarValue::Array(_) => {
+ let row_keys = match key_array.data_type() {
+ DataType::List(_) => list_to_arrays::<i32>(&keys),
+ DataType::LargeList(_) => list_to_arrays::<i64>(&keys),
+ DataType::FixedSizeList(_, _) => fixed_size_list_to_arrays(&keys),
+ data_type => {
+ return exec_err!(
+ "Expected list, large_list or fixed_size_list, got {:?}",
+ data_type
+ );
+ }
+ };
+
+ row_keys
+ .iter()
+ .try_for_each(|key| check_unique_keys(key.as_ref()))?;
+ }
+ ColumnarValue::Scalar(_) => {
+ check_unique_keys(key_array)?;
+ }
+ }
+
+ let values = get_first_array_ref(&args[1])?;
+ make_map_batch_internal(keys, values, can_evaluate_to_const, args[0].data_type())
+}
+
+fn check_unique_keys(array: &dyn Array) -> Result<()> {
Review Comment:
We could check for unique keys earlier in the `make_map` function, but since
the `MAP` function is a UDF, the unique key validation needs to be handled in
the `make_map_batch` function. This approach, however, will result in the
`make_map` function performing the check twice.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]