Jefffrey commented on code in PR #19832:
URL: https://github.com/apache/datafusion/pull/19832#discussion_r2696525041
##########
datafusion/spark/src/function/math/hex.rs:
##########
@@ -241,29 +243,38 @@ pub fn compute_hex(
let array = as_fixed_size_binary_array(array)?;
hex_encode_bytes(array.iter(), lowercase, array.len())
}
- DataType::Dictionary(_, value_type) => {
+ DataType::Dictionary(_, _) => {
let dict = as_dictionary_array::<Int32Type>(&array);
+ let dict_values = dict.values();
- match **value_type {
+ let encoded_values: ColumnarValue = match
dict_values.data_type() {
Review Comment:
We might want to consider arms for LargeUtf, views, etc.
##########
datafusion/spark/src/function/math/hex.rs:
##########
@@ -241,29 +243,38 @@ pub fn compute_hex(
let array = as_fixed_size_binary_array(array)?;
hex_encode_bytes(array.iter(), lowercase, array.len())
}
- DataType::Dictionary(_, value_type) => {
+ DataType::Dictionary(_, _) => {
let dict = as_dictionary_array::<Int32Type>(&array);
+ let dict_values = dict.values();
- match **value_type {
+ let encoded_values: ColumnarValue = match
dict_values.data_type() {
DataType::Int64 => {
- let arr = dict.downcast_dict::<Int64Array>().unwrap();
- hex_encode_int64(arr.into_iter(), dict.len())
+ let arr = as_int64_array(dict_values)?;
+ hex_encode_int64(arr.iter(), arr.len())?
}
DataType::Utf8 => {
- let arr = dict.downcast_dict::<StringArray>().unwrap();
- hex_encode_bytes(arr.into_iter(), lowercase,
dict.len())
+ let arr = as_string_array(dict_values);
+ hex_encode_bytes(arr.iter(), lowercase, arr.len())?
}
DataType::Binary => {
- let arr = dict.downcast_dict::<BinaryArray>().unwrap();
- hex_encode_bytes(arr.into_iter(), lowercase,
dict.len())
+ let arr = as_binary_array(dict_values)?;
+ hex_encode_bytes(arr.iter(), lowercase, arr.len())?
}
_ => {
- exec_err!(
+ return exec_err!(
"hex got an unexpected argument type: {}",
- array.data_type()
- )
+ dict_values.data_type()
+ );
}
- }
+ };
+
+ let encoded_values_array: ArrayRef = match encoded_values {
+ ColumnarValue::Array(a) => a,
+ ColumnarValue::Scalar(s) => Arc::new(s.to_array()?),
+ };
Review Comment:
We should probably refactor `hex_encode_bytes` and `hex_encode_int64` to
return arrays only, as their signature say they return `ColumnarValue` but they
never return the scalar variant, forcing handling like this
##########
datafusion/sqllogictest/test_files/spark/math/hex.slt:
##########
@@ -63,3 +63,18 @@ query T
SELECT hex(arrow_cast('test', 'LargeBinary')) as lar_b;
----
74657374
+
+statement ok
+CREATE TABLE t_dict_utf8 AS
+SELECT arrow_cast(column1, 'Dictionary(Int32, Utf8)') as dict_col
+FROM VALUES ('foo'), ('bar'), ('foo'), (NULL), ('baz'), ('bar');
+
+query T
+SELECT hex(dict_col) FROM t_dict_utf8;
Review Comment:
Can we check the output type here with `arrow_typeof` to ensure they are
dictionaries
##########
datafusion/spark/src/function/math/hex.rs:
##########
@@ -241,29 +243,38 @@ pub fn compute_hex(
let array = as_fixed_size_binary_array(array)?;
hex_encode_bytes(array.iter(), lowercase, array.len())
}
- DataType::Dictionary(_, value_type) => {
+ DataType::Dictionary(_, _) => {
let dict = as_dictionary_array::<Int32Type>(&array);
Review Comment:
nit: we should have some check that the dictionary has i32 key type,
otherwise this will panic
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]