Omega359 commented on code in PR #13403:
URL: https://github.com/apache/datafusion/pull/13403#discussion_r1851135533
##########
datafusion/functions-nested/src/string.rs:
##########
@@ -524,63 +687,90 @@ pub fn string_to_array_inner<T: OffsetSizeTrait>(args:
&[ArrayRef]) -> Result<Ar
}
(Some(string), None) => {
string.chars().map(|c| c.to_string()).for_each(|c|
{
- list_builder.values().append_value(c);
+ list_builder.values().append_value(c.as_str());
});
list_builder.append(true);
}
_ => list_builder.append(false), // null value
}
},
- );
+ )
}
-
- 3 => {
- let null_value_array = as_generic_string_array::<T>(&args[2])?;
- string_array
- .iter()
- .zip(delimiter_array.iter())
- .zip(null_value_array.iter())
- .for_each(|((string, delimiter), null_value)| {
- match (string, delimiter) {
- (Some(string), Some("")) => {
- if Some(string) == null_value {
+ Some(null_value_array) => string_array
+ .iter()
+ .zip(delimiter_array.iter())
+ .zip(null_value_array.iter())
+ .for_each(|((string, delimiter), null_value)| {
+ match (string, delimiter) {
+ (Some(string), Some("")) => {
+ if Some(string) == null_value {
+ list_builder.values().append_null();
+ } else {
+ list_builder.values().append_value(string);
+ }
+ list_builder.append(true);
+ }
+ (Some(string), Some(delimiter)) => {
+ string.split(delimiter).for_each(|s| {
+ if Some(s) == null_value {
list_builder.values().append_null();
} else {
- list_builder.values().append_value(string);
+ list_builder.values().append_value(s);
}
- list_builder.append(true);
- }
- (Some(string), Some(delimiter)) => {
- string.split(delimiter).for_each(|s| {
- if Some(s) == null_value {
- list_builder.values().append_null();
- } else {
- list_builder.values().append_value(s);
- }
- });
- list_builder.append(true);
- }
- (Some(string), None) => {
- string.chars().map(|c| c.to_string()).for_each(|c|
{
- if Some(c.as_str()) == null_value {
- list_builder.values().append_null();
- } else {
- list_builder.values().append_value(c);
- }
- });
- list_builder.append(true);
- }
- _ => list_builder.append(false), // null value
+ });
+ list_builder.append(true);
}
- });
- }
- _ => {
- return exec_err!(
- "Expect string_to_array function to take two or three
parameters"
- )
- }
- }
+ (Some(string), None) => {
+ string.chars().map(|c| c.to_string()).for_each(|c| {
+ if Some(c.as_str()) == null_value {
+ list_builder.values().append_null();
+ } else {
+ list_builder.values().append_value(c.as_str());
+ }
+ });
+ list_builder.append(true);
+ }
+ _ => list_builder.append(false), // null value
+ }
+ }),
+ };
let list_array = list_builder.finish();
Ok(Arc::new(list_array) as ArrayRef)
}
+
+trait StringArrayBuilderType: ArrayBuilder {
Review Comment:
I really wish Rust had enum variants as types - with that I could really
reduce a lot of this code. One of the reasons this took so long is that I was
looking for a clean and fast way to handle the string types in a super nice
way. I failed unfortunately - what I came come up with similar to
https://github.com/Omega359/arrow-datafusion/blob/38d9bdde69381b4a7516346f862310700b6eb96e/datafusion/functions/src/utils.rs#L192
but that just has more overhead than is acceptable :(
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]