alamb commented on code in PR #11888:
URL: https://github.com/apache/datafusion/pull/11888#discussion_r1711643043
##########
datafusion/functions/src/string/initcap.rs:
##########
@@ -88,28 +89,40 @@ fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
// first map is the iterator, second is for the `Option<_>`
let result = string_array
.iter()
- .map(|string| {
- string.map(|string: &str| {
- let mut char_vector = Vec::<char>::new();
- let mut previous_character_letter_or_number = false;
- for c in string.chars() {
- if previous_character_letter_or_number {
- char_vector.push(c.to_ascii_lowercase());
- } else {
- char_vector.push(c.to_ascii_uppercase());
- }
- previous_character_letter_or_number =
c.is_ascii_uppercase()
- || c.is_ascii_lowercase()
- || c.is_ascii_digit();
- }
- char_vector.iter().collect::<String>()
- })
- })
+ .map(initcap_string)
.collect::<GenericStringArray<T>>();
Ok(Arc::new(result) as ArrayRef)
}
+fn initcap_utf8view(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let string_view_array = as_string_view_array(&args[0])?;
+
+ let result = string_view_array
+ .iter()
+ .map(initcap_string)
+ .collect::<StringArray>();
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+fn initcap_string(string: Option<&str>) -> Option<String> {
+ string.map(|string: &str| {
+ let mut char_vector = Vec::<char>::new();
Review Comment:
I suspect you could make this faster by creating the vector once and then
resetting on each loop -- like
```rust
let mut char_vector = Vec::<char>::new();
string.map(|string: &str| {
char_vector.clear();
...
})
```
##########
datafusion/sqllogictest/test_files/string_view.slt:
##########
@@ -586,16 +621,13 @@ logical_plan
02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1,
test.column1_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
-
-## Ensure no casts for INITCAP
-## TODO https://github.com/apache/datafusion/issues/11853
query TT
EXPLAIN SELECT
INITCAP(column1_utf8view) as c
FROM test;
----
logical_plan
-01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c
+01)Projection: initcap(test.column1_utf8view) AS c
Review Comment:
😠so nice
##########
datafusion/sqllogictest/test_files/string_view.slt:
##########
@@ -425,6 +425,41 @@ logical_plan
01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1,
starts_with(test.column1_utf8view, Utf8View("")) AS c2,
starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3,
starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
02)--TableScan: test projection=[column1_utf8view]
+### Initcap
Review Comment:
Any chance you can move these tests down with the other queries / tests for
Initcap?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]