viirya commented on code in PR #19572:
URL: https://github.com/apache/datafusion/pull/19572#discussion_r2656512026


##########
datafusion/functions/src/unicode/strpos.rs:
##########
@@ -215,14 +215,32 @@ where
                         )
                     }
                 } else {
-                    // The `find` method returns the byte index of the 
substring.
-                    // We count the number of chars up to that byte index.
-                    T::Native::from_usize(
-                        string
-                            .find(substring)
-                            .map(|x| string[..x].chars().count() + 1)
-                            .unwrap_or(0),
-                    )
+                    // For non-ASCII, use a single-pass search that tracks both
+                    // byte position and character position simultaneously
+                    if substring.is_empty() {
+                        return T::Native::from_usize(1);
+                    }
+
+                    let substring_bytes = substring.as_bytes();
+                    let string_bytes = string.as_bytes();
+
+                    if substring_bytes.len() > string_bytes.len() {
+                        return T::Native::from_usize(0);
+                    }
+
+                    // Single pass: find substring while counting characters
+                    let mut char_pos = 0;
+                    for (byte_idx, _) in string.char_indices() {
+                        char_pos += 1;
+                        if byte_idx + substring_bytes.len() <= 
string_bytes.len()
+                            && &string_bytes[byte_idx..byte_idx + 
substring_bytes.len()]

Review Comment:
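   Use `get_unchecked` to avoid the redundant bounds check when comparing 
substring slices: the preceding `byte_idx + substring_bytes.len() <= 
string_bytes.len()` condition already guarantees the range is in bounds.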



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to