yangzhg commented on a change in pull request #3638:
URL: https://github.com/apache/incubator-doris/pull/3638#discussion_r428494034
##########
File path: be/src/exprs/string_functions.cpp
##########
@@ -196,28 +196,56 @@ StringVal StringFunctions::lpad(
if (str.is_null || len.is_null || pad.is_null || len.val < 0) {
return StringVal::null();
}
+
+ size_t str_char_size = 0;
+ size_t pad_char_size = 0;
+ size_t byte_pos = 0;
+ std::vector<size_t> str_index;
+ std::vector<size_t> pad_index;
+ for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+ char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+ str_index.push_back(byte_pos);
+ byte_pos += char_size;
+ ++str_char_size;
+ }
+ byte_pos = 0;
+ for (size_t i = 0, char_size = 0; i < pad.len; i += char_size) {
+ char_size = get_utf8_byte_length((unsigned)(pad.ptr)[i]);
+ pad_index.push_back(byte_pos);
+ byte_pos += char_size;
+ ++pad_char_size;
+ }
+
// Corner cases: Shrink the original string, or leave it alone.
// TODO: Hive seems to go into an infinite loop if pad.len == 0,
// so we should pay attention to Hive's future solution to be compatible.
- if (len.val <= str.len || pad.len == 0) {
- return StringVal(str.ptr, len.val);
+ if (len.val <= str_char_size || pad.len == 0) {
+ if (len.val >= str_index.size()) {
+ return StringVal::null();
+ }
+ return StringVal(str.ptr, str_index.at(len.val));
}
// TODO pengyubing
// StringVal result = StringVal::create_temp_string_val(context, len.val);
- StringVal result(context, len.val);
+ int32_t pad_byte_len = 0;
+ int32_t pad_times = (len.val - str_char_size) / pad_char_size;
+ int32_t pad_remainder = (len.val - str_char_size) % pad_char_size;
+ pad_byte_len = pad_times * pad.len;
+ pad_byte_len += pad_index.at(pad_remainder);
Review comment:
1. The code is under my control for now, but if someone modifies it in the
future and introduces an index-out-of-range problem, it will not report any
error; that is a risk, and it will make the problem difficult to track down.
2. This code is not performance-critical, and a single if statement will not
have much impact on performance. A wrong access to unexpected memory is more
harmful than this performance loss.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]