Yukang-Lian commented on code in PR #14216:
URL: https://github.com/apache/doris/pull/14216#discussion_r1023002893


##########
be/src/vec/functions/function_string.h:
##########
@@ -1344,6 +1345,167 @@ class FunctionSplitPart : public IFunction {
     }
 };
 
+class FunctionSplitByChar : public IFunction {
+public:
+    static constexpr auto name = "split_by_char";
+
+    static FunctionPtr create() { return 
std::make_shared<FunctionSplitByChar>(); }
+    using NullMapType = PaddedPODArray<UInt8>;
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        DCHECK(is_string(arguments[0]))
+                << "first argument for function: " << name << " should be 
string"
+                << " and arguments[0] is " << arguments[0]->get_name();
+        DCHECK(is_string(arguments[1]))
+                << "second argument for function: " << name << " should be 
char"
+                << " and arguments[1] is " << arguments[1]->get_name();
+        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_EQ(arguments.size(), 2);
+
+        ColumnPtr src_column = make_nullable(
+                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(),
+                false);
+        ColumnPtr delimiter_column =
+                
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            string delimiter = delimiter_column->get_data_at(i).to_string();
+            if (delimiter.size() > 1) {
+                return Status::RuntimeError(
+                        fmt::format("only supported one or zero character 
delimiter for function "
+                                    "{}(but get {})",
+                                    get_name(), delimiter.size()));
+            }
+        }
+
+        DataTypePtr src_column_type = 
make_nullable(block.get_by_position(arguments[0]).type);
+        auto dest_column_ptr = 
ColumnArray::create(src_column_type->create_column(),
+                                                   
ColumnArray::ColumnOffsets::create());
+        IColumn* dest_nested_column = &dest_column_ptr->get_data();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
+        DCHECK(dest_nested_column != nullptr);
+        dest_nested_column->reserve(0);
+        dest_offsets.reserve(0);
+
+        const NullMapType* src_null_map = nullptr;
+        if (src_column->is_nullable()) {
+            const ColumnNullable* src_nullable_col =
+                    check_and_get_column<ColumnNullable>(*src_column);
+            src_column = src_nullable_col->get_nested_column_ptr();
+            src_null_map = &src_nullable_col->get_null_map_column().get_data();
+        }
+
+        NullMapType* dest_nested_null_map = nullptr;
+        if (dest_nested_column->is_nullable()) {
+            ColumnNullable* dest_nullable_col =
+                    reinterpret_cast<ColumnNullable*>(dest_nested_column);
+            dest_nested_column = dest_nullable_col->get_nested_column_ptr();
+            dest_nested_null_map = 
&dest_nullable_col->get_null_map_column().get_data();
+        }
+        _execute(*src_column, *delimiter_column, *dest_nested_column, 
dest_offsets, src_null_map,
+                 dest_nested_null_map);
+
+        block.replace_by_position(result, std::move(dest_column_ptr));
+        return Status::OK();
+    }
+
+private:
+    void _execute(const IColumn& src_column, const IColumn& delimiter_column,
+                  IColumn& dest_nested_column, ColumnArray::Offsets64& 
dest_offsets,
+                  const NullMapType* src_null_map, NullMapType* 
dest_nested_null_map) {
+        ColumnString& dest_column_string = 
reinterpret_cast<ColumnString&>(dest_nested_column);
+        ColumnString::Chars& column_string_chars = 
dest_column_string.get_chars();
+        ColumnString::Offsets& column_string_offsets = 
dest_column_string.get_offsets();
+        column_string_chars.reserve(0);
+
+        ColumnArray::Offset64 string_pos = 0;
+        ColumnArray::Offset64 dest_pos = 0;
+        const ColumnString* src_column_string = reinterpret_cast<const 
ColumnString*>(&src_column);
+        ColumnArray::Offset64 src_offsets_size = 
src_column_string->get_offsets().size();
+        NullMapType null_map(src_null_map->size());
+        for (size_t i = 0; i < src_null_map->size(); ++i) {
+            null_map[i] = (*src_null_map)[i];
+        }
+
+        for (size_t i = 0; i < src_offsets_size; i++) {
+            if (src_null_map && null_map[i]) {
+                (*dest_nested_null_map).push_back(true);
+                column_string_offsets.push_back(string_pos);
+                dest_offsets.push_back(dest_pos);
+                continue;
+            }
+
+            const auto delimiter = delimiter_column.get_data_at(i).to_string();
+            const auto str = src_column_string->get_data_at(i).to_string();
+            StringRef str_ref = src_column_string->get_data_at(i);
+
+            if (str.size() == 0) {
+                dest_offsets.push_back(dest_pos);
+                if (src_null_map) {
+                    (*dest_nested_null_map).push_back(false);
+                }
+                continue;
+            }
+
+            if (delimiter.size() == 0) {
+                //If there is no delimiter, the entire string is printed
+                const size_t old_size = column_string_chars.size();
+                const size_t new_size = old_size + str.size();
+                const size_t str_size = str.size();
+                column_string_chars.resize(new_size);
+                if (str_size > 0) {
+                    memcpy(column_string_chars.data() + old_size, 
str_ref.data, str_ref.size);
+                }
+                if (src_null_map) {
+                    (*dest_nested_null_map).push_back(false);
+                }
+                string_pos += str_size;
+                dest_pos++;
+                column_string_offsets.push_back(string_pos);
+            } else if (delimiter.size() == 1) {
+                for (size_t str_pos = 0; str_pos <= str.size();) {
+                    const size_t str_offset = str_pos;
+                    const size_t old_size = column_string_chars.size();
+                    const size_t split_part_size = split_str(str_pos, str, 
delimiter);
+                    str_pos++;
+                    const size_t new_size = old_size + split_part_size;
+                    column_string_chars.resize(new_size);
+                    if (split_part_size > 0) {
+                        memcpy(column_string_chars.data() + old_size, 
str_ref.data + str_offset,
+                               split_part_size);
+                    }
+                    if (src_null_map) {
+                        (*dest_nested_null_map).push_back(false);
+                    }
+                    string_pos += split_part_size;
+                    dest_pos++;
+                    column_string_offsets.push_back(string_pos);
+                }
+            }
+            dest_offsets.push_back(dest_pos);
+        }
+    }
+
+    size_t split_str(size_t& pos, const string str, string delimiter) {

Review Comment:
   I have fixed the other issues, but I don't understand why `string_ref` or 
`string_value` should be used here instead of `char`. Could you explain? I'd appreciate it!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to