HappenLee commented on code in PR #49212:
URL: https://github.com/apache/doris/pull/49212#discussion_r2004749195
##########
be/src/vec/exprs/lambda_function/varray_map_function.cpp:
##########
@@ -184,57 +184,73 @@ class ArrayMapFunction : public LambdaFunction {
data_types.push_back(col_type.get_nested_type());
}
- ColumnPtr result_col = nullptr;
+ MutableColumnPtr result_col = nullptr;
DataTypePtr res_type;
std::string res_name;
//process first row
- args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1];
- args.cur_size = (*args.offsets_ptr)[args.current_row_idx] -
args.array_start;
-
- while (args.current_row_idx < block->rows()) {
- Block lambda_block;
- for (int i = 0; i < names.size(); i++) {
- ColumnWithTypeAndName data_column;
- if (_contains_column_id(args, i) || i >= gap) {
- data_column = ColumnWithTypeAndName(data_types[i],
names[i]);
+ args_info.array_start =
(*args_info.offsets_ptr)[args_info.current_row_idx - 1];
+ args_info.cur_size =
+ (*args_info.offsets_ptr)[args_info.current_row_idx] -
args_info.array_start;
+
+ // lambda block to execute the lambda, and reuse the memory
+ Block lambda_block;
+ auto column_size = names.size();
+ MutableColumns columns(column_size);
+ while (args_info.current_row_idx < block->rows()) {
+ bool mem_reuse = lambda_block.mem_reuse();
+ for (int i = 0; i < column_size; i++) {
+ if (mem_reuse) {
+ columns[i] =
lambda_block.get_by_position(i).column->assume_mutable();
} else {
- data_column = ColumnWithTypeAndName(
-
data_types[i]->create_column_const_with_default_value(0), data_types[i],
- names[i]);
+ if (_contains_column_id(output_slot_ref_indexs, i) || i >=
gap) {
+ // TODO: maybe we could create a const column, so as not to
insert_many_from when extending data,
+ // but for now we handle batch_size of array nested data
every time, so the columns may have different rows
+ columns[i] = data_types[i]->create_column();
+ } else {
+ columns[i] = data_types[i]
+
->create_column_const_with_default_value(0)
+ ->assume_mutable();
+ }
}
- lambda_block.insert(std::move(data_column));
}
-
- MutableColumns columns = lambda_block.mutate_columns();
+ // handle batch_size of array nested data every time in order to avoid
memory overflow
while (columns[gap]->size() < batch_size) {
long max_step = batch_size - columns[gap]->size();
- long current_step =
- std::min(max_step, (long)(args.cur_size -
args.current_offset_in_array));
- size_t pos = args.array_start + args.current_offset_in_array;
+ long current_step = std::min(
+ max_step, (long)(args_info.cur_size -
args_info.current_offset_in_array));
+ size_t pos = args_info.array_start +
args_info.current_offset_in_array;
for (int i = 0; i < arguments.size(); ++i) {
columns[gap + i]->insert_range_from(*lambda_datas[i], pos,
current_step);
}
- args.current_offset_in_array += current_step;
- args.current_repeat_times += current_step;
- if (args.current_offset_in_array >= args.cur_size) {
- args.current_row_eos = true;
+ args_info.current_offset_in_array += current_step;
+ args_info.current_repeat_times += current_step;
+ if (args_info.current_offset_in_array >= args_info.cur_size) {
+ args_info.current_row_eos = true;
}
- _extend_data(columns, block, args, gap);
- if (args.current_row_eos) {
- args.current_row_idx++;
- args.current_offset_in_array = 0;
- if (args.current_row_idx >= block->rows()) {
+ _extend_data(columns, block, args_info.current_repeat_times,
gap,
Review Comment:
Consider setting `args_info.current_repeat_times = 0;` inside the
`_extend_data` function.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]