This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 1fc1c7005dccabdadeb0a5d2cd0cf0aa4871a3a8 Author: HappenLee <happen...@hotmail.com> AuthorDate: Mon Jan 10 20:27:45 2022 +0800 [Feature][Vectorized] Support String in vec exe engine (#7670) Co-authored-by: lihaopeng <lihaop...@baidu.com> --- be/src/olap/olap_define.h | 3 ++- be/src/olap/row_block2.cpp | 26 ++++++++++++++++++++-- be/src/olap/row_block2.h | 2 +- be/src/olap/rowset/beta_rowset_reader.cpp | 6 ++++- be/src/vec/exec/vunion_node.cpp | 9 ++++++++ .../apache/doris/rewrite/FoldConstantsRule.java | 7 ------ 6 files changed, 41 insertions(+), 12 deletions(-) diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index c2d4b7f..a9ac731 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -384,7 +384,8 @@ enum OLAPStatus { OLAP_ERR_ROWSET_LOAD_FAILED = -3109, OLAP_ERR_ROWSET_READER_INIT = -3110, OLAP_ERR_ROWSET_READ_FAILED = -3111, - OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112 + OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112, + OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE = -3113 }; enum ColumnFamilyIndex { diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 26b58ca..877f6a2 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -95,7 +95,9 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { return Status::OK(); } -void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) { +Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) { + constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024; + auto* column = origin_column.get(); bool nullable_mark_array[_selected_size]; @@ -170,6 +172,24 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP } break; } + case OLAP_FIELD_TYPE_STRING: { + auto column_string = assert_cast<vectorized::ColumnString*>(column); + + for (uint16_t j = 0; j < _selected_size; ++j) { + if (!nullable_mark_array[j]) { + uint16_t row_idx = _selection_vector[j]; + auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx)); + if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) { + column_string->insert_data(slice->data, slice->size); + } else { + return Status::NotSupported("Not support string len over than 1MB in vec engine."); + } + } else { + column_string->insert_default(); + } + } + break; + } case OLAP_FIELD_TYPE_CHAR: { auto column_string = assert_cast<vectorized::ColumnString*>(column); @@ -286,13 +306,15 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP DCHECK(false) << "Invalid type in RowBlockV2:" << _schema.column(cid)->type(); } } + + return Status::OK(); } Status RowBlockV2::convert_to_vec_block(vectorized::Block* block) { for (int i = 0; i < _schema.column_ids().size(); ++i) { auto cid = _schema.column_ids()[i]; auto column = (*std::move(block->get_by_position(i).column)).assume_mutable(); - _copy_data_to_column(cid, column); + RETURN_IF_ERROR(_copy_data_to_column(cid, column)); } _pool->clear(); return Status::OK(); diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index cdbf428..b98ab95 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -109,7 +109,7 @@ public: std::string debug_string(); private: - void _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr); + Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr); Schema _schema; size_t _capacity; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 459f3ca..4d35f2f 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -204,7 +204,11 @@ OLAPStatus BetaRowsetReader::next_block(vectorized::Block* block) { { SCOPED_RAW_TIMER(&_stats->block_convert_ns); - _input_block->convert_to_vec_block(block); + auto s = _input_block->convert_to_vec_block(block); + if (UNLIKELY(!s.ok())) { + LOG(WARNING) << "failed to read next block: " << s.to_string(); + return OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE; + } } is_first = false; } while (block->rows() < _context->runtime_state->batch_size()); // here we should keep block.rows() < batch_size diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index 1fa4da4..c05b3ef 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -195,6 +195,15 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) { if (!mem_reuse) { block->swap(mblock.to_block()); } + + // some insert query like "insert into string_test select 1, repeat('a', 1024 * 1024);" + // the const expr will be in output expr cause the union node return a empty block. so here we + // need add one row to make sure the union node exec const expr return at least one row + if (block->rows() == 0) { + block->insert({vectorized::ColumnUInt8::create(1), + std::make_shared<vectorized::DataTypeUInt8>(), ""}); + } + return Status::OK(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java index 628740a..8f5137f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java @@ -27,7 +27,6 @@ import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.SysVariableDesc; import org.apache.doris.catalog.Catalog; -import org.apache.doris.catalog.Function; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -195,12 +194,6 @@ public class FoldConstantsRule implements ExprRewriteRule { Analyzer analyzer, Map<String, Expr> sysVarMap, Map<String, Expr> infoFnMap) throws AnalysisException { if (expr.isConstant()) { - if (VectorizedUtil.isVectorized()) { - Function fn = expr.getFn(); - if (fn != null && (fn.functionName().equalsIgnoreCase("curtime") || - fn.functionName().equalsIgnoreCase("current_time"))) - return; - } // Do not constant fold cast(null as dataType) because we cannot preserve the // cast-to-types and that can lead to query failures, e.g., CTAS if (expr instanceof CastExpr) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org