This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1d8265c5a3 [refactor](row-store) make row store column a hidden column in meta (#16251) 1d8265c5a3 is described below commit 1d8265c5a3df818fbf87a12192ee419a7593d851 Author: lihangyu <15605149...@163.com> AuthorDate: Thu Feb 2 20:56:13 2023 +0800 [refactor](row-store) make row store column a hidden column in meta (#16251) This could simplify storage engine logic and make code more readable, and we could analyze the hidden `__DORIS_ROW_STORE_COL__` length etc. --- be/src/common/consts.h | 3 +- be/src/olap/compaction.cpp | 5 ---- be/src/olap/memtable.cpp | 32 ++++++++++++++++++++ be/src/olap/memtable.h | 4 +++ be/src/olap/rowset/beta_rowset_writer.cpp | 34 ---------------------- be/src/olap/rowset/beta_rowset_writer.h | 1 - be/src/olap/rowset/segment_v2/segment.cpp | 13 --------- be/src/olap/rowset/segment_v2/segment.h | 1 - be/src/olap/rowset/segment_v2/segment_writer.cpp | 29 ++++-------------- be/src/olap/rowset/segment_v2/segment_writer.h | 2 -- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 3 -- be/src/olap/schema.h | 2 +- be/src/olap/tablet.cpp | 8 ++--- be/src/olap/tablet_schema.cpp | 17 +++-------- be/src/olap/tablet_schema.h | 3 +- be/src/vec/jsonb/serialize.cpp | 4 +++ .../java/org/apache/doris/analysis/ColumnDef.java | 6 ++++ .../org/apache/doris/analysis/CreateTableStmt.java | 7 ++++- .../main/java/org/apache/doris/catalog/Column.java | 6 ++++ 19 files changed, 73 insertions(+), 107 deletions(-) diff --git a/be/src/common/consts.h b/be/src/common/consts.h index f6c7ece8e0..bf7a2e6013 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -26,11 +26,10 @@ const std::string CSV_WITH_NAMES = "csv_with_names"; const std::string CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types"; const std::string BLOCK_TEMP_COLUMN_PREFIX = "__TEMP__"; const std::string ROWID_COL = "__DORIS_ROWID_COL__"; -const std::string SOURCE_COL = "__DORIS_SOURCE_COL__"; +const std::string 
ROW_STORE_COL = "__DORIS_ROW_STORE_COL__"; constexpr int MAX_DECIMAL32_PRECISION = 9; constexpr int MAX_DECIMAL64_PRECISION = 18; constexpr int MAX_DECIMAL128_PRECISION = 38; -constexpr int SOURCE_COL_UNIQUE_ID = INT32_MAX; } // namespace BeConsts } // namespace doris diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index eb7d2521bf..76c7fc3374 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -276,11 +276,6 @@ Status Compaction::do_compaction_impl(int64_t permits) { stats.rowid_conversion = &_rowid_conversion; } - if (_cur_tablet_schema->store_row_column()) { - // table with row column not support vertical compaction now - vertical_compaction = false; - } - if (use_vectorized_compaction) { if (vertical_compaction) { res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), _cur_tablet_schema, diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index c56683a9c8..9ec3cb0fda 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -27,6 +27,7 @@ #include "vec/aggregate_functions/aggregate_function_reader.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/core/field.h" +#include "vec/jsonb/serialize.h" namespace doris { using namespace ErrorCode; @@ -356,6 +357,10 @@ Status MemTable::_do_flush(int64_t& duration_ns) { SCOPED_RAW_TIMER(&duration_ns); _collect_vskiplist_results<true>(); vectorized::Block block = _output_mutable_block.to_block(); + if (_tablet_schema->store_row_column()) { + // convert block to row store format + serialize_block_to_row_column(block); + } RETURN_NOT_OK(_rowset_writer->flush_single_memtable(&block, &_flush_size)); return Status::OK(); } @@ -364,4 +369,31 @@ Status MemTable::close() { return flush(); } +void MemTable::serialize_block_to_row_column(vectorized::Block& block) { + if (block.rows() == 0) { + return; + } + MonotonicStopWatch watch; + watch.start(); + // find row column id + int row_column_id = 0; + for (int i = 0; 
i < _tablet_schema->num_columns(); ++i) { + if (_tablet_schema->column(i).is_row_store_column()) { + row_column_id = i; + break; + } + } + vectorized::ColumnString* row_store_column = + static_cast<vectorized::ColumnString*>(block.get_by_position(row_column_id) + .column->assume_mutable_ref() + .assume_mutable() + .get()); + row_store_column->clear(); + vectorized::JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, + _tablet_schema->num_columns()); + VLOG_DEBUG << "serialize , num_rows:" << block.rows() << ", row_column_id:" << row_column_id + << ", total_byte_size:" << block.allocated_bytes() << ", serialize_cost(us)" + << watch.elapsed_time() / 1000; +} + } // namespace doris diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 0135b99247..b3b9026566 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -110,6 +110,10 @@ private: Status _generate_delete_bitmap(int64_t atomic_num_segments_before_flush, int64_t atomic_num_segments_after_flush); + // serialize block to row store format and append serialized data into row store column + // in block + void serialize_block_to_row_column(vectorized::Block& block); + private: TabletSharedPtr _tablet; const KeysType _keys_type; diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index c2a231be4e..9b9a670475 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -35,7 +35,6 @@ #include "olap/storage_engine.h" #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker_limiter.h" -#include "vec/jsonb/serialize.h" namespace doris { using namespace ErrorCode; @@ -461,30 +460,6 @@ Status BetaRowsetWriter::_find_longest_consecutive_small_segment( return Status::OK(); } -Status BetaRowsetWriter::_append_row_column(vectorized::Block* block, - vectorized::Block* dst_block) { - MonotonicStopWatch watch; - watch.start(); - *dst_block = block->clone_empty(); - 
dst_block->swap(*block); - if (!dst_block->has(BeConsts::SOURCE_COL)) { - auto string_type = std::make_shared<vectorized::DataTypeString>(); - auto source_column = string_type->create_column(); - dst_block->insert({std::move(source_column), string_type, BeConsts::SOURCE_COL}); - } - auto column = - static_cast<vectorized::ColumnString*>(dst_block->get_by_name(BeConsts::SOURCE_COL) - .column->assume_mutable_ref() - .assume_mutable() - .get()); - vectorized::JsonbSerializeUtil::block_to_jsonb(*_context.tablet_schema, *dst_block, *column, - _context.tablet_schema->num_columns()); - VLOG_DEBUG << "serialize , num_rows:" << dst_block->rows() - << ", total_byte_size:" << dst_block->allocated_bytes() << ", serialize_cost(us)" - << watch.elapsed_time() / 1000; - return Status::OK(); -} - Status BetaRowsetWriter::_get_segcompaction_candidates(SegCompactionCandidatesSharedPtr& segments, bool is_last) { if (is_last) { @@ -573,12 +548,6 @@ Status BetaRowsetWriter::_segcompaction_ramaining_if_necessary() { Status BetaRowsetWriter::_add_block(const vectorized::Block* block, std::unique_ptr<segment_v2::SegmentWriter>* segment_writer) { - std::unique_ptr<vectorized::Block> temp; - if (_context.tablet_schema->store_row_column()) { - temp.reset(new vectorized::Block); - RETURN_IF_ERROR(_append_row_column(const_cast<vectorized::Block*>(block), temp.get())); - block = temp.get(); - } size_t block_size_in_bytes = block->bytes(); size_t block_row_num = block->rows(); size_t row_avg_size_in_bytes = std::max((size_t)1, block_size_in_bytes / block_row_num); @@ -895,9 +864,6 @@ Status BetaRowsetWriter::_do_create_segment_writer( writer->reset(nullptr); return s; } - if (_context.tablet_schema->store_row_column()) { - (*writer)->append_row_column_writer(); - } return Status::OK(); } diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index ceb4bf4bdf..08fa36016a 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ 
b/be/src/olap/rowset/beta_rowset_writer.h @@ -124,7 +124,6 @@ private: bool _is_segment_overlapping(const std::vector<KeyBoundsPB>& segments_encoded_key_bounds); protected: - Status _append_row_column(vectorized::Block* block, vectorized::Block* dst_block); RowsetWriterContext _context; std::shared_ptr<RowsetMeta> _rowset_meta; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 5e61479702..7eeb15a717 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -263,19 +263,6 @@ Status Segment::_create_column_readers() { return Status::OK(); } -Status Segment::new_row_column_iterator(ColumnIterator** iter) { - const auto& row_column = TabletSchema::row_oriented_column(); - if (_column_readers.count(row_column.unique_id()) < 1) { - ColumnReaderOptions opts; - opts.kept_in_memory = _tablet_schema->is_in_memory(); - std::unique_ptr<ColumnReader> reader; - RETURN_IF_ERROR(ColumnReader::create(opts, _footer.columns(_footer.columns_size() - 1), - _footer.num_rows(), _file_reader, &reader)); - _column_readers.emplace(row_column.unique_id(), std::move(reader)); - } - return _column_readers.at(row_column.unique_id())->new_iterator(iter); -} - // Not use cid anymore, for example original table schema is colA int, then user do following actions // 1.add column b // 2. 
drop column b diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index df2b46f880..f99ebade28 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -77,7 +77,6 @@ public: uint32_t num_rows() const { return _footer.num_rows(); } Status new_column_iterator(const TabletColumn& tablet_column, ColumnIterator** iter); - Status new_row_column_iterator(ColumnIterator** iter); Status new_bitmap_index_iterator(const TabletColumn& tablet_column, BitmapIndexIterator** iter); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 3da1dbef56..778b5d392b 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -95,30 +95,6 @@ Status SegmentWriter::init() { return init(column_ids, true); } -Status SegmentWriter::append_row_column_writer() { - ColumnWriterOptions opts; - opts.meta = _footer.add_columns(); - - init_column_meta(opts.meta, _footer.columns_size(), TabletSchema::row_oriented_column(), - _tablet_schema); - opts.need_bloom_filter = false; - opts.need_bitmap_index = false; - // smaller page size - opts.data_page_size = 16 * 1024; - opts.need_zone_map = false; - opts.need_bloom_filter = false; - opts.need_bitmap_index = false; - - std::unique_ptr<ColumnWriter> writer; - RETURN_IF_ERROR(ColumnWriter::create(opts, &TabletSchema::row_oriented_column(), _file_writer, - &writer)); - RETURN_IF_ERROR(writer->init()); - _column_ids.push_back(_column_ids.size()); - _column_writers.push_back(std::move(writer)); - _olap_data_convertor->add_column_data_convertor(TabletSchema::row_oriented_column()); - return Status::OK(); -} - Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) { DCHECK(_column_writers.empty()); DCHECK(_column_ids.empty()); @@ -178,6 +154,11 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) { } } 
+ if (column.is_row_store_column()) { + // smaller page size for row store column + opts.data_page_size = 16 * 1024; + } + std::unique_ptr<ColumnWriter> writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); RETURN_IF_ERROR(writer->init()); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 3589205b3e..3e6e15b842 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -103,8 +103,6 @@ public: DataDir* get_data_dir() { return _data_dir; } bool is_unique_key() { return _tablet_schema->keys_type() == UNIQUE_KEYS; } - // add an extra column writer for writing row column - Status append_row_column_writer(); private: DISALLOW_COPY_AND_ASSIGN(SegmentWriter); diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index ab4f82d458..7aa1b3e214 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -162,9 +162,6 @@ Status VerticalBetaRowsetWriter::_create_segment_writer( writer->reset(nullptr); return s; } - if (_context.tablet_schema->store_row_column()) { - (*writer)->append_row_column_writer(); - } return Status::OK(); } diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index 50c2b76b5f..56bc1ecff3 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -40,7 +40,7 @@ public: Schema(TabletSchemaSPtr tablet_schema) { size_t num_columns = tablet_schema->num_columns(); // ignore this column - if (tablet_schema->columns().back().name() == BeConsts::SOURCE_COL) { + if (tablet_schema->columns().back().name() == BeConsts::ROW_STORE_COL) { --num_columns; } std::vector<ColumnId> col_ids(num_columns); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 64a01d3209..8a377b4c99 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1998,7 +1998,8 @@ Status 
Tablet::lookup_row_data(const RowLocation& row_location, const TupleDescr if (tablet_schema->store_row_column()) { // create _source column segment_v2::ColumnIterator* column_iterator = nullptr; - RETURN_IF_ERROR(segment->new_row_column_iterator(&column_iterator)); + RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(BeConsts::ROW_STORE_COL), + &column_iterator)); std::unique_ptr<segment_v2::ColumnIterator> ptr_guard(column_iterator); segment_v2::ColumnIteratorOptions opt; OlapReaderStatistics stats; @@ -2007,10 +2008,7 @@ Status Tablet::lookup_row_data(const RowLocation& row_location, const TupleDescr opt.use_page_cache = !config::disable_storage_page_cache; column_iterator->init(opt); // get and parse tuple row - vectorized::MutableColumnPtr column_ptr = - vectorized::DataTypeFactory::instance() - .create_data_type(TabletSchema::row_oriented_column()) - ->create_column(); + vectorized::MutableColumnPtr column_ptr = vectorized::ColumnString::create(); std::vector<segment_v2::rowid_t> rowids { static_cast<segment_v2::rowid_t>(row_location.row_id)}; RETURN_IF_ERROR(column_iterator->read_by_rowids(rowids.data(), 1, column_ptr)); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index e1ca64fad6..787ed8c834 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -449,6 +449,10 @@ void TabletColumn::add_sub_column(TabletColumn& sub_column) { _sub_column_count += 1; } +bool TabletColumn::is_row_store_column() const { + return _col_name == BeConsts::ROW_STORE_COL; +} + vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function( vectorized::DataTypes argument_types, std::string suffix) const { std::string agg_name = TabletColumn::get_string_by_aggregation_type(_aggregation) + suffix; @@ -930,17 +934,4 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } -const TabletColumn& TabletSchema::row_oriented_column() { - static TabletColumn 
source_column(OLAP_FIELD_AGGREGATION_NONE, - FieldType::OLAP_FIELD_TYPE_STRING, false, - BeConsts::SOURCE_COL_UNIQUE_ID, 0); - source_column.set_name(BeConsts::SOURCE_COL); - return source_column; -} - -void TabletSchema::add_row_column() { - // create row column - append_column(row_oriented_column()); -} - } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index b1e4a3c82d..8f9a21ee9a 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -95,6 +95,7 @@ public: static FieldType get_field_type_by_string(const std::string& str); static FieldAggregationMethod get_aggregation_type_by_string(const std::string& str); static uint32_t get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length); + bool is_row_store_column() const; private: int32_t _unique_id; @@ -245,8 +246,6 @@ public: bool is_dropped_column(const TabletColumn& col) const; - static const TabletColumn& row_oriented_column(); - private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp index 24f2e295d1..ebd3518d41 100644 --- a/be/src/vec/jsonb/serialize.cpp +++ b/be/src/vec/jsonb/serialize.cpp @@ -290,6 +290,10 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block& for (int j = 0; j < num_cols; ++j) { const auto& column = block.get_by_position(j).column; const auto& tablet_column = schema.columns()[j]; + if (tablet_column.is_row_store_column()) { + // ignore dst row store column + continue; + } const auto& data_ref = !tablet_column.is_array_type() ? 
column->get_data_at(i) : StringRef(); serialize_column(&pool, tablet_column, column.get(), data_ref, i, jsonb_writer); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index 66670db2f2..49052059d9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -149,6 +149,12 @@ public class ColumnDef { "sequence column hidden column", false); } + public static ColumnDef newRowStoreColumnDef() { + return new ColumnDef(Column.ROW_STORE_COL, TypeDef.create(PrimitiveType.STRING), false, null, false, + new ColumnDef.DefaultValue(true, ""), "doris row store hidden column", false); + } + + public boolean isAllowNull() { return isAllowNull; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 91661c5a3a..3bf3b89bcf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -306,11 +306,13 @@ public class CreateTableStmt extends DdlStmt { analyzeEngineName(); - // `analyzeUniqueKeyMergeOnWrite` would modify `properties`, which will be used later, + // `analyzeXXX` would modify `properties`, which will be used later, // so we just clone a properties map here. 
boolean enableUniqueKeyMergeOnWrite = false; + boolean enableStoreRowColumn = false; if (properties != null) { enableUniqueKeyMergeOnWrite = PropertyAnalyzer.analyzeUniqueKeyMergeOnWrite(new HashMap<>(properties)); + enableStoreRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties)); } // analyze key desc @@ -416,6 +418,9 @@ public class CreateTableStmt extends DdlStmt { columnDefs.add(ColumnDef.newDeleteSignColumnDef(AggregateType.REPLACE)); } } + if (enableStoreRowColumn) { + columnDefs.add(ColumnDef.newRowStoreColumnDef()); + } boolean hasObjectStored = false; String objectStoredColumn = ""; Set<String> columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index db80d8b2d5..d1230e6cb1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -59,6 +59,7 @@ public class Column implements Writable, GsonPostProcessable { public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__"; public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; public static final String ROWID_COL = "__DORIS_ROWID_COL__"; + public static final String ROW_STORE_COL = "__DORIS_ROW_STORE_COL__"; private static final String COLUMN_ARRAY_CHILDREN = "item"; public static final int COLUMN_UNIQUE_ID_INIT_VALUE = -1; @@ -262,6 +263,11 @@ public class Column implements Writable, GsonPostProcessable { || aggregationType == AggregateType.NONE) && nameEquals(SEQUENCE_COL, true); } + public boolean isRowStoreColumn() { + return !visible && (aggregationType == AggregateType.REPLACE + || aggregationType == AggregateType.NONE) && nameEquals(ROW_STORE_COL, true); + } + public PrimitiveType getDataType() { return type.getPrimitiveType(); } --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org