github-actions[bot] commented on code in PR #24403:
URL: https://github.com/apache/doris/pull/24403#discussion_r1389097936


##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -0,0 +1,1008 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/vertical_segment_writer.h"
+
+#include <gen_cpp/segment_v2.pb.h>
+#include <parallel_hashmap/phmap.h>
+
+#include <algorithm>
+#include <cassert>
+#include <ostream>
+#include <unordered_map>
+#include <utility>
+
+#include "cloud/config.h"
+#include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/config.h"
+#include "common/logging.h" // LOG
+#include "gutil/port.h"
+#include "io/fs/file_writer.h"
+#include "olap/data_dir.h"
+#include "olap/key_coder.h"
+#include "olap/olap_common.h"
+#include "olap/primary_key_index.h"
+#include "olap/row_cursor.h"                      // RowCursor // IWYU pragma: keep
+#include "olap/rowset/rowset_writer_context.h"    // RowsetWriterContext
+#include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter
+#include "olap/rowset/segment_v2/page_io.h"
+#include "olap/rowset/segment_v2/page_pointer.h"
+#include "olap/segment_loader.h"
+#include "olap/short_key_index.h"
+#include "olap/tablet_schema.h"
+#include "olap/utils.h"
+#include "runtime/memory/mem_tracker.h"
+#include "service/point_query_executor.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/faststring.h"
+#include "util/key_util.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/schema_util.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/types.h"
+#include "vec/io/reader_buffer.h"
+#include "vec/jsonb/serialize.h"
+#include "vec/olap/olap_data_convertor.h"
+
+namespace doris {
+namespace segment_v2 {

Review Comment:
   warning: nested namespaces can be concatenated 
[modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::segment_v2 {
   ```
   
   be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1006:
   ```diff
   - } // namespace segment_v2
   - } // namespace doris
   + } // namespace doris::segment_v2
   ```
   



##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -0,0 +1,1008 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/vertical_segment_writer.h"
+
+#include <gen_cpp/segment_v2.pb.h>
+#include <parallel_hashmap/phmap.h>
+
+#include <algorithm>
+#include <cassert>
+#include <ostream>
+#include <unordered_map>
+#include <utility>
+
+#include "cloud/config.h"
+#include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/config.h"
+#include "common/logging.h" // LOG
+#include "gutil/port.h"
+#include "io/fs/file_writer.h"
+#include "olap/data_dir.h"
+#include "olap/key_coder.h"
+#include "olap/olap_common.h"
+#include "olap/primary_key_index.h"
+#include "olap/row_cursor.h"                      // RowCursor // IWYU pragma: keep
+#include "olap/rowset/rowset_writer_context.h"    // RowsetWriterContext
+#include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter
+#include "olap/rowset/segment_v2/page_io.h"
+#include "olap/rowset/segment_v2/page_pointer.h"
+#include "olap/segment_loader.h"
+#include "olap/short_key_index.h"
+#include "olap/tablet_schema.h"
+#include "olap/utils.h"
+#include "runtime/memory/mem_tracker.h"
+#include "service/point_query_executor.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/faststring.h"
+#include "util/key_util.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/schema_util.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/types.h"
+#include "vec/io/reader_buffer.h"
+#include "vec/jsonb/serialize.h"
+#include "vec/olap/olap_data_convertor.h"
+
+namespace doris {
+namespace segment_v2 {
+
+using namespace ErrorCode;
+
+static const char* k_segment_magic = "D0R1"; // file-local magic string; presumably marks a segment file — confirm where it is written
+static const uint32_t k_segment_magic_length = 4; // number of characters in k_segment_magic ("D0R1")
+
+// Constructs a writer for segment `segment_id` that emits its data through `file_writer`.
+//
+// `file_writer` must not be null (enforced with CHECK_NOTNULL). `tablet_schema`, `tablet` and
+// `mow_context` are moved into the writer; `opts` is copied. `max_row_per_segment` is accepted
+// but not read by this constructor.
+VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
+                                             TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet,
+                                             DataDir* data_dir, uint32_t max_row_per_segment,
+                                             const VerticalSegmentWriterOptions& opts,
+                                             std::shared_ptr<MowContext> mow_context)
+        : _segment_id(segment_id),
+          _tablet_schema(std::move(tablet_schema)),
+          _tablet(std::move(tablet)),
+          _data_dir(data_dir),
+          _opts(opts),
+          _file_writer(file_writer),
+          _mem_tracker(std::make_unique<MemTracker>("VerticalSegmentWriter:Segment-" +
+                                                    std::to_string(segment_id))),
+          _mow_context(std::move(mow_context)) {
+    CHECK_NOTNULL(file_writer);
+
+    _num_key_columns = _tablet_schema->num_key_columns();
+    _num_short_key_columns = _tablet_schema->num_short_key_columns();
+    DCHECK(_num_key_columns >= _num_short_key_columns);
+
+    // Record one key coder and one index length per key column; the key columns are read from
+    // schema positions [0, _num_key_columns).
+    for (size_t key_idx = 0; key_idx < _num_key_columns; ++key_idx) {
+        const auto& key_column = _tablet_schema->column(key_idx);
+        _key_coders.push_back(get_key_coder(key_column.type()));
+        _key_index_size.push_back(key_column.index_length());
+    }
+
+    // The sequence id is encoded into the primary key index only for merge-on-write unique-key
+    // tables that declare a sequence column.
+    const bool encode_sequence_id = _tablet_schema->has_sequence_col() &&
+                                    _tablet_schema->keys_type() == UNIQUE_KEYS &&
+                                    _opts.enable_unique_key_merge_on_write;
+    if (!encode_sequence_id) {
+        return;
+    }
+    const auto seq_col_idx = _tablet_schema->sequence_col_idx();
+    _seq_coder = get_key_coder(_tablet_schema->column(seq_col_idx).type());
+}
+
+// Releases everything this writer's memory tracker still reports as consumed.
+VerticalSegmentWriter::~VerticalSegmentWriter() {
+    const auto still_consumed = _mem_tracker->consumption();
+    _mem_tracker->release(still_consumed);
+}
+
+// Fills `meta` from `column` and recurses into every sub-column.
+//
+// Each child meta is appended to `meta` via add_children_columns() and receives the same
+// `column_id` as its parent. Encoding is always DEFAULT_ENCODING and compression comes from
+// the writer options.
+void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB* meta, uint32_t column_id,
+                                              const TabletColumn& column) {
+    // Identity.
+    meta->set_column_id(column_id);
+    meta->set_unique_id(column.unique_id());
+
+    // Physical description.
+    meta->set_type(static_cast<int>(column.type()));
+    meta->set_length(column.length());
+    meta->set_encoding(DEFAULT_ENCODING);
+    meta->set_compression(_opts.compression_type);
+    meta->set_is_nullable(column.is_nullable());
+
+    // Nested types: describe every sub-column under this meta.
+    const auto child_count = column.get_subtype_count();
+    for (uint32_t child = 0; child < child_count; ++child) {
+        ColumnMetaPB* child_meta = meta->add_children_columns();
+        _init_column_meta(child_meta, column_id, column.get_sub_column(child));
+    }
+}
+
+Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const 
TabletColumn& column) {
+    ColumnWriterOptions opts;
+    opts.meta = _footer.add_columns();
+
+    _init_column_meta(opts.meta, cid, column);
+
+    // now we create zone map for key columns in AGG_KEYS or all column in 
UNIQUE_KEYS or DUP_KEYS
+    // and not support zone map for array type and jsonb type.
+    opts.need_zone_map = (column.is_key() || _tablet_schema->keys_type() != 
KeysType::AGG_KEYS) &&
+                         column.type() != FieldType::OLAP_FIELD_TYPE_OBJECT;
+    opts.need_bloom_filter = column.is_bf_column();
+    auto* tablet_index = 
_tablet_schema->get_ngram_bf_index(column.unique_id());
+    if (tablet_index) {
+        opts.need_bloom_filter = true;
+        opts.is_ngram_bf_index = true;
+        opts.gram_size = tablet_index->get_gram_size();
+        opts.gram_bf_size = tablet_index->get_gram_bf_size();
+    }
+
+    opts.need_bitmap_index = column.has_bitmap_index();
+    bool skip_inverted_index = false;
+    if (_opts.rowset_ctx != nullptr) {
+        // skip write inverted index for index compaction
+        skip_inverted_index = 
_opts.rowset_ctx->skip_inverted_index.count(column.unique_id()) > 0;
+    }
+    // skip write inverted index on load if skip_write_index_on_load is true
+    if (_opts.write_type == DataWriteType::TYPE_DIRECT &&
+        _tablet_schema->skip_write_index_on_load()) {
+        skip_inverted_index = true;
+    }
+    // indexes for this column
+    opts.indexes = _tablet_schema->get_indexes_for_column(column.unique_id());
+    for (auto index : opts.indexes) {
+        if (!skip_inverted_index && index && index->index_type() == 
IndexType::INVERTED) {
+            opts.inverted_index = index;
+            // TODO support multiple inverted index
+            break;
+        }
+    }
+    if (column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+        opts.need_zone_map = false;
+        if (opts.need_bloom_filter) {
+            return Status::NotSupported("Do not support bloom filter for 
struct type");
+        }
+        if (opts.need_bitmap_index) {
+            return Status::NotSupported("Do not support bitmap index for 
struct type");
+        }
+    }
+    if (column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
+        opts.need_zone_map = false;
+        if (opts.need_bloom_filter) {
+            return Status::NotSupported("Do not support bloom filter for array 
type");
+        }
+        if (opts.need_bitmap_index) {
+            return Status::NotSupported("Do not support bitmap index for array 
type");
+        }
+    }
+    if (column.type() == FieldType::OLAP_FIELD_TYPE_JSONB) {
+        opts.need_zone_map = false;
+        if (opts.need_bloom_filter) {
+            return Status::NotSupported("Do not support bloom filter for jsonb 
type");
+        }
+        if (opts.need_bitmap_index) {
+            return Status::NotSupported("Do not support bitmap index for jsonb 
type");
+        }
+    }
+    if (column.type() == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
+        opts.need_zone_map = false;
+        if (opts.need_bloom_filter) {
+            return Status::NotSupported("Do not support bloom filter for 
agg_state type");
+        }
+        if (opts.need_bitmap_index) {
+            return Status::NotSupported("Do not support bitmap index for 
agg_state type");
+        }
+    }
+    if (column.type() == FieldType::OLAP_FIELD_TYPE_MAP) {
+        opts.need_zone_map = false;
+        if (opts.need_bloom_filter) {
+            return Status::NotSupported("Do not support bloom filter for map 
type");
+        }
+        if (opts.need_bitmap_index) {
+            return Status::NotSupported("Do not support bitmap index for map 
type");
+        }
+    }
+
+    if (column.is_row_store_column()) {
+        // smaller page size for row store column
+        opts.data_page_size = config::row_column_page_size;
+    }
+
+    std::unique_ptr<ColumnWriter> writer;
+    RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, 
&writer));
+    RETURN_IF_ERROR(writer->init());
+    _column_writers.push_back(std::move(writer));
+
+    _olap_data_convertor->add_column_data_convertor(column);
+    return Status::OK();
+};
+
+// Prepares the writer before any column writer is created.
+//
+// Resolves an UNKNOWN_COMPRESSION option to the tablet schema's compression type, creates the
+// block data convertor, reserves room for one column writer per schema column, and builds the
+// key index builder: a primary key index for merge-on-write unique-key tables, a short key
+// index for everything else.
+//
+// Returns the error from PrimaryKeyIndexBuilder::init() if it fails, otherwise OK.
+Status VerticalSegmentWriter::init() {
+    DCHECK(_column_writers.empty());
+    if (_opts.compression_type == UNKNOWN_COMPRESSION) {
+        _opts.compression_type = _tablet_schema->compression_type();
+    }
+    _olap_data_convertor = std::make_unique<vectorized::OlapBlockDataConvertor>();
+    _olap_data_convertor->reserve(_tablet_schema->num_columns());
+    _column_writers.reserve(_tablet_schema->columns().size());
+    // we don't need the short key index for unique key merge on write table.
+    if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
+        size_t seq_col_length = 0;
+        if (_tablet_schema->has_sequence_col()) {
+            // NOTE(review): the extra byte is presumably a marker prefix on the encoded
+            // sequence value; confirm against PrimaryKeyIndexBuilder.
+            seq_col_length =
+                    _tablet_schema->column(_tablet_schema->sequence_col_idx()).length() + 1;
+        }
+        _primary_key_index_builder =
+                std::make_unique<PrimaryKeyIndexBuilder>(_file_writer, seq_col_length);
+        RETURN_IF_ERROR(_primary_key_index_builder->init());
+    } else {
+        _short_key_index_builder =
+                std::make_unique<ShortKeyIndexBuilder>(_segment_id, _opts.num_rows_per_block);
+    }
+    return Status::OK();
+}
+
+// Erases the row-cache entry for `key` on this writer's tablet, when that applies.
+//
+// The entry is erased only if the storage row cache is enabled, the schema stores a row
+// column, and this is a direct write. Entries are invalidated rather than updated because the
+// rowset is not visible yet: if the load failed, an updated cache entry would expose data
+// from a rowset that never becomes visible.
+void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) const {
+    if (config::disable_storage_row_cache || !_tablet_schema->store_row_column() ||
+        _opts.write_type != DataWriteType::TYPE_DIRECT) {
+        return;
+    }
+    // rowset_ctx is optional (other code in this writer null-checks it); without it there is
+    // no tablet id to build the cache key from, so there is nothing to invalidate.
+    if (_opts.rowset_ctx == nullptr) {
+        return;
+    }
+    // invalidate cache
+    RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key});
+}
+
+void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& 
block) {

Review Comment:
   warning: method '_serialize_block_to_row_column' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/segment_v2/vertical_segment_writer.h:141:
   ```diff
   -     void _serialize_block_to_row_column(vectorized::Block& block);
   +     static void _serialize_block_to_row_column(vectorized::Block& block);
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to