This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ab8346560d [Enhancement](storage) add num_values consistency check when build/load IndexedColumn  (#14447) (#14450)
ab8346560d is described below

commit ab8346560d9ae094835b26693c1c9d2338c4b8c9
Author: zhengyu <[email protected]>
AuthorDate: Tue Nov 22 21:37:08 2022 +0800

    [Enhancement](storage) add num_values consistency check when build/load IndexedColumn  (#14447) (#14450)
---
 be/src/olap/rowset/segment_v2/indexed_column_reader.cpp |  9 +++++++--
 be/src/olap/rowset/segment_v2/indexed_column_writer.cpp | 12 +++++++++---
 be/src/olap/rowset/segment_v2/indexed_column_writer.h   |  2 +-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 31bab3a912..6bcf6055d9 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -99,6 +99,7 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle,
 ///////////////////////////////////////////////////////////////////////////////
 
 Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
+    Status status;
     // there is not init() for IndexedColumnIterator, so do it here
     if (!_compress_codec) {
         RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(), &_compress_codec));
@@ -113,8 +114,12 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
     // note that page_index is not used in IndexedColumnIterator, so we pass 0
     PageDecoderOptions opts;
     opts.need_check_bitmap = false;
-    return ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
-                              _reader->encoding_info(), pp, 0, &_data_page, opts);
+    status = ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
+                                _reader->encoding_info(), pp, 0, &_data_page, opts);
+    DCHECK(_reader->_meta.ordinal_index_meta().is_root_data_page()
+                   ? _reader->_meta.num_values() == _data_page.num_rows
+                   : true);
+    return status;
 }
 
 Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) {
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index cfce5afbb8..c15fe36429 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -87,14 +87,16 @@ Status IndexedColumnWriter::add(const void* value) {
     RETURN_IF_ERROR(
             _data_page_builder->add(reinterpret_cast<const uint8_t*>(value), &num_to_write));
     _num_values++;
+    size_t num_val;
     if (_data_page_builder->is_page_full()) {
-        RETURN_IF_ERROR(_finish_current_data_page());
+        RETURN_IF_ERROR(_finish_current_data_page(num_val));
     }
     return Status::OK();
 }
 
-Status IndexedColumnWriter::_finish_current_data_page() {
+Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
     auto num_values_in_page = _data_page_builder->count();
+    num_val = num_values_in_page;
     if (num_values_in_page == 0) {
         return Status::OK();
     }
@@ -134,7 +136,8 @@ Status IndexedColumnWriter::_finish_current_data_page() {
 }
 
 Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
-    RETURN_IF_ERROR(_finish_current_data_page());
+    size_t num_val_in_page;
+    RETURN_IF_ERROR(_finish_current_data_page(num_val_in_page));
     if (_options.write_ordinal_index) {
         RETURN_IF_ERROR(
                 _flush_index(_ordinal_index_builder.get(), meta->mutable_ordinal_index_meta()));
@@ -146,6 +149,9 @@ Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
     meta->set_encoding(_options.encoding);
     meta->set_num_values(_num_values);
     meta->set_compression(_options.compression);
+    if (_num_data_pages <= 1) {
+        DCHECK(num_val_in_page == _num_values);
+    }
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index 0c2df791f1..c0ea7da4a5 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -82,7 +82,7 @@ public:
     Status finish(IndexedColumnMetaPB* meta);
 
 private:
-    Status _finish_current_data_page();
+    Status _finish_current_data_page(size_t& num_val);
 
     Status _flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to