This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3b5f4ad198 [fix](unique-key-merge-on-write) fix that unique key with
mow may lose some data in the query result with predicates (#14455)
3b5f4ad198 is described below
commit 3b5f4ad1980afb1d5730724aa8e0e2bb14239ec5
Author: Xin Liao <[email protected]>
AuthorDate: Wed Nov 23 09:08:07 2022 +0800
[fix](unique-key-merge-on-write) fix that unique key with mow may lose some
data in the query result with predicates (#14455)
When a unique key table with merge-on-write (MOW) has a sequence column, a
query with predicates may return wrong results. There are two problems:
1. The sequence column needs to be removed from the primary key index entry
when comparing keys.
2. The sequence column needs to be removed from the min/max key.
---
be/src/olap/primary_key_index.cpp | 6 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 ++++++++
.../primary_index/test_unique_mow_sequence.out | 25 +++++++
.../primary_index/test_unique_mow_sequence.groovy | 83 ++++++++++++++++++++++
4 files changed, 139 insertions(+), 3 deletions(-)
diff --git a/be/src/olap/primary_key_index.cpp
b/be/src/olap/primary_key_index.cpp
index 79f6e782cd..6e2c3d954d 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -60,9 +60,9 @@ Status
PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta)
// finish primary key index
RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index()));
- // set min_max key
- meta->set_min_key(_min_key.ToString());
- meta->set_max_key(_max_key.ToString());
+ // set min_max key, the sequence column should be removed
+ meta->set_min_key(min_key().to_string());
+ meta->set_max_key(max_key().to_string());
// finish bloom filter index
RETURN_IF_ERROR(_bloom_filter_index_builder->flush());
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a4ca422e69..e6eff66685 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -525,6 +525,34 @@ Status
SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
}
*rowid = index_iterator->get_current_ordinal();
+ // The sequence column needs to be removed from primary key index when
comparing key
+ bool has_seq_col = _segment->_tablet_schema->has_sequence_col();
+ if (has_seq_col) {
+ size_t seq_col_length =
+
_segment->_tablet_schema->column(_segment->_tablet_schema->sequence_col_idx())
+ .length() +
+ 1;
+ MemPool pool;
+ size_t num_to_read = 1;
+ std::unique_ptr<ColumnVectorBatch> cvb;
+ RETURN_IF_ERROR(ColumnVectorBatch::create(
+ num_to_read, false, _segment->_pk_index_reader->type_info(),
nullptr, &cvb));
+ ColumnBlock block(cvb.get(), &pool);
+ ColumnBlockView column_block_view(&block);
+ size_t num_read = num_to_read;
+ RETURN_IF_ERROR(index_iterator->next_batch(&num_read,
&column_block_view));
+ DCHECK(num_to_read == num_read);
+
+ const Slice* sought_key = reinterpret_cast<const
Slice*>(cvb->cell_ptr(0));
+ Slice sought_key_without_seq =
+ Slice(sought_key->get_data(), sought_key->get_size() -
seq_col_length);
+
+ // compare key
+ if (Slice(index_key).compare(sought_key_without_seq) == 0) {
+ exact_match = true;
+ }
+ }
+
// find the key in primary key index, and the is_include is false, so move
// to the next row.
if (exact_match && !is_include) {
diff --git a/regression-test/data/primary_index/test_unique_mow_sequence.out
b/regression-test/data/primary_index/test_unique_mow_sequence.out
new file mode 100644
index 0000000000..65b68e4152
--- /dev/null
+++ b/regression-test/data/primary_index/test_unique_mow_sequence.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 Customer#000000001 j5JsirBM9P MOROCCO 0 MOROCCO AFRICA
25-989-741-2988 BUILDING
+2 Customer#000000002 487LW1dovn6Q4dMVym JORDAN 1 JORDAN
MIDDLE EAST 23-768-687-3665 AUTOMOBILE
+3 Customer#000000003 fkRGN8n ARGENTINA7 ARGENTINA AMERICA
11-719-748-3364 AUTOMOBILE
+4 Customer#000000004 4u58h f EGYPT 4 EGYPT MIDDLE EAST
14-128-190-5944 MACHINERY
+5 Customer#000000005 hwBtxkoBF qSW4KrI CANADA 5 CANADA
AMERICA 13-750-942-6364 HOUSEHOLD
+
+-- !sql --
+2996 Customer#000002996 PFd,H,pC PERU 1 PERU AMERICA
27-412-836-3763 FURNITURE
+2997 Customer#000002997 LiVKxN3lQHLunID ALGERIA 0 ALGERIA AFRICA
10-600-583-9608 FURNITURE
+2998 Customer#000002998 waJRUwjblh3sJbglX9gS9w PERU 7 PERU
AMERICA 27-747-219-4938 AUTOMOBILE
+2999 Customer#000002999 HaPy4sQ MiANd0pR5uA7 VIETNAM 5 VIETNAM
ASIA 31-297-683-9811 MACHINERY
+3000 Customer#000003000 ,5Yw1O EGYPT 4 EGYPT MIDDLE EAST
14-645-615-5901 FURNITURE
+
+-- !sql --
+1 Customer#000000001 j5JsirBM9P MOROCCO 0 MOROCCO AFRICA
25-989-741-2988 BUILDING
+
+-- !sql --
+3000 Customer#000003000 ,5Yw1O EGYPT 4 EGYPT MIDDLE EAST
14-645-615-5901 FURNITURE
+
+-- !sql --
+
+-- !sql --
+
diff --git
a/regression-test/suites/primary_index/test_unique_mow_sequence.groovy
b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy
new file mode 100644
index 0000000000..2612712165
--- /dev/null
+++ b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_unique_mow_sequence") {
+ def tableName = "test_mow_sequence"
+ sql """ DROP TABLE IF EXISTS $tableName """
+ sql """
+ CREATE TABLE `$tableName` (
+ `c_custkey` int(11) NOT NULL COMMENT "",
+ `c_name` varchar(26) NOT NULL COMMENT "",
+ `c_address` varchar(41) NOT NULL COMMENT "",
+ `c_city` varchar(11) NOT NULL COMMENT "",
+ `c_nation` varchar(16) NOT NULL COMMENT "",
+ `c_region` varchar(13) NOT NULL COMMENT "",
+ `c_phone` varchar(16) NOT NULL COMMENT "",
+ `c_mktsegment` varchar(11) NOT NULL COMMENT ""
+ )
+ UNIQUE KEY (`c_custkey`)
+ DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10
+ PROPERTIES (
+ "function_column.sequence_type" = 'int',
+ "compression"="zstd",
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true"
+ );
+ """
+
+ streamLoad {
+ table "${tableName}"
+
+ set 'column_separator', '|'
+ set 'compress_type', 'GZ'
+ set 'columns',
'c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use'
+ set 'function_column.sequence_col', 'c_custkey'
+
+ file """${context.sf1DataPath}/ssb/sf0.1/customer.tbl.gz"""
+
+ time 10000 // limit inflight 10s
+
+ // stream load action will check result, include Success status,
and NumberTotalRows == NumberLoadedRows
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+
+ sql "sync"
+
+ order_qt_sql "select * from $tableName where c_custkey < 6;"
+
+ order_qt_sql "select * from $tableName where c_custkey > 2995;"
+
+ qt_sql "select * from $tableName where c_custkey = 1;"
+
+ qt_sql "select * from $tableName where c_custkey = 3000;"
+
+ qt_sql "select * from $tableName where c_custkey = 3001;"
+
+ qt_sql "select * from $tableName where c_custkey = 0;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]