This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 420a91d0e1c [fix](merge-on-write) incorrect result caused by key range filter with pk (#31456)
420a91d0e1c is described below

commit 420a91d0e1c1f8177a152b08e56e9b225a293515
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Wed Feb 28 16:34:24 2024 +0800

    [fix](merge-on-write) incorrect result caused by key range filter with pk (#31456)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  4 +--
 be/src/util/key_util.h                             | 17 ++++-----
 .../test_primary_key_simple_case.out               | 22 ++++++++++++
 .../test_primary_key_simple_case.groovy            | 42 ++++++++++++++++++++++
 4 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f1cb1f2c61c..327a002e529 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1373,10 +1373,8 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
     DCHECK(pk_index_reader != nullptr);
 
     std::string index_key;
-    // when is_include is false, we shoudle append KEY_NORMAL_MARKER to the
-    // encode key. Otherwise, we will get an incorrect upper bound.
     encode_key_with_padding<RowCursor, true>(
-            &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include, true);
+            &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include);
     if (index_key < _segment->min_key()) {
         *rowid = 0;
         return Status::OK();
diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h
index 0dbaa397101..fd57566fa4f 100644
--- a/be/src/util/key_util.h
+++ b/be/src/util/key_util.h
@@ -50,6 +50,8 @@ constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01;
 constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
 // Used to represent maximal value for that field
 constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
+// Represents a value one greater than the normal marker; used by MoW
+constexpr uint8_t KEY_NORMAL_NEXT_MARKER = 0x03;
 
 // Encode one row into binary according given num_keys.
 // A cell will be encoded in the format of a marker and encoded content.
@@ -57,21 +59,20 @@ constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
 // fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
 // be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
 // If all num_keys are found in row, no marker will be added.
-// if padding_minimal is false and padding_normal_marker is true,
-// KEY_NORMAL_MARKER will be added.
-template <typename RowType, bool full_encode = false>
+template <typename RowType, bool is_mow = false>
 void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
-                             bool padding_minimal, bool padding_normal_marker = false) {
+                             bool padding_minimal) {
     for (auto cid = 0; cid < num_keys; cid++) {
         auto field = row.schema()->column(cid);
         if (field == nullptr) {
             if (padding_minimal) {
                 buf->push_back(KEY_MINIMAL_MARKER);
             } else {
-                if (padding_normal_marker) {
-                    buf->push_back(KEY_NORMAL_MARKER);
+                if (is_mow) {
+                    buf->push_back(KEY_NORMAL_NEXT_MARKER);
+                } else {
+                    buf->push_back(KEY_MAXIMAL_MARKER);
                 }
-                buf->push_back(KEY_MAXIMAL_MARKER);
             }
             break;
         }
@@ -82,7 +83,7 @@ void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_ke
             continue;
         }
         buf->push_back(KEY_NORMAL_MARKER);
-        if (full_encode) {
+        if (is_mow) {
             field->full_encode_ascending(cell.cell_ptr(), buf);
         } else {
             field->encode_ascending(cell.cell_ptr(), buf);
diff --git a/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out b/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out
new file mode 100644
index 00000000000..d82fe80fdea
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !pk_key_range --
+2024-02-18     \N      -4
+2024-02-18     \N      9
+2024-02-18     -10     -10
+2024-02-18     -10     -10
+2024-02-18     -10     -4
+2024-02-18     -10     5
+2024-02-18     -10     9
+2024-02-18     -4      -10
+2024-02-18     -4      -10
+2024-02-18     0       4
+2024-02-18     0       5
+2024-02-18     0       6
+2024-02-18     1       6
+2024-02-18     2       9
+2024-02-18     3       9
+2024-02-18     5       4
+2024-02-18     8       2
+2024-02-18     9       1
+2024-02-18     9       9
+
diff --git a/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy b/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
index fafd2de3be6..a2e351372e3 100644
--- a/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
+++ b/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
@@ -108,4 +108,46 @@ suite("test_primary_key_simple_case") {
     result = sql """ SELECT * FROM ${tableName} t ORDER BY user_id; """
     assertTrue(result.size() == 7)
     assertTrue(result[6][10] == 25)
+
+    sql """ DROP TABLE IF EXISTS test_unique_key_range_tbl """
+    sql """
+        create table test_unique_key_range_tbl (
+            k1 date not null,
+            k2 bigint not null,
+            v1 int null,
+            v2 int not null
+        ) UNIQUE KEY(`k1`, `k2`)
+        DISTRIBUTED BY HASH(`k2`) BUCKETS 30
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "enable_unique_key_merge_on_write" = "true"
+        );
+    """
+
+    sql """
+        insert into test_unique_key_range_tbl values
+            ( '2024-02-18' , -7822995176885966013 ,    -10 ,     -4 ),
+            ( '2024-02-18' , -5987215688096912139 ,      8 ,      2 ),
+            ( '2024-02-18' , -5889932400568797810 ,    -10 ,    -10 ),
+            ( '2024-02-18' , -5051784705055344649 ,      1 ,      6 ),
+            ( '2024-02-18' , -4635608137995832373 ,      3 ,      9 ),
+            ( '2024-02-18' , -3836821172182966892 ,    -10 ,    -10 ),
+            ( '2024-02-18' , -3675645188438967877 ,   NULL ,     -4 ),
+            ( '2024-02-18' , -3363157164254363034 ,      5 ,      4 ),
+            ( '2024-02-18' ,  -849169574767655353 ,     -4 ,    -10 ),
+            ( '2024-02-18' ,  -293023807696575395 ,   NULL ,      9 ),
+            ( '2024-02-18' ,  1167104788249072527 ,      0 ,      4 ),
+            ( '2024-02-18' ,  1660707941299238025 ,      9 ,      9 ),
+            ( '2024-02-18' ,  2852819493813807984 ,      0 ,      6 ),
+            ( '2024-02-18' ,  5444305694667795860 ,      9 ,      1 ),
+            ( '2024-02-18' ,  6136152292926889790 ,      2 ,      9 ),
+            ( '2024-02-18' ,  6538123407677174537 ,     -4 ,    -10 ),
+            ( '2024-02-18' ,  7958269158967938474 ,    -10 ,      9 ),
+            ( '2024-02-18' ,  9019386549208004184 ,    -10 ,      5 ),
+            ( '2024-02-18' ,  9208781524087970597 ,      0 ,      5 );
+    """
+
+    qt_pk_key_range """
+        select k1, v1, v2 from test_unique_key_range_tbl where k1 = '2024-02-18' order by 1, 2, 3;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to