This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 8d60e573e5c [fix](agg) Aggregating string types with null values may
result in incorrect result (#42067)
8d60e573e5c is described below
commit 8d60e573e5c7e0fe117d1a63f7024463627f64b8
Author: Xin Liao <[email protected]>
AuthorDate: Mon Oct 21 11:29:17 2024 +0800
[fix](agg) Aggregating string types with null values may result in
incorrect result (#42067)
Aggregating string types with null values may result in incorrect result
because using the replace_column_data function can cause incorrect
offsets in the column.
A reproducible caseļ¼
```
CREATE TABLE `test_scan_keys_with_bool_type` (
`col1` tinyint NOT NULL,
`col2` int NOT NULL,
`col3` tinyint NOT NULL,
`col5` boolean REPLACE NOT NULL,
`col4` datetime(2) REPLACE NOT NULL,
`col6` double REPLACE_IF_NOT_NULL NULL,
`col7` varchar(100) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
insert into test_scan_keys_with_bool_type values
( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282
, NULL ),
( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282
, "hi" ),
( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234
, NULL );
insert into test_scan_keys_with_bool_type values
( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 ,
"doris" );
MySQL [test]> select * from test_scan_keys_with_bool_type;
+------+------+------+------+------------------------+---------------------+-------+
| col1 | col2 | col3 | col5 | col4 | col6
| col7 |
+------+------+------+------+------------------------+---------------------+-------+
| -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| hi |
| -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| NULL |
| -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| hidor |
| -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234
| NULL |
+------+------+------+------+------------------------+---------------------+-------+
4 rows in set (0.057 sec)
MySQL [test]> set skip_storage_engine_merge = true; select * from
test_scan_keys_with_bool_type;
+------+------+------+------+------------------------+---------------------+-------+
| col1 | col2 | col3 | col5 | col4 | col6
| col7 |
+------+------+------+------+------------------------+---------------------+-------+
| -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| hi |
| -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| NULL |
| -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234
| NULL |
| -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821
| doris |
+------+------+------+------+------------------------+---------------------+-------+
4 rows in set (0.023 sec)
```
https://github.com/apache/doris/pull/33493 By supporting variant type
aggregation, this issue has been resolved.So versions after 2.1 do not
have this issue.
---
be/src/vec/olap/block_reader.cpp | 4 +--
.../aggregate/test_aggregate_table.out | 6 ++++
.../aggregate/test_aggregate_table.groovy | 32 ++++++++++++++++++++++
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
index d69efa181e8..5ff2890ab37 100644
--- a/be/src/vec/olap/block_reader.cpp
+++ b/be/src/vec/olap/block_reader.cpp
@@ -470,10 +470,10 @@ size_t BlockReader::_copy_agg_data() {
auto& dst_column = _stored_data_columns[idx];
if (_stored_has_variable_length_tag[idx]) {
//variable length type should replace ordered
+ dst_column->clear();
for (size_t i = 0; i < copy_size; i++) {
auto& ref = _stored_row_ref[i];
-
dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
- ref.row_pos, i);
+
dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
}
} else {
for (auto& it : _temp_ref_map) {
diff --git
a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
index 1a7ed71eb2e..8da75c78275 100644
--- a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
+++ b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
@@ -46,3 +46,9 @@ datetimev2_value_min_2 datetime(6) Yes false
\N MIN
datetimev2_value_replace_2 datetime(6) Yes false \N REPLACE
datetimev2_value_replace_if_not_null_2 datetime(6) Yes false \N
REPLACE_IF_NOT_NULL
+-- !string_agg_table_with_null --
+-100 0 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
hi
+-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 1 1 true 2024-02-16T04:37:37 1.399962421904282E9
doris
+-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7
NULL
+
diff --git
a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
index f7c4d8266ae..9d0322df17d 100644
--- a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
+++ b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
@@ -99,4 +99,36 @@ suite("test_aggregate_table") {
qt_desc_date_table """desc date_agg"""
sql """DROP TABLE date_agg"""
+ sql """DROP TABLE IF EXISTS test_string_agg_with_null"""
+ sql """
+ CREATE TABLE `test_string_agg_with_null` (
+ `col1` tinyint NOT NULL,
+ `col2` int NOT NULL,
+ `col3` tinyint NOT NULL,
+ `col5` boolean REPLACE NOT NULL,
+ `col4` datetime(2) REPLACE NOT NULL,
+ `col6` double REPLACE_IF_NOT_NULL NULL,
+ `col7` varchar(100) REPLACE_IF_NOT_NULL NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`col1`, `col2`, `col3`)
+ DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """ insert into test_string_agg_with_null values
+ ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , NULL ),
+ ( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , "hi" ),
+ ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' ,
23423423.0324234 , "NULL" );
+ """
+
+ sql """ insert into test_string_agg_with_null values
+ ( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' ,
1399962421.904282 , "doris" );
+ """
+
+ qt_string_agg_table_with_null """ select * from test_string_agg_with_null
"""
+ sql """DROP TABLE test_string_agg_with_null"""
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]