This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 5909237ab1c9922d5c7dbe16d271ade9ff33e766 Author: feiniaofeiafei <[email protected]> AuthorDate: Fri Mar 8 16:32:32 2024 +0800 [Fix](nereids) Add semantic check that the hash bucket column must be a key column when creating table for aggregate and unique models (#31951) --- .../commands/info/DistributionDescriptor.java | 7 +++ .../ddl_p0/unique_aggregate_key_hash_check.groovy | 66 ++++++++++++++++++++++ .../test_rowset_writer_fault.groovy | 2 +- .../test_group_commit_data_bytes_property.groovy | 2 +- .../test_group_commit_interval_ms_property.groovy | 2 +- .../schema_change_p0/test_schema_change_agg.groovy | 6 +- 6 files changed, 79 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java index ba8b587812e..0b86bf74c58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java @@ -82,6 +82,13 @@ public class DistributionDescriptor { throw new AnalysisException(String.format("Distribution column(%s) doesn't exist", c)); } }); + for (String columnName : cols) { + ColumnDefinition columnDefinition = columnMap.get(columnName); + if (!columnDefinition.isKey() + && (keysType == KeysType.UNIQUE_KEYS || keysType == KeysType.AGG_KEYS)) { + throw new AnalysisException("Distribution column[" + columnName + "] is not key column"); + } + } } else { if (keysType.equals(KeysType.UNIQUE_KEYS)) { throw new AnalysisException("Create unique keys table should not contain random distribution desc"); diff --git a/regression-test/suites/ddl_p0/unique_aggregate_key_hash_check.groovy b/regression-test/suites/ddl_p0/unique_aggregate_key_hash_check.groovy new file mode 100644 index 00000000000..73b9959e4b6 --- /dev/null +++ 
b/regression-test/suites/ddl_p0/unique_aggregate_key_hash_check.groovy @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("unique_aggregate_key_hash_check") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "DROP TABLE IF EXISTS example_tbl_agg3" + sql "DROP TABLE IF EXISTS example_tbl_unique" + + test { + sql """CREATE TABLE IF NOT EXISTS example_tbl_agg3 + ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `date` DATE NOT NULL COMMENT "数据灌入日期时间", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` SMALLINT COMMENT "用户年龄", + `sex` TINYINT COMMENT "用户性别", + `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", + `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费", + `max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间", + `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间" + ) + AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`) + DISTRIBUTED BY HASH(`cost`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + exception "Distribution column[cost] is not key column" + } + test { + sql """ + CREATE TABLE IF NOT EXISTS example_tbl_unique + ( + `user_id` LARGEINT NOT NULL 
COMMENT "用户id", + `username` VARCHAR(50) NOT NULL COMMENT "用户昵称", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` SMALLINT COMMENT "用户年龄", + `sex` TINYINT COMMENT "用户性别", + `PHONE` LARGEINT COMMENT "用户电话", + `address` VARCHAR(500) COMMENT "用户地址", + `register_time` DATETIME COMMENT "用户注册时间" + ) + UNIQUE KEY(`user_id`, `username`) + DISTRIBUTED BY HASH(`phone`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + exception "Distribution column[phone] is not key column" + } + +} diff --git a/regression-test/suites/fault_injection_p0/test_rowset_writer_fault.groovy b/regression-test/suites/fault_injection_p0/test_rowset_writer_fault.groovy index 84192e12b51..005e4b6bc97 100644 --- a/regression-test/suites/fault_injection_p0/test_rowset_writer_fault.groovy +++ b/regression-test/suites/fault_injection_p0/test_rowset_writer_fault.groovy @@ -35,7 +35,7 @@ suite("test_rowset_writer_fault", "nonConcurrent") { `k13` largeint(40) null comment "" ) engine=olap UNIQUE KEY (k0) - DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + DISTRIBUTED BY HASH(`k0`) BUCKETS 5 properties("replication_num" = "1") """ GetDebugPoint().clearDebugPointsForAllBEs() diff --git a/regression-test/suites/insert_p0/test_group_commit_data_bytes_property.groovy b/regression-test/suites/insert_p0/test_group_commit_data_bytes_property.groovy index b71ebe0d099..cba4349e16d 100644 --- a/regression-test/suites/insert_p0/test_group_commit_data_bytes_property.groovy +++ b/regression-test/suites/insert_p0/test_group_commit_data_bytes_property.groovy @@ -51,7 +51,7 @@ suite("test_group_commit_data_bytes_property") { v bigint ) UNIQUE KEY(k) - DISTRIBUTED BY HASH (v) BUCKETS 8 + DISTRIBUTED BY HASH (k) BUCKETS 8 PROPERTIES( "replication_num" = "1", "group_commit_data_bytes"="1024" diff --git a/regression-test/suites/insert_p0/test_group_commit_interval_ms_property.groovy b/regression-test/suites/insert_p0/test_group_commit_interval_ms_property.groovy 
index c4bf3bd0002..5d8962d07ac 100644 --- a/regression-test/suites/insert_p0/test_group_commit_interval_ms_property.groovy +++ b/regression-test/suites/insert_p0/test_group_commit_interval_ms_property.groovy @@ -51,7 +51,7 @@ suite("test_group_commit_interval_ms_property") { v bigint ) UNIQUE KEY(k) - DISTRIBUTED BY HASH (v) BUCKETS 8 + DISTRIBUTED BY HASH (k) BUCKETS 8 PROPERTIES( "replication_num" = "1", "group_commit_interval_ms"="10000" diff --git a/regression-test/suites/schema_change_p0/test_schema_change_agg.groovy b/regression-test/suites/schema_change_p0/test_schema_change_agg.groovy index e2931bda9ac..905dd3f8c35 100644 --- a/regression-test/suites/schema_change_p0/test_schema_change_agg.groovy +++ b/regression-test/suites/schema_change_p0/test_schema_change_agg.groovy @@ -176,7 +176,7 @@ suite("test_schema_change_agg", "p0") { partition `old_p1` values [("1"), ("2")), partition `old_p2` values [("2"), ("3")) ) - DISTRIBUTED BY HASH(pv) BUCKETS 1 + DISTRIBUTED BY HASH(citycode) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ); @@ -204,8 +204,8 @@ suite("test_schema_change_agg", "p0") { //distribution key test { - sql "alter table ${tableName3} modify column pv bigint sum default '0' comment 'pv'" - exception "Can not modify distribution column[pv]. index[${tableName3}]" + sql "alter table ${tableName3} modify column citycode smallint comment 'citycode'" + exception "Can not modify distribution column[citycode]. index[${tableName3}]" } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
