This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8db0cd995fdc9fa615e094a68f2a6c20668b8e2a Author: bobhan1 <[email protected]> AuthorDate: Wed Aug 23 14:21:22 2023 +0800 [enhancement](bitmap)support bitmap type for non-key column in unique table (#23228) --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 4 +- .../sql-manual/sql-reference/Data-Types/BITMAP.md | 4 +- .../sql-manual/sql-reference/Data-Types/BITMAP.md | 17 ++++--- .../org/apache/doris/analysis/CreateTableStmt.java | 18 +++---- .../org/apache/doris/catalog/AggregateType.java | 1 - .../apache/doris/analysis/CreateTableStmtTest.java | 17 ++----- .../data_model_p0/unique/test_unique_bitmap.out | 21 +++++++++ .../data_model_p0/unique/test_unique_bitmap.groovy | 55 ++++++++++++++++++++++ 8 files changed, 106 insertions(+), 31 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 270eb98995..681934b8ad 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -154,7 +154,9 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key, // now we create zone map for key columns in AGG_KEYS or all column in UNIQUE_KEYS or DUP_KEYS // and not support zone map for array type and jsonb type. - opts.need_zone_map = column.is_key() || _tablet_schema->keys_type() != KeysType::AGG_KEYS; + opts.need_zone_map = + (column.is_key() || _tablet_schema->keys_type() != KeysType::AGG_KEYS) && + column.type() != FieldType::OLAP_FIELD_TYPE_OBJECT; opts.need_bloom_filter = column.is_bf_column(); auto* tablet_index = _tablet_schema->get_ngram_bf_index(column.unique_id()); if (tablet_index) { diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md index a96026ac60..81ec1bae79 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md @@ -28,7 +28,9 @@ under the License. ### Description BITMAP -BITMAP cannot be used as a key column, and the aggregation type is BITMAP_UNION when building the table. +The columns of the BITMAP type can be used in Aggregate tables or Unique tables. +When used in a Unique table, they must be used as non-key columns. +When used in an Aggregate table, they must be used as non-key columns, and the aggregation type is BITMAP_UNION when building the table. The user does not need to specify the length and default value. The length is controlled within the system according to the degree of data aggregation. And the BITMAP column can only be queried or used by supporting functions such as bitmap_union_count, bitmap_union, bitmap_hash and bitmap_hash64. diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md index 02604de472..dee7f760cc 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md @@ -26,13 +26,16 @@ under the License. ## BITMAP ### description - BITMAP - BITMAP不能作为key列使用,建表时配合聚合类型为BITMAP_UNION。 - 用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。 - 并且BITMAP列只能通过配套的bitmap_union_count、bitmap_union、bitmap_hash、bitmap_hash64等函数进行查询或使用。 - - 离线场景下使用BITMAP会影响导入速度,在数据量大的情况下查询速度会慢于HLL,并优于Count Distinct。 - 注意:实时场景下BITMAP如果不使用全局字典,使用了bitmap_hash()可能会导致有千分之一左右的误差。如果这个误差不可接受,可以使用bitmap_hash64。 +BITMAP + +BITMAP类型的列可以在Aggregate表或Unique表中使用。 +在Unique表中使用时,其必须作为非key列使用。 +在Aggregate表中使用时,其必须作为非key列使用,且建表时配合的聚合类型为BITMAP_UNION。 +用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。 +并且BITMAP列只能通过配套的bitmap_union_count、bitmap_union、bitmap_hash、bitmap_hash64等函数进行查询或使用。 + +离线场景下使用BITMAP会影响导入速度,在数据量大的情况下查询速度会慢于HLL,并优于Count Distinct。 +注意:实时场景下BITMAP如果不使用全局字典,使用了bitmap_hash()可能会导致有千分之一左右的误差。如果这个误差不可接受,可以使用bitmap_hash64。 ### example diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 3192b37038..b1df8498b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -491,8 +491,6 @@ public class CreateTableStmt extends DdlStmt { columnDefs.add(ColumnDef.newVersionColumnDef(AggregateType.REPLACE)); } } - boolean hasObjectStored = false; - String objectStoredColumn = ""; Set<String> columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); for (ColumnDef columnDef : columnDefs) { columnDef.analyze(engineName.equals("olap")); @@ -519,8 +517,16 @@ public class CreateTableStmt extends DdlStmt { } if (columnDef.getType().isObjectStored()) { - hasObjectStored = true; - objectStoredColumn = columnDef.getName(); + if (columnDef.getType().isBitmapType()) { + if (keysDesc.getKeysType() == KeysType.DUP_KEYS) { + throw new AnalysisException("column:" + columnDef.getName() + + " must be used in AGG_KEYS or UNIQUE_KEYS."); + } + } else { + if (keysDesc.getKeysType() != KeysType.AGG_KEYS) { + throw new AnalysisException("column:" + columnDef.getName() + " must be used in AGG_KEYS."); + } + } } if (!columnSet.add(columnDef.getName())) { @@ -528,10 +534,6 @@ public class CreateTableStmt extends DdlStmt { } } - if (hasObjectStored && keysDesc.getKeysType() != KeysType.AGG_KEYS) { - throw new AnalysisException("column:" + objectStoredColumn + " must be used in AGG_KEYS."); - } - if (engineName.equals("olap")) { // before analyzing partition, handle the replication allocation info properties = rewriteReplicaAllocationProperties(properties); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java index d58330b599..1d6b862869 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java @@ -102,7 +102,6 @@ public enum AggregateType { // all types except object stored column type, such as bitmap hll // quantile_state. EnumSet<PrimitiveType> excObjectStored = EnumSet.allOf(PrimitiveType.class); - excObjectStored.remove(PrimitiveType.BITMAP); excObjectStored.remove(PrimitiveType.HLL); excObjectStored.remove(PrimitiveType.QUANTILE_STATE); excObjectStored.remove(PrimitiveType.AGG_STATE); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java index 0c0eec2c53..b7bdfed3ff 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java @@ -329,21 +329,12 @@ public class CreateTableStmtTest { } @Test - public void testBmpHllNoAggTab() throws Exception { - ColumnDef bitmap = new ColumnDef("col3", new TypeDef(ScalarType.createType(PrimitiveType.BITMAP))); - cols.add(bitmap); - CreateTableStmt stmt = new CreateTableStmt(false, false, tblNameNoDb, cols, "olap", - new KeysDesc(KeysType.DUP_KEYS, colsName), null, new RandomDistributionDesc(10), null, null, ""); - expectedEx.expect(AnalysisException.class); - expectedEx.expectMessage( - "Aggregate type `col3` bitmap NONE NOT NULL COMMENT \"\" is not compatible with primitive type bitmap"); - stmt.analyze(analyzer); - - cols.remove(bitmap); + public void testHllNoAggTab() throws Exception { ColumnDef hll = new ColumnDef("col3", new TypeDef(ScalarType.createType(PrimitiveType.HLL))); cols.add(hll); - stmt = new CreateTableStmt(false, false, tblNameNoDb, cols, "olap", new KeysDesc(KeysType.DUP_KEYS, colsName), - null, new RandomDistributionDesc(10), null, null, ""); + CreateTableStmt stmt = new CreateTableStmt(false, false, tblNameNoDb, cols, "olap", + new KeysDesc(KeysType.DUP_KEYS, colsName), null, new RandomDistributionDesc(10), + null, null, ""); expectedEx.expect(AnalysisException.class); expectedEx.expectMessage( "Aggregate type `col3` hll NONE NOT NULL COMMENT \"\" is not compatible with primitive type hll"); diff --git a/regression-test/data/data_model_p0/unique/test_unique_bitmap.out b/regression-test/data/data_model_p0/unique/test_unique_bitmap.out new file mode 100644 index 0000000000..85f559e238 --- /dev/null +++ b/regression-test/data/data_model_p0/unique/test_unique_bitmap.out @@ -0,0 +1,21 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 1 +2 2 3,1000 +3 3 999,1000,888888 + +-- !sql -- +1 4 5,90,876,1000 +2 2 3,1000 +3 8 0,1,2,3,5,99,876,2445 + +-- !sql -- +1 1 1 +2 2 3,1000 +3 3 999,1000,888888 + +-- !sql -- +1 4 5,90,876,1000 +2 2 3,1000 +3 8 0,1,2,3,5,99,876,2445 + diff --git a/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy b/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy new file mode 100644 index 0000000000..64cf809eea --- /dev/null +++ b/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_unique_table_bitmap") { + def tbName = "test_uniq_table_bitmap1" + sql "DROP TABLE IF EXISTS ${tbName}" + sql """ + CREATE TABLE IF NOT EXISTS ${tbName} ( + k int, + id_bitmap bitmap + ) UNIQUE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 properties("replication_num" = "1"); + """ + sql "insert into ${tbName} values(1,to_bitmap(1));" + sql "insert into ${tbName} values(2,bitmap_or(to_bitmap(3),to_bitmap(1000)));" + sql "insert into ${tbName} values(3,bitmap_or(to_bitmap(999),to_bitmap(1000),to_bitmap(888888)));" + qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from ${tbName} order by k;" + sql "insert into ${tbName} values(3,bitmap_from_string('1,0,1,2,3,1,5,99,876,2445'));" + sql "insert into ${tbName} values(1,bitmap_or(bitmap_from_string('90,5,876'),to_bitmap(1000)));" + qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from ${tbName} order by k;" + sql "DROP TABLE ${tbName};" + + def tbName2 = "test_uniq_table_bitmap2" + sql "DROP TABLE IF EXISTS ${tbName2}" + sql """ + CREATE TABLE IF NOT EXISTS ${tbName2} ( + k int, + id_bitmap bitmap + ) UNIQUE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true"); + """ + sql "insert into ${tbName2} values(1,to_bitmap(1));" + sql "insert into ${tbName2} values(2,bitmap_or(to_bitmap(3),to_bitmap(1000)));" + sql "insert into ${tbName2} values(3,bitmap_or(to_bitmap(999),to_bitmap(1000),to_bitmap(888888)));" + qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from ${tbName2} order by k;" + sql "insert into ${tbName2} values(3,bitmap_from_string('1,0,1,2,3,1,5,99,876,2445'));" + sql "insert into ${tbName2} values(1,bitmap_or(bitmap_from_string('90,5,876'),to_bitmap(1000)));" + qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from ${tbName2} order by k;" + sql "DROP TABLE ${tbName2};" +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
