This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 514e79c1d4e [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size #43480 (#43655) 514e79c1d4e is described below commit 514e79c1d4e0abc2e418624dc63c50506de8de83 Author: airborne12 <jiang...@selectdb.com> AuthorDate: Tue Nov 12 12:01:14 2024 +0800 [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size #43480 (#43655) cherry pick from #43480 --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 15 ++++++- .../java/org/apache/doris/analysis/IndexDef.java | 4 +- .../index_p0/test_ngram_bloomfilter_index.groovy | 47 ++++++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 6aab6c464fd..01e818b454d 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -164,8 +164,19 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key, if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index 19d7681ffed..406ee0cb247 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -252,8 +252,8 @@ public class IndexDef { if (ngramSize > 256 || ngramSize < 1) { throw new AnalysisException("gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { - throw new AnalysisException("bf_size should be integer and between 64 and 65536"); + if (bfSize > 65535 || bfSize < 64) { + throw new AnalysisException("bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy index c56eed967a0..e2ab9b9c117 100644 --- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy +++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") { qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id" qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id" qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id" + + //case for bf_size 65536 + def tableName2 = 'test_ngram_bloomfilter_index2' + sql "DROP TABLE IF EXISTS ${tableName2}" + test { + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '', + INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536") + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + exception "bf_size should be integer and between 64 and 65535" + } + + def tableName3 = 'test_ngram_bloomfilter_index3' + sql "DROP TABLE IF EXISTS ${tableName3}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName3} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + test { + sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'""" + exception "bf_size should be integer and between 64 and 65535" + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org