This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 514e79c1d4e [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size #43480 (#43655)
514e79c1d4e is described below

commit 514e79c1d4e0abc2e418624dc63c50506de8de83
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Tue Nov 12 12:01:14 2024 +0800

    [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size #43480 (#43655)
    
    cherry pick from #43480
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 15 ++++++-
 .../java/org/apache/doris/analysis/IndexDef.java   |  4 +-
 .../index_p0/test_ngram_bloomfilter_index.groovy   | 47 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 6aab6c464fd..01e818b454d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -164,8 +164,19 @@ Status SegmentWriter::init(const std::vector<uint32_t>& 
col_ids, bool has_key,
         if (tablet_index) {
             opts.need_bloom_filter = true;
             opts.is_ngram_bf_index = true;
-            opts.gram_size = tablet_index->get_gram_size();
-            opts.gram_bf_size = tablet_index->get_gram_bf_size();
+            //narrow convert from int32_t to uint8_t and uint16_t which is 
dangerous
+            auto gram_size = tablet_index->get_gram_size();
+            auto gram_bf_size = tablet_index->get_gram_bf_size();
+            if (gram_size > 256 || gram_size < 1) {
+                return Status::NotSupported("Do not support ngram bloom filter 
for ngram_size: ",
+                                            gram_size);
+            }
+            if (gram_bf_size > 65535 || gram_bf_size < 64) {
+                return Status::NotSupported("Do not support ngram bloom filter 
for bf_size: ",
+                                            gram_bf_size);
+            }
+            opts.gram_size = gram_size;
+            opts.gram_bf_size = gram_bf_size;
         }
 
         opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index 19d7681ffed..406ee0cb247 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -252,8 +252,8 @@ public class IndexDef {
                     if (ngramSize > 256 || ngramSize < 1) {
                         throw new AnalysisException("gram_size should be 
integer and less than 256");
                     }
-                    if (bfSize > 65536 || bfSize < 64) {
-                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65536");
+                    if (bfSize > 65535 || bfSize < 64) {
+                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65535");
                     }
                 } catch (NumberFormatException e) {
                     throw new AnalysisException("invalid ngram properties:" + 
e.getMessage(), e);
diff --git 
a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy 
b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
index c56eed967a0..e2ab9b9c117 100644
--- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
+++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
@@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
     qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = 
'/%/7212503657802320699%' ORDER BY key_id"
     qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN 
('/%/7212503657802320699%') ORDER BY key_id"
     qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like 
'/%/7212503657802320699%' ORDER BY key_id"
+
+    //case for bf_size 65536
+    def tableName2 = 'test_ngram_bloomfilter_index2'
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+    test {
+        sql """
+        CREATE TABLE IF NOT EXISTS ${tableName2} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT '',
+            INDEX idx_ngrambf (`http_url`) USING NGRAM_BF 
PROPERTIES("gram_size" = "2", "bf_size" = "65536")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+        exception "bf_size should be integer and between 64 and 65535"
+    }
+
+    def tableName3 = 'test_ngram_bloomfilter_index3'
+    sql "DROP TABLE IF EXISTS ${tableName3}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName3} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+    test {
+        sql """ALTER TABLE  ${tableName3} ADD INDEX idx_http_url(http_url) 
USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url 
ngram_bf index'"""
+        exception "bf_size should be integer and between 64 and 65535"
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to