This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e43d255815f [fix](array) fix add_array_null_value for inverted 
index_builder (#42899)
e43d255815f is described below

commit e43d255815f94e4d318530fe8001968ec7cbb9a3
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Oct 30 19:16:59 2024 +0800

    [fix](array) fix add_array_null_value for inverted index_builder (#42899)
    
    ## Proposed changes
    Pick #42638 to doris branch 2.1
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/olap/task/index_builder.cpp                 |   7 +
 .../data/inverted_index_p0/arr_null_test_data.csv  | 200 +++++++++++++++++++++
 .../test_add_index_for_arr.groovy                  | 155 ++++++++++++++++
 3 files changed, 362 insertions(+)

diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index b0b7eb46d19..490f0654e90 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -541,6 +541,13 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
             return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                     "CLuceneError occured: {}", e.what());
         }
+        // we should refresh nullmap for array
+        for (int row_id = 0; row_id < num_rows; row_id++) {
+            if (null_map && null_map[row_id] == 1) {
+                RETURN_IF_ERROR(
+                        
_inverted_index_builders[index_writer_sign]->add_array_nulls(row_id));
+            }
+        }
         return Status::OK();
     }
 
diff --git a/regression-test/data/inverted_index_p0/arr_null_test_data.csv 
b/regression-test/data/inverted_index_p0/arr_null_test_data.csv
new file mode 100644
index 00000000000..2258751c931
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/arr_null_test_data.csv
@@ -0,0 +1,200 @@
+0|NULL|NULL
+1|[]|NULL
+2|NULL|[]
+3|[null, 'None', null, null, 'None', 'text5', null]|[]
+4|[null, 'None', 'None', null, 'text4', null, 'None']|NULL
+5|NULL|NULL
+6|NULL|[]
+7|NULL|NULL
+8|[null, 'None', 'None', null, 'text4', 'None', null, 'text7', null]|[]
+9|NULL|[]
+10|['None', 'text1', 'None', 'text3', 'text4', null, 'text6', null]|['text0', 
'None', null, 'text3', 'None']
+11|[null, 'None', null, 'None', null, 'text5', null, null, 'text8']|[null, 
null, null, 'text3', null, 'text5', null]
+12|[]|['text0', 'text1', null, 'text3', 'text4', 'None', 'text6', null]
+13|[]|[]
+14|NULL|[null, null, null, 'None', 'None']
+15|NULL|NULL
+16|NULL|NULL
+17|NULL|['text0', null, null, null, 'text4', 'text5', null, null]
+18|[null, 'None', 'None', 'None', 'None', null, 'text6', 'text7', null, 
null]|NULL
+19|[]|[]
+20|NULL|[]
+21|NULL|NULL
+22|NULL|NULL
+23|NULL|NULL
+24|NULL|[]
+25|NULL|[]
+26|NULL|['text0', 'text1', 'text2', 'text3', null, null, null]
+27|[]|NULL
+28|NULL|NULL
+29|NULL|[]
+30|[]|[null, null, null, 'text3', 'None', 'None', null, null, null, null]
+31|NULL|NULL
+32|NULL|NULL
+33|[]|[]
+34|['None', null, null, 'text3', 'None', null, null, null, 'text8']|[null, 
'None', 'None', 'None', 'None', 'None', null]
+35|[]|NULL
+36|NULL|[null, 'None', null, 'text3', 'text4', null, 'text6', null, 'None', 
'text9']
+37|['text0', null, 'text2', null, 'text4', 'text5']|NULL
+38|[]|[null, null, null, 'None', 'text4']
+39|[]|['text0', 'text1', null, 'None', 'None']
+40|NULL|['None', null, 'text2', 'text3', 'text4', 'None', 'None', null, null, 
null]
+41|[null, 'text1', 'text2', 'None', 'None', 'None', null, null, null]|[null, 
'None', null, null, 'text4', 'text5', null, 'text7', null, 'None']
+42|NULL|NULL
+43|NULL|NULL
+44|NULL|[]
+45|['None', null, null, 'None', null, 'None', 'None', 'None']|[]
+46|[null, null, 'text2', null, null, null, 'text6', null, 'text8']|NULL
+47|NULL|NULL
+48|NULL|[]
+49|NULL|NULL
+50|['text0', null, 'None', null, null, null, null, 'None']|[]
+51|NULL|NULL
+52|NULL|['text0', 'None', 'None', 'None', 'text4']
+53|[]|NULL
+54|['text0', 'None', 'text2', null, 'None', 'None', null, null]|NULL
+55|['None', null, null, null, null, 'None', null]|[]
+56|NULL|[]
+57|['None', 'text1', null, null, null, null, 'None', null]|[null, null, 
'None', 'text3', 'None', null, 'None', 'None']
+58|NULL|[]
+59|NULL|NULL
+60|[]|NULL
+61|[null, 'text1', null, 'None', 'text4', null]|[]
+62|NULL|NULL
+63|[null, null, 'text2', null, 'None', null, null, null, null]|[null, 'text1', 
'None', 'text3', null, null, 'text6']
+64|['text0', null, null, null, null, 'text5']|NULL
+65|NULL|['text0', 'None', 'None', 'None', null, 'None', null, 'None', 'text8', 
'None']
+66|NULL|NULL
+67|[]|NULL
+68|['None', null, null, 'text3', 'None', null, 'text6', 'None']|NULL
+69|NULL|['text0', null, null, 'None', 'text4', 'text5', null]
+70|['text0', null, null, null, 'text4']|[]
+71|NULL|[null, 'text1', null, null, 'text4', 'text5']
+72|NULL|[]
+73|NULL|[]
+74|[null, 'None', null, 'None', null, null]|[null, 'text1', 'None', null, 
'text4', null]
+75|NULL|NULL
+76|[]|[]
+77|[]|[]
+78|[null, null, 'None', 'None', null, null]|['None', 'text1', null, 'None', 
null, null, null, 'None', null]
+79|NULL|[]
+80|[]|[]
+81|[]|NULL
+82|NULL|NULL
+83|['None', 'None', null, 'None', 'None', 'text5', null, null, null, 
'text9']|['None', null, 'None', null, 'text4', 'None', 'None', 'text7', 'None']
+84|NULL|['None', 'None', 'text2', null, 'None', null, null, 'None']
+85|NULL|[null, null, 'text2', 'text3', null, 'text5']
+86|[]|['text0', null, null, 'text3', null, 'text5']
+87|NULL|['None', null, null, null, 'None']
+88|NULL|[]
+89|[null, 'None', 'None', null, 'text4', null, 'text6']|[]
+90|NULL|['None', null, 'None', null, 'None', 'text5', 'text6', 'text7', null]
+91|NULL|[null, 'None', null, 'text3', null, null, 'text6', 'text7', 'None']
+92|NULL|['None', 'None', 'None', null, null, 'text5', null, null]
+93|[]|[]
+94|[null, null, 'None', 'text3', 'text4', null]|[]
+95|NULL|[null, 'None', 'None', null, 'None', null, null, 'None']
+96|NULL|NULL
+97|[]|NULL
+98|[]|NULL
+99|['text0', 'text1', null, 'text3', 'text4', null, null, null, null, 
'None']|['None', 'text1', 'text2', 'text3', null, null, 'None', null]
+100|NULL|NULL
+101|NULL|[null, null, null, null, 'text4', null, 'None', 'None']
+102|[null, 'None', 'None', 'text3', 'None', null, null, null, null]|['None', 
null, null, 'text3', null]
+103|NULL|NULL
+104|NULL|[null, 'None', 'text2', null, 'None', null]
+105|[null, null, 'text2', 'text3', null]|NULL
+106|NULL|[]
+107|NULL|['text0', null, null, null, 'None']
+108|NULL|NULL
+109|NULL|[]
+110|[null, 'text1', 'None', null, 'None', null, 'text6', 'text7']|[]
+111|[null, null, 'None', 'text3', null, 'None']|[null, 'text1', null, null, 
'None', null]
+112|NULL|[null, 'None', null, null, 'text4', 'text5', 'text6', 'None']
+113|NULL|[]
+114|NULL|['None', null, null, null, null, 'text5']
+115|[null, null, 'None', 'text3', null]|['text0', 'text1', null, null, null]
+116|NULL|NULL
+117|NULL|[]
+118|['text0', null, null, null, 'None', 'text5', 'text6', 'None', 'text8']|NULL
+119|[null, 'text1', 'text2', 'None', null, null, 'text6', 'None']|NULL
+120|[null, 'None', null, 'None', null, null, null, null, null]|['text0', 
'text1', 'None', 'None', null, 'None', null, null]
+121|NULL|[null, null, 'text2', null, 'None']
+122|NULL|[]
+123|['None', 'text1', 'None', 'None', null, 'text5']|[null, 'None', null, 
null, 'None', 'None', null]
+124|[]|[null, 'None', null, null, null, null]
+125|NULL|NULL
+126|NULL|[]
+127|NULL|NULL
+128|NULL|[]
+129|['text0', 'None', 'None', 'text3', null, 'text5']|['None', 'text1', 
'text2', 'None', null, null, 'None', null, 'None']
+130|NULL|NULL
+131|['text0', 'text1', 'text2', 'None', null, null, 'None']|NULL
+132|NULL|NULL
+133|NULL|NULL
+134|[]|[]
+135|NULL|NULL
+136|NULL|[null, null, null, null, null, 'None', 'text6', null]
+137|[]|NULL
+138|NULL|[]
+139|NULL|['text0', null, 'text2', null, 'text4', null, null, 'None', 'text8', 
null]
+140|[]|[]
+141|[]|[]
+142|NULL|NULL
+143|NULL|NULL
+144|[]|['None', 'text1', null, null, 'text4', null, 'text6', null, null, 
'None']
+145|[]|NULL
+146|[]|['text0', 'None', null, null, null, 'None', null]
+147|NULL|NULL
+148|['None', 'None', null, 'None', 'text4', 'None']|['None', null, null, null, 
'None']
+149|NULL|['None', null, null, 'None', 'None', null, null, 'text7']
+150|['text0', 'text1', 'None', 'text3', 'text4', 'None']|[]
+151|[]|[]
+152|NULL|NULL
+153|NULL|NULL
+154|[null, 'text1', null, null, null, 'None', 'None', 'text7']|[null, null, 
null, null, 'text4', 'text5', 'text6']
+155|NULL|['None', 'text1', 'text2', 'text3', 'None', null, 'None', 'text7', 
'text8', 'None']
+156|[null, 'None', null, 'text3', null, 'text5', 'text6', null]|NULL
+157|NULL|['None', 'None', 'text2', null, null]
+158|[]|[]
+159|NULL|[]
+160|NULL|NULL
+161|[]|[]
+162|[null, 'None', null, 'None', 'text4']|NULL
+163|NULL|NULL
+164|['None', 'None', 'None', null, null, null, 'text6', null, null]|NULL
+165|[]|NULL
+166|NULL|[]
+167|NULL|NULL
+168|[]|[]
+169|[]|['text0', 'text1', null, null, 'text4', null]
+170|NULL|[null, null, 'None', 'None', 'None', 'text5', 'None', null, 'None', 
'None']
+171|NULL|NULL
+172|[]|NULL
+173|NULL|[]
+174|['None', 'text1', null, 'None', 'text4', null, null, 'None', 'text8']|[]
+175|NULL|NULL
+176|[]|NULL
+177|NULL|[]
+178|[]|[]
+179|NULL|NULL
+180|[null, null, null, null, 'text4', null, null, null, 'text8']|NULL
+181|[]|[null, 'None', null, null, 'text4', null, 'text6', 'None']
+182|NULL|['None', 'text1', null, 'None', 'None', null, 'None', 'text7']
+183|NULL|[null, null, 'None', 'text3', null, 'None', null, 'None', null, null]
+184|[null, 'None', null, 'None', null, 'text5', 'None', 'text7']|NULL
+185|[]|NULL
+186|NULL|[null, null, null, 'text3', 'text4']
+187|[null, null, 'None', 'text3', 'text4']|NULL
+188|[]|[]
+189|[null, null, 'text2', null, 'None', null, null, 'None', 'text8']|NULL
+190|['None', null, 'None', null, 'None', 'text5', 'None', null, 'text8', 
null]|[]
+191|NULL|NULL
+192|NULL|NULL
+193|['None', null, 'text2', 'None', null]|NULL
+194|NULL|[]
+195|[]|NULL
+196|NULL|NULL
+197|[null, 'text1', 'text2', 'None', 'text4', null, null]|[null, 'None', 
'None', 'None', null, null, null, 'text7', null, null]
+198|NULL|NULL
+199|[]|[null, 'None', 'None', null, null, null, null, null, null, 'text9']
diff --git 
a/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy 
b/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy
new file mode 100644
index 00000000000..6f3e772dd08
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_add_index_for_arr") {
+    // prepare test table
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+    // here some variable to control inverted index query
+    sql """ set enable_profile=true"""
+    sql """ set enable_pipeline_x_engine=true;"""
+    sql """ set enable_inverted_index_query=false"""
+    sql """ set enable_common_expr_pushdown=true """
+
+    def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+        for(int t = delta_time; t <= OpTimeout; t += delta_time){
+            alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = 
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+            alter_res = alter_res.toString()
+            if(alter_res.contains("FINISHED")) {
+                sleep(3000) // wait change table state to normal
+                logger.info(table_name + " latest alter job finished, detail: 
" + alter_res)
+                break
+            }
+            useTime = t
+            sleep(delta_time)
+        }
+        assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish 
timeout")
+    }
+
+    def wait_for_build_index_on_partition_finish = { table_name, OpTimeout ->
+        for(int t = delta_time; t <= OpTimeout; t += delta_time){
+            alter_res = sql """SHOW BUILD INDEX WHERE TableName = 
"${table_name}";"""
+            def expected_finished_num = alter_res.size();
+            def finished_num = 0;
+            for (int i = 0; i < expected_finished_num; i++) {
+                logger.info(table_name + " build index job state: " + 
alter_res[i][7] + i)
+                if (alter_res[i][7] == "FINISHED") {
+                    ++finished_num;
+                }
+            }
+            if (finished_num == expected_finished_num) {
+                logger.info(table_name + " all build index jobs finished, 
detail: " + alter_res)
+                break
+            }
+            useTime = t
+            sleep(delta_time)
+        }
+        assertTrue(useTime <= OpTimeout, 
"wait_for_latest_build_index_on_partition_finish timeout")
+    }
+
+
+    sql "DROP TABLE IF EXISTS my_test_array"
+    // create table without any index
+    sql """
+            CREATE TABLE IF NOT EXISTS my_test_array (
+                `id` int(11) NULL,
+                `name` ARRAY<text> NULL,
+                `description` ARRAY<text> NULL,
+            )
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            properties("replication_num" = "1");
+    """
+
+    def var_result = sql "show variables"
+    logger.info("show variales result: " + var_result )
+
+    // stream_load with csv data
+     streamLoad {
+             table "my_test_array"
+
+             file "arr_null_test_data.csv" // import csv file
+             time 10000 // limit inflight 10s
+             set 'column_separator', '|'
+             set 'format', 'csv'
+
+             // if declared a check callback, the default check condition will 
ignore.
+             // So you must check all condition
+             check { result, exception, startTime, endTime ->
+                 if (exception != null) {
+                     throw exception
+                 }
+                 log.info("Stream load result: ${result}".toString())
+                 def json = parseJson(result)
+                 assertEquals(200, json.NumberTotalRows)
+                 assertEquals(200, json.NumberLoadedRows)
+                 assertTrue(json.LoadBytes > 0)
+             }
+     }
+
+    // query without inverted index
+    // query rows with array_contains
+    def sql_query_name1 = sql "select id, name[1], description[1] from 
my_test_array where array_contains(name,'text7')"
+    // query rows with !array_contains
+    def sql_query_name2 = sql "select id, name[1], description[1] from 
my_test_array where !array_contains(name,'text7')"
+
+    // add index for name
+    sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;"
+    wait_for_latest_op_on_table_finish("my_test_array", timeout)
+    // build index for name that name data can using inverted index
+    if (!isCloudMode()) {
+        sql "BUILD INDEX name_idx ON my_test_array"
+        wait_for_build_index_on_partition_finish("my_test_array", timeout)
+    }
+
+    // query with inverted index
+    sql "set enable_inverted_index_query=true"
+    // query rows with array_contains
+    def sql_query_name1_inverted = sql "select id, name[1], description[1] 
from my_test_array where array_contains(name,'text7')"
+    // query rows with !array_contains
+    def sql_query_name2_inverted = sql "select id, name[1], description[1] 
from my_test_array where !array_contains(name,'text7')"
+
+    // check result for query without inverted index and with inverted index
+    def size1 = sql_query_name1.size();
+    log.info("sql_query_name1 query without inverted index rows size: 
${size1}")
+    for (int i = 0; i < sql_query_name1.size(); i++) {
+        assertEquals(sql_query_name1[i][0], sql_query_name1_inverted[i][0])
+        assertEquals(sql_query_name1[i][1], sql_query_name1_inverted[i][1])
+        assertEquals(sql_query_name1[i][2], sql_query_name1_inverted[i][2])
+    }
+    def size2 = sql_query_name2.size();
+    log.info("sql_query_name2 query without inverted index rows size: 
${size2}")
+    for (int i = 0; i < sql_query_name2.size(); i++) {
+        assertEquals(sql_query_name2[i][0], sql_query_name2_inverted[i][0])
+        assertEquals(sql_query_name2[i][1], sql_query_name2_inverted[i][1])
+        assertEquals(sql_query_name2[i][2], sql_query_name2_inverted[i][2])
+    }
+
+    // drop index
+    // add index on column description
+    sql "drop index name_idx on my_test_array"
+    wait_for_latest_op_on_table_finish("my_test_array", timeout)
+
+    def sql_query_name1_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where array_contains(name,'text7')"
+    def sql_query_name2_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where !array_contains(name,'text7')"
+
+    assertEquals(sql_query_name1.size(), 
sql_query_name1_without_inverted.size())
+    assertEquals(sql_query_name2.size(), 
sql_query_name2_without_inverted.size())
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to