This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bb179b77f7 [Feature-WIP](inverted index) support array type for inverted index reader (#16355)
bb179b77f7 is described below

commit bb179b77f75d2b0471eb7b3b75ad783d21596194
Author: YueW <45946325+tany...@users.noreply.github.com>
AuthorDate: Thu Feb 2 16:14:14 2023 +0800

    [Feature-WIP](inverted index) support array type for inverted index reader (#16355)
---
 be/src/vec/exec/scan/vscan_node.cpp                | 20 ++++++-
 .../main/java/org/apache/doris/catalog/Type.java   | 10 ++++
 .../java/org/apache/doris/analysis/IndexDef.java   |  4 ++
 .../org/apache/doris/analysis/MatchPredicate.java  | 69 +++++++++++----------
 .../data/inverted_index_p0/test_array_index.out    | 58 ++++++++++++++++++
 .../inverted_index_p0/test_array_index.groovy      | 70 ++++++++++++++++++++++
 6 files changed, 197 insertions(+), 34 deletions(-)

diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp
index 198e7ab0c7..d0fc12f37a 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -49,6 +49,17 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
     if (slot->type().is_string_type() && expr->type().is_string_type()) {
         return true;
     }
+    if (slot->type().is_array_type()) {
+        if (slot->type().children[0].type == expr->type().type) {
+            return true;
+        }
+        if (slot->type().children[0].is_date_type() && 
expr->type().is_date_type()) {
+            return true;
+        }
+        if (slot->type().children[0].is_string_type() && 
expr->type().is_string_type()) {
+            return true;
+        }
+    }
     return false;
 }
 
@@ -391,7 +402,14 @@ Status VScanNode::_normalize_conjuncts() {
     std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
 
     for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
-        switch (slots[slot_idx]->type().type) {
+        auto type = slots[slot_idx]->type().type;
+        if (slots[slot_idx]->type().type == TYPE_ARRAY) {
+            type = slots[slot_idx]->type().children[0].type;
+            if (type == TYPE_ARRAY) {
+                continue;
+            }
+        }
+        switch (type) {
 #define M(NAME)                                                                
              \
     case TYPE_##NAME: {                                                        
              \
         ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(),       
              \
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index e6c2e3a4cd..ef3ec7c834 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -109,6 +109,7 @@ public abstract class Type {
 
     private static final Logger LOG = LogManager.getLogger(Type.class);
     private static final ArrayList<ScalarType> integerTypes;
+    private static final ArrayList<ScalarType> stringTypes;
     private static final ArrayList<ScalarType> numericTypes;
     private static final ArrayList<ScalarType> numericDateTimeTypes;
     private static final ArrayList<ScalarType> supportedTypes;
@@ -123,6 +124,11 @@ public abstract class Type {
         integerTypes.add(BIGINT);
         integerTypes.add(LARGEINT);
 
+        stringTypes = Lists.newArrayList();
+        stringTypes.add(CHAR);
+        stringTypes.add(VARCHAR);
+        stringTypes.add(STRING);
+
         numericTypes = Lists.newArrayList();
         numericTypes.addAll(integerTypes);
         numericTypes.add(FLOAT);
@@ -207,6 +213,10 @@ public abstract class Type {
         return integerTypes;
     }
 
+    public static ArrayList<ScalarType> getStringTypes() {
+        return stringTypes;
+    }
+
     public static ArrayList<ScalarType> getNumericTypes() {
         return numericTypes;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index ed03dbd84e..d1c21b5d37 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.analysis;
 
+import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.PrimitiveType;
@@ -176,6 +177,9 @@ public class IndexDef {
                 || indexType == IndexType.NGRAM_BF) {
             String indexColName = column.getName();
             PrimitiveType colType = column.getDataType();
+            if (indexType == IndexType.INVERTED && colType.isArrayType()) {
+                colType = ((ArrayType) 
column.getType()).getItemType().getPrimitiveType();
+            }
             if (!(colType.isDateType() || colType.isDecimalV2Type() || 
colType.isDecimalV3Type()
                     || colType.isFixedPointType() || colType.isStringType() || 
colType == PrimitiveType.BOOLEAN)) {
                 throw new AnalysisException(colType + " is not supported in " 
+ indexType.toString() + " index. "
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
index ad6a6968a7..bec9ed403c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
@@ -107,38 +107,41 @@ public class MatchPredicate extends Predicate {
                     Lists.<Type>newArrayList(new ArrayType(t), t),
                     Type.BOOLEAN));
         }
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_ANY.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
-                Type.BOOLEAN));
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_ANY.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), 
Type.VARCHAR),
-                Type.BOOLEAN));
-
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_ALL.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
-                Type.BOOLEAN));
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_ALL.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), 
Type.VARCHAR),
-                Type.BOOLEAN));
-
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_PHRASE.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
-                Type.BOOLEAN));
-        
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
-                Operator.MATCH_PHRASE.getName(),
-                symbolNotUsed,
-                Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), 
Type.VARCHAR),
-                Type.BOOLEAN));
+
+        for (Type t : Type.getStringTypes()) {
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_ANY.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(t, t),
+                    Type.BOOLEAN));
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_ANY.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(new ArrayType(t), t),
+                    Type.BOOLEAN));
+
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_ALL.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(t, t),
+                    Type.BOOLEAN));
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_ALL.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(new ArrayType(t), t),
+                    Type.BOOLEAN));
+
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_PHRASE.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(t, t),
+                    Type.BOOLEAN));
+            
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_PHRASE.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(new ArrayType(t), t),
+                    Type.BOOLEAN));
+        }
     }
 
     private final Operator op;
@@ -219,7 +222,7 @@ public class MatchPredicate extends Predicate {
                 collectChildReturnTypes(), 
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
         if (fn == null) {
             throw new AnalysisException(
-                    "no function found for " + op.toString() + " " + toSql());
+                    "no function found for " + op.toString() + "," + toSql());
         }
         Expr e1 = getChild(0);
         Expr e2 = getChild(1);
diff --git a/regression-test/data/inverted_index_p0/test_array_index.out b/regression-test/data/inverted_index_p0/test_array_index.out
new file mode 100644
index 0000000000..8a858f5611
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_array_index.out
@@ -0,0 +1,58 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+
+-- !sql --
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+
+-- !sql --
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+
+-- !sql --
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+3      [30, 40, 50]    \N
+4      [40, 50, 60]    \N
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+3      [30, 40, 50]    \N
+4      [40, 50, 60]    \N
+
+-- !sql --
+3      [30, 40, 50]    \N
+4      [40, 50, 60]    \N
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+3      [30, 40, 50]    \N
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+
+-- !sql --
+1      [10, 20, 30]    ['i', 'love', 'china']
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+3      [30, 40, 50]    \N
+
+-- !sql --
+2      [20, 30, 40]    ['i', 'love', 'north korea']
+3      [30, 40, 50]    \N
+4      [40, 50, 60]    \N
+
+-- !sql --
+3      [30, 40, 50]    \N
+4      [40, 50, 60]    \N
+
+-- !sql --
+4      [40, 50, 60]    \N
+
diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy b/regression-test/suites/inverted_index_p0/test_array_index.groovy
new file mode 100644
index 0000000000..d240dbdeb7
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_array_index"){
+    // prepare test table
+
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    def indexTblName = "array_test"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+       CREATE TABLE IF NOT EXISTS ${indexTblName}(
+               `id`int(11)NULL,
+               `int_array` array<int(20)> NULL,
+               `c_array` array<varchar(20)> NULL,
+               INDEX c_array_idx(`c_array`) USING INVERTED 
PROPERTIES("parser"="english") COMMENT 'c_array index',
+               INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 
'int_array index'
+       ) ENGINE=OLAP
+       DUPLICATE KEY(`id`)
+       COMMENT 'OLAP'
+       DISTRIBUTED BY HASH(`id`) BUCKETS 1
+       PROPERTIES(
+               "replication_allocation" = "tag.location.default: 1",
+               "persistent"="false"
+       );
+    """
+    
+    // set enable_vectorized_engine=true
+    sql """ SET enable_vectorized_engine=true; """
+    def var_result = sql "show variables"
+    logger.info("show variales result: " + var_result )
+
+    sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], 
['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, 
[30,40,50], NULL);"
+    sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;"
+    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to