This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new bb179b77f7 [Feature-WIP](inverted index) support array type for inverted index reader (#16355) bb179b77f7 is described below commit bb179b77f75d2b0471eb7b3b75ad783d21596194 Author: YueW <45946325+tany...@users.noreply.github.com> AuthorDate: Thu Feb 2 16:14:14 2023 +0800 [Feature-WIP](inverted index) support array type for inverted index reader (#16355) --- be/src/vec/exec/scan/vscan_node.cpp | 20 ++++++- .../main/java/org/apache/doris/catalog/Type.java | 10 ++++ .../java/org/apache/doris/analysis/IndexDef.java | 4 ++ .../org/apache/doris/analysis/MatchPredicate.java | 69 +++++++++++---------- .../data/inverted_index_p0/test_array_index.out | 58 ++++++++++++++++++ .../inverted_index_p0/test_array_index.groovy | 70 ++++++++++++++++++++++ 6 files changed, 197 insertions(+), 34 deletions(-) diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 198e7ab0c7..d0fc12f37a 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -49,6 +49,17 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) { if (slot->type().is_string_type() && expr->type().is_string_type()) { return true; } + if (slot->type().is_array_type()) { + if (slot->type().children[0].type == expr->type().type) { + return true; + } + if (slot->type().children[0].is_date_type() && expr->type().is_date_type()) { + return true; + } + if (slot->type().children[0].is_string_type() && expr->type().is_string_type()) { + return true; + } + } return false; } @@ -391,7 +402,14 @@ Status VScanNode::_normalize_conjuncts() { std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots(); for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { - switch (slots[slot_idx]->type().type) { + auto type = slots[slot_idx]->type().type; + if (slots[slot_idx]->type().type == TYPE_ARRAY) { + type = slots[slot_idx]->type().children[0].type; + if (type == TYPE_ARRAY) { + continue; + } + } + switch (type) { #define M(NAME) \ case TYPE_##NAME: { \ ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(), \ diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index e6c2e3a4cd..ef3ec7c834 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -109,6 +109,7 @@ public abstract class Type { private static final Logger LOG = LogManager.getLogger(Type.class); private static final ArrayList<ScalarType> integerTypes; + private static final ArrayList<ScalarType> stringTypes; private static final ArrayList<ScalarType> numericTypes; private static final ArrayList<ScalarType> numericDateTimeTypes; private static final ArrayList<ScalarType> supportedTypes; @@ -123,6 +124,11 @@ public abstract class Type { integerTypes.add(BIGINT); integerTypes.add(LARGEINT); + stringTypes = Lists.newArrayList(); + stringTypes.add(CHAR); + stringTypes.add(VARCHAR); + stringTypes.add(STRING); + numericTypes = Lists.newArrayList(); numericTypes.addAll(integerTypes); numericTypes.add(FLOAT); @@ -207,6 +213,10 @@ public abstract class Type { return integerTypes; } + public static ArrayList<ScalarType> getStringTypes() { + return stringTypes; + } + public static ArrayList<ScalarType> getNumericTypes() { return numericTypes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index ed03dbd84e..d1c21b5d37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -17,6 +17,7 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.PrimitiveType; @@ -176,6 +177,9 @@ public class IndexDef { || indexType == IndexType.NGRAM_BF) { String indexColName = column.getName(); PrimitiveType colType = column.getDataType(); + if (indexType == IndexType.INVERTED && colType.isArrayType()) { + colType = ((ArrayType) column.getType()).getItemType().getPrimitiveType(); + } if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type() || colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN)) { throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. " diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java index ad6a6968a7..bec9ed403c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java @@ -107,38 +107,41 @@ public class MatchPredicate extends Predicate { Lists.<Type>newArrayList(new ArrayType(t), t), Type.BOOLEAN)); } - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_ANY.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR), - Type.BOOLEAN)); - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_ANY.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), - Type.BOOLEAN)); - - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_ALL.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR), - Type.BOOLEAN)); - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_ALL.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), - Type.BOOLEAN)); - - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_PHRASE.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR), - Type.BOOLEAN)); - functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( - Operator.MATCH_PHRASE.getName(), - symbolNotUsed, - Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), - Type.BOOLEAN)); + + for (Type t : Type.getStringTypes()) { + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ANY.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(t, t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ANY.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ALL.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(t, t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ALL.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_PHRASE.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(t, t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_PHRASE.getName(), + symbolNotUsed, + Lists.<Type>newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + } } private final Operator op; @@ -219,7 +222,7 @@ public class MatchPredicate extends Predicate { collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); if (fn == null) { throw new AnalysisException( - "no function found for " + op.toString() + " " + toSql()); + "no function found for " + op.toString() + "," + toSql()); } Expr e1 = getChild(0); Expr e2 = getChild(1); diff --git a/regression-test/data/inverted_index_p0/test_array_index.out b/regression-test/data/inverted_index_p0/test_array_index.out new file mode 100644 index 0000000000..8a858f5611 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_array_index.out @@ -0,0 +1,58 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] +2 [20, 30, 40] ['i', 'love', 'north korea'] + +-- !sql -- +2 [20, 30, 40] ['i', 'love', 'north korea'] + +-- !sql -- +2 [20, 30, 40] ['i', 'love', 'north korea'] + +-- !sql -- +2 [20, 30, 40] ['i', 'love', 'north korea'] +3 [30, 40, 50] \N +4 [40, 50, 60] \N + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] +2 [20, 30, 40] ['i', 'love', 'north korea'] +3 [30, 40, 50] \N +4 [40, 50, 60] \N + +-- !sql -- +3 [30, 40, 50] \N +4 [40, 50, 60] \N + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] +2 [20, 30, 40] ['i', 'love', 'north korea'] +3 [30, 40, 50] \N + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] +2 [20, 30, 40] ['i', 'love', 'north korea'] + +-- !sql -- +1 [10, 20, 30] ['i', 'love', 'china'] +2 [20, 30, 40] ['i', 'love', 'north korea'] +3 [30, 40, 50] \N + +-- !sql -- +2 [20, 30, 40] ['i', 'love', 'north korea'] +3 [30, 40, 50] \N +4 [40, 50, 60] \N + +-- !sql -- +3 [30, 40, 50] \N +4 [40, 50, 60] \N + +-- !sql -- +4 [40, 50, 60] \N + diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy b/regression-test/suites/inverted_index_p0/test_array_index.groovy new file mode 100644 index 0000000000..d240dbdeb7 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_array_index"){ + // prepare test table + + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def indexTblName = "array_test" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id`int(11)NULL, + `int_array` array<int(20)> NULL, + `c_array` array<varchar(20)> NULL, + INDEX c_array_idx(`c_array`) USING INVERTED PROPERTIES("parser"="english") COMMENT 'c_array index', + INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 'int_array index' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1", + "persistent"="false" + ); + """ + + // set enable_vectorized_engine=true + sql """ SET enable_vectorized_engine=true; """ + def var_result = sql "show variables" + logger.info("show variales result: " + var_result ) + + sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], ['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, [30,40,50], NULL);" + sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);" + qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';" + qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';" + qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';" + qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;" + qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org