This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 0a599fa7438 branch-3.0: [fix](array/map) Fix BE crash in lambda functions #49140 (#49168)
0a599fa7438 is described below
commit 0a599fa74382d5af17ed479457f2e0edcb5ace81
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Mar 19 11:16:14 2025 +0800
branch-3.0: [fix](array/map) Fix BE crash in lambda functions #49140 (#49168)
Cherry-picked from #49140
Co-authored-by: Gabriel <[email protected]>
---
.../exprs/lambda_function/varray_map_function.cpp | 10 +-
.../vec/functions/array/function_array_element.h | 5 +-
.../data/function_p0/test_array_map.out | Bin 0 -> 107 bytes
.../suites/function_p0/test_array_map.groovy | 232 +++++++++++++++++++++
4 files changed, 242 insertions(+), 5 deletions(-)
diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp
index 609f5dcebda..f4b36b2fff2 100644
--- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp
+++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp
@@ -92,8 +92,9 @@ public:
if (type_array->is_nullable()) {
// get the nullmap of nullable column
- const auto& column_array_nullmap =
- assert_cast<const ColumnNullable&>(*column_array).get_null_map_column();
+ // hold the null column instead of a reference 'cause `column_array` will be assigned and freed below.
+ auto column_array_nullmap =
+ assert_cast<const ColumnNullable&>(*column_array).get_null_map_column_ptr();
// get the array column from nullable column
column_array = assert_cast<const ColumnNullable*>(column_array.get())
@@ -104,8 +105,9 @@ public:
->get_nested_type();
// need to union nullmap from all columns
- VectorizedUtils::update_null_map(outside_null_map->get_data(),
- column_array_nullmap.get_data());
+ VectorizedUtils::update_null_map(
+ outside_null_map->get_data(),
+ assert_cast<const ColumnUInt8&>(*column_array_nullmap).get_data());
}
// here is the array column
diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h
index 60a353b9eb0..b1a55e6f1a5 100644
--- a/be/src/vec/functions/array/function_array_element.h
+++ b/be/src/vec/functions/array/function_array_element.h
@@ -101,6 +101,9 @@ public:
UInt8* dst_null_map = dst_null_column->get_data().data();
const UInt8* src_null_map = nullptr;
ColumnsWithTypeAndName args;
+ block.replace_by_position(
+ arguments[0],
+ block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
auto col_left = block.get_by_position(arguments[0]);
if (col_left.column->is_nullable()) {
auto null_col = check_and_get_column<ColumnNullable>(*col_left.column);
@@ -328,7 +331,7 @@ private:
const UInt8* src_null_map, UInt8* dst_null_map) const {
// check array nested column type and get data
auto left_column = arguments[0].column->convert_to_full_column_if_const();
- const auto& array_column = reinterpret_cast<const ColumnArray&>(*left_column);
+ const auto& array_column = assert_cast<const ColumnArray&>(*left_column);
const auto& offsets = array_column.get_offsets();
DCHECK(offsets.size() == input_rows_count);
const UInt8* nested_null_map = nullptr;
diff --git a/regression-test/data/function_p0/test_array_map.out b/regression-test/data/function_p0/test_array_map.out
new file mode 100644
index 00000000000..9c9c4c6c8a2
Binary files /dev/null and b/regression-test/data/function_p0/test_array_map.out differ
diff --git a/regression-test/suites/function_p0/test_array_map.groovy b/regression-test/suites/function_p0/test_array_map.groovy
new file mode 100644
index 00000000000..9b4b0526fcf
--- /dev/null
+++ b/regression-test/suites/function_p0/test_array_map.groovy
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_map") {
+ sql """
+ drop table if exists mock_table;
+ """
+
+ sql """
+ CREATE ALIAS FUNCTION clean_html_entity_test(string) WITH PARAMETER(html) AS
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(
+ REPLACE(html, '&amp;', '&'),
+ '&lt;', '<'
+ ),
+ '&gt;', '>'
+ ),
+ '&quot;', '"'
+ ),
+ '&#39;', '\\\''
+ ),'&euro;', '€'
+ ),
+ '&nbsp;', ' '
+ ), "Ⅰ", "I"), "Ⅱ", "II"), "Ⅲ", "III"),".", ". ");
+ """
+ sql """ CREATE ALIAS FUNCTION clean_html_tag_test(string) WITH PARAMETER(html) AS REGEXP_REPLACE(html, '</?[^>]+>', ''); """
+ sql """
+ CREATE TABLE `mock_table` (
+ `aa` varchar(255) NULL,
+ `ab` varchar(255) NULL,
+ `ac` varchar(255) NULL,
+ `ad` text NULL,
+ `ae` text NULL,
+ `af` text NULL,
+ `ag` text NULL,
+ `ah` text NULL,
+ `ai` text NULL,
+ `aj` varchar(255) NULL,
+ `ak` text NULL,
+ `al` text NULL,
+ `am` text NULL,
+ `an` text NULL,
+ `ao` text NULL,
+ `ap` text NULL,
+ `aq` text NULL,
+ `ar` text NULL,
+ `as` text NULL,
+ `at` text NULL,
+ `au` text NULL,
+ `av` bigint NULL,
+ `aw` text NULL,
+ `ax` varchar(255) NULL,
+ `ay` text NULL,
+ `az` varchar(255) NULL,
+ `ba` varchar(255) NULL,
+ `bb` varchar(255) NULL,
+ `bc` int NULL,
+ `bd` int NULL,
+ `be` varchar(255) NULL,
+ `bf` varchar(255) NULL,
+ `bg` array<varchar(255)> NULL,
+ `bh` json NULL,
+ `bi` varchar(255) NULL,
+ `bj` varchar(255) NULL,
+ `bk` array<varchar(255)> NULL,
+ `bl` boolean NULL,
+ INDEX idx_ag (`ag`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ad (`ad`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ah (`ah`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ac (`ac`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ak (`ak`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_al (`al`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_am (`am`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ag_ngrambf (`ag`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+ INDEX idx_ad_ngrambf (`ad`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+ INDEX idx_ac_ngrambf (`ac`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+ INDEX idx_ah_ngrambf (`ah`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+ INDEX idx_bi (`bi`) USING INVERTED,
+ INDEX idx_ar (`ar`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_ar_ngrambf (`ar`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+ INDEX idx_bl (`bl`) USING INVERTED
+ ) ENGINE=OLAP
+ UNIQUE KEY(`aa`)
+ DISTRIBUTED BY HASH(`aa`) BUCKETS 16
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "min_load_replica_num" = "-1",
+ "is_being_synced" = "false",
+ "storage_medium" = "hdd",
+ "storage_format" = "V2",
+ "inverted_index_storage_format" = "V1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false",
+ "group_commit_interval_ms" = "10000",
+ "group_commit_data_bytes" = "134217728",
+ "enable_mow_light_delete" = "false"
+ );
+ """
+ sql """
+ CREATE VIEW `mock_view` AS
+ WITH
+ bm AS (SELECT
+ `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`,
+ CASE WHEN YEAR(`as`) >= 1970 THEN `as` ELSE NULL END as `bn`,
+ CASE WHEN YEAR(`au`) >= 1970 THEN `au` ELSE NULL END as `bo`,
+ CASE WHEN YEAR(`at`) >= 1970 THEN `at` ELSE NULL END as `bp`,
+ LENGTH(`aw`) as `bq`,
+ TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))) as `br`,
+ TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))) as `bs`,
+ ARRAY_MAP(x-> if(least((left(x, 5) = '6841-'), (length(x) = 10)), concat_ws('-', substring(x, 1, 7), substring(x, 8)), if(least((left(x, 5) = '6841-'), (length(x) = 9)), concat_ws('-', substring(x, 1, 6), substring(x, 7)), x)), `bk`) as `bt`,
+ ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bu`,
+ ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bv`
+ FROM mock_table),
+ bw AS (SELECT
+ `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`,
+ CASE
+ WHEN LENGTH(`bn`) = 10 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd')
+ WHEN LENGTH(`bn`) = 19 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss')
+ WHEN LENGTH(`bn`) = 26 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+ ELSE NULL
+ END AS `bx`,
+ CASE
+ WHEN LENGTH(`bo`) = 10 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd')
+ WHEN LENGTH(`bo`) = 19 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss')
+ WHEN LENGTH(`bo`) = 26 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+ ELSE NULL
+ END AS `by`,
+ CASE
+ WHEN LENGTH(`bp`) = 10 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd')
+ WHEN LENGTH(`bp`) = 19 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss')
+ WHEN LENGTH(`bp`) = 26 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+ ELSE NULL
+ END AS `bz`,
+ ARRAY_REMOVE(
+ ARRAY_COMPACT(
+ ARRAY_UNION(
+ ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -1), '-'), `bt`),
+ ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -2), '-'), `bt`),
+ ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -3), '-'), `bt`))), '') as `ca`,
+ SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ') as `cb`,
+ SPLIT_BY_STRING(`bu`, ' ') as `cc`,
+ array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `cd`,
+ array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `ce`,
+ SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ') as `cf`,
+ SPLIT_BY_STRING(`bv`, ' ') as `cg`,
+ array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ch`,
+ array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ci`
+ FROM bm),
+ cj AS (SELECT
+ `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`, `bx`, `by`, `bz`, `ca`, `cb`, `cc`, `cd`, `ce`, `cf`, `cg`, `ch`, `ci`,
+ ARRAY_COMPACT(ARRAY_EXCEPT(`bt`, `ca`)) as `ck`,
+ ARRAY_COMPACT(ARRAY_UNION(`bt`, `ca`)) as `cl`,
+ CASE
+ WHEN SIZE(`cc`) = 0 THEN `bs`
+ WHEN `cd`=1 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
+ WHEN `cd`=2 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
+ WHEN `cd` >2 AND `ce` = size(`cb`) and `ce` - `cd` > 1 THEN
+ CASE
+ WHEN element_at(`cc`, 1) = element_at(`cc`, `cd`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`-1), "")
+ ELSE ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`), " ")
+ END
+ ELSE ARRAY_JOIN(`cc`, " ")
+ END AS `cm`,
+ CASE
+ WHEN size(`cc`) = 0 THEN "tokenize_failed"
+ WHEN `cd` = 0 THEN "en"
+ WHEN `cd`=1 AND `ce` = size(`cb`) THEN "zh"
+ WHEN `cd`=1 AND `ce` < size(`cb`) THEN "zh_en"
+ WHEN `cd`=2 AND `ce` < size(`cb`) THEN "zh_en"
+ WHEN `cd` >2 AND `ce` = size(`cb`) THEN "en_zh"
+ ELSE "mixed"
+ END AS `cn`,
+ CASE
+ WHEN SIZE(`cg`) = 0 THEN `br`
+ WHEN `ch`=1 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
+ WHEN `ch`=2 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
+ WHEN `ch` >2 AND `ci` = size(`cf`) and `ci` - `ch` > 1 THEN
+ CASE
+ WHEN element_at(`cg`, 1) = element_at(`cg`, `ch`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`-1), "")
+ ELSE ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`), " ")
+ END
+ ELSE ARRAY_JOIN(`cg`, " ")
+ END AS `co`,
+ CASE
+ WHEN size(`cg`) = 0 THEN "tokenize_failed"
+ WHEN `ch` = 0 THEN "en"
+ WHEN `ch`=1 AND `ci` = size(`cf`) THEN "zh"
+ WHEN `ch`=1 AND `ci` < size(`cf`) THEN "zh_en"
+ WHEN `ch`=2 AND `ci` < size(`cf`) THEN "zh_en"
+ WHEN `ch` >2 AND `ci` = size(`cf`) THEN "en_zh"
+ ELSE "mixed"
+ END AS `cp`
+ FROM bw)
+ SELECT * FROM cj;
+ """
+ sql """
+ insert into mock_table(aa, ab,ac,ad) values('1','2','3','4');
+ """
+
+
+/*
+FIXME
+qt_sql """
+ SELECT * FROM mock_view LIMIT 530000,1000;
+ """
+*/
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]