This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 67a80999915 [fix](multi-catalog)fix max compute array and map type
read offset (#39822)
67a80999915 is described below
commit 67a809999152d458dc9703e839bf623487ed7c68
Author: slothever <[email protected]>
AuthorDate: Fri Aug 23 16:53:52 2024 +0800
[fix](multi-catalog)fix max compute array and map type read offset (#39822)
bp #39680
---
.../doris/maxcompute/MaxComputeColumnValue.java | 47 +++++-
.../mc/test_max_compute_complex_type.out | 17 ++
.../mc/test_max_compute_complex_type.groovy | 175 +++++++++++++++++++++
3 files changed, 234 insertions(+), 5 deletions(-)
diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
index 65810163840..644caf80d97 100644
--- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
+++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
@@ -32,8 +32,12 @@ import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TimeStampNanoVector;
import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
import org.apache.log4j.Logger;
import java.math.BigDecimal;
@@ -49,15 +53,22 @@ import java.util.List;
public class MaxComputeColumnValue implements ColumnValue {
private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class);
private int idx;
- private FieldVector column;
+ private int offset = 0; // for complex type
+ private ValueVector column;
public MaxComputeColumnValue() {
idx = 0;
}
- public void reset(FieldVector column) {
+ public MaxComputeColumnValue(ValueVector valueVector, int i) {
+ this.column = valueVector;
+ this.idx = i;
+ }
+
+ public void reset(ValueVector column) {
this.column = column;
this.idx = 0;
+ this.offset = 0;
}
@Override
@@ -222,16 +233,42 @@ public class MaxComputeColumnValue implements ColumnValue {
@Override
public void unpackArray(List<ColumnValue> values) {
-
+ skippedIfNull();
+ ListVector listCol = (ListVector) column;
+ int elemSize = listCol.getObject(idx).size();
+ for (int i = 0; i < elemSize; i++) {
+ MaxComputeColumnValue val = new MaxComputeColumnValue(listCol.getDataVector(), offset);
+ values.add(val);
+ offset++;
+ }
+ idx++;
}
@Override
public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
-
+ skippedIfNull();
+ MapVector mapCol = (MapVector) column;
+ int elemSize = mapCol.getObject(idx).size();
+ FieldVector keyList = mapCol.getDataVector().getChildrenFromFields().get(0);
+ FieldVector valList = mapCol.getDataVector().getChildrenFromFields().get(1);
+ for (int i = 0; i < elemSize; i++) {
+ MaxComputeColumnValue key = new MaxComputeColumnValue(keyList, offset);
+ keys.add(key);
+ MaxComputeColumnValue val = new MaxComputeColumnValue(valList, offset);
+ values.add(val);
+ offset++;
+ }
+ idx++;
}
@Override
public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
-
+ skippedIfNull();
+ StructVector structCol = (StructVector) column;
+ for (Integer fieldIndex : structFieldIndex) {
+ MaxComputeColumnValue val = new MaxComputeColumnValue(structCol.getChildByOrdinal(fieldIndex), idx);
+ values.add(val);
+ }
+ idx++;
}
}
diff --git a/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out
new file mode 100644
index 00000000000..86df55f16c5
--- /dev/null
+++ b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !mc_q1 --
+3 [1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
+2 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a",
"b", "c"]
+1 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a",
"b", "c"]
+
+-- !mc_q2 --
+{1:"example1", 2:"example2"} {1:2.5, 2:3.75}
+{349:"asd", 324:"uid"} {3:2.5, 99:3.75}
+
+-- !mc_q3 --
+{"phone_number":123450, "email":"[email protected]", "addr":"Addr1"}
{"id":"user1", "age":25}
+{"phone_number":2345671, "email":"[email protected]", "addr":"Addr2"}
{"id":"user2", "age":30}
+
+-- !mc_q4 --
+user1 [{"activity_date":"2024-08-01",
"activities":{"cooking":{"details":"Made vegan meal",
"metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched
action movie", "metrics":{"time_spent":1.5, "calories":500}}}},
{"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan
meal", "metrics":{"time_spent":1.5, "calories":500}},
"movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5,
"calories":500}}}}]
+user2 [{"activity_date":"2024-08-01",
"activities":{"cooking":{"details":"Made vegan meal",
"metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched
action movie", "metrics":{"time_spent":1.5, "calories":500}}}},
{"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan
meal", "metrics":{"time_spent":1.5, "calories":500}},
"movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5,
"calories":500}}}}]
diff --git a/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy
new file mode 100644
index 00000000000..a3de3715f91
--- /dev/null
+++ b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/*
+ // Test Case DDL
+ create table array_table (
+ id int,
+ arr1 ARRAY<BIGINT>,
+ arr2 ARRAY<VARCHAR(10)>,
+ arr3 ARRAY<DOUBLE>,
+ arr4 ARRAY<DATE>,
+ arr5 ARRAY<DATETIME>
+ );
+ INSERT INTO array_table VALUES(1, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
+ INSERT INTO array_table VALUES(2, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
+ INSERT INTO array_table VALUES(3, array(1, 2, 3), array('a', 'b', 'c'), array(1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
+
+ create table map_table (
+ arr1 MAP<BIGINT, DOUBLE>,
+ arr2 MAP<BIGINT, STRING>
+ );
+ INSERT INTO map_table (arr1, arr2)
+ VALUES (
+ MAP(1, 2.5, 2, 3.75),
+ MAP(1, 'example1', 2, 'example2')
+ );
+ INSERT INTO map_table (arr1, arr2)
+ VALUES (
+ MAP(3, 2.5, 99, 3.75),
+ MAP(349, 'asd', 324, 'uid')
+ );
+
+ create table struct_table (
+ user_info STRUCT<id: STRING,age: INT>,
+ contact_info STRUCT<phone_number: BIGINT, email: STRING, addr:
VARCHAR(10)>
+ );
+
+ INSERT INTO struct_table VALUES
+ (
+ named_struct('id', 'user1', 'age', 25),
+ named_struct('phone_number', 123450, 'email', '[email protected]',
'addr', 'Addr1')
+ ),
+ (
+ named_struct('id', 'user2', 'age', 30),
+ named_struct('phone_number', 2345671, 'email', '[email protected]',
'addr', 'Addr2')
+ ),
+ (
+ named_struct('id', 'user3', 'age', 35),
+ named_struct('phone_number', 3456789, 'email', '[email protected]',
'addr', 'Addr3')
+ );
+
+ CREATE TABLE nested_complex_table (
+ user_id STRING,
+ user_profile STRUCT<
+ name: STRING,
+ age: INT,
+ preferences: MAP<
+ STRING,
+ STRUCT<
+ preference_id: INT,
+ preference_values: ARRAY<STRING>
+ >
+ >
+ >,
+ activity_log ARRAY<
+ STRUCT<
+ activity_date: STRING,
+ activities: MAP<
+ STRING,
+ STRUCT<
+ details: STRING,
+ metrics: MAP<STRING, DOUBLE>
+ >
+ >
+ >
+ >
+ );
+ INSERT INTO nested_complex_table VALUES
+ (
+ 'user1',
+ named_struct('name', 'Alice', 'age', 28, 'preferences', map(
+ 'sports', named_struct('preference_id', 101, 'preference_values',
array('soccer', 'tennis')),
+ 'music', named_struct('preference_id', 102, 'preference_values',
array('rock', 'classical'))
+ )),
+ array(
+ named_struct('activity_date', '2024-08-01', 'activities', map(
+ 'workout', named_struct('details', 'Morning run', 'metrics',
map('duration', 30.5, 'calories', 200.0)),
+ 'reading', named_struct('details', 'Read book on Hive',
'metrics', map('pages', 50.0, 'time', 2.0))
+ )),
+ named_struct('activity_date', '2024-08-02', 'activities', map(
+ 'travel', named_struct('details', 'Flight to NY', 'metrics',
map('distance', 500.0, 'time', 3.0)),
+ 'meeting', named_struct('details', 'Project meeting',
'metrics', map('duration', 1.5, 'participants', 5.0))
+ ))
+ )
+ ),
+ (
+ 'user2',
+ named_struct('name', 'Bob', 'age', 32, 'preferences', map(
+ 'books', named_struct('preference_id', 201, 'preference_values',
array('fiction', 'non-fiction')),
+ 'travel', named_struct('preference_id', 202, 'preference_values',
array('beaches', 'mountains'))
+ )),
+ array(
+ named_struct('activity_date', '2024-08-01', 'activities', map(
+ 'hiking', named_struct('details', 'Mountain trail', 'metrics',
map('distance', 10.0, 'elevation', 500.0)),
+ 'photography', named_struct('details', 'Wildlife photoshoot',
'metrics', map('photos_taken', 100.0, 'time', 4.0))
+ )),
+ named_struct('activity_date', '2024-08-02', 'activities', map(
+ 'workshop', named_struct('details', 'Photography workshop',
'metrics', map('duration', 3.0, 'participants', 15.0)),
+ 'shopping', named_struct('details', 'Bought camera gear',
'metrics', map('items', 5.0, 'cost', 1500.0))
+ ))
+ )
+ ),
+ (
+ 'user3',
+ named_struct('name', 'Carol', 'age', 24, 'preferences', map(
+ 'food', named_struct('preference_id', 301, 'preference_values',
array('vegan', 'desserts')),
+ 'movies', named_struct('preference_id', 302, 'preference_values',
array('action', 'comedy'))
+ )),
+ array(
+ named_struct('activity_date', '2024-08-01', 'activities', map(
+ 'cooking', named_struct('details', 'Made vegan meal',
'metrics', map('time_spent', 1.5, 'calories', 500.0)),
+ 'movie', named_struct('details', 'Watched action movie',
'metrics', map('duration', 2.0, 'rating', 8.5))
+ )),
+ named_struct('activity_date', '2024-08-02', 'activities', map(
+ 'gym', named_struct('details', 'Strength training', 'metrics',
map('duration', 1.0, 'calories', 300.0)),
+ 'shopping', named_struct('details', 'Bought groceries',
'metrics', map('items', 10.0, 'cost', 100.0))
+ ))
+ )
+ );
+ */
+suite("test_max_compute_complex_type", "p0,external,doris,external_docker,external_docker_doris") {
+ String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String ak = context.config.otherConfigs.get("aliYunAk")
+ String sk = context.config.otherConfigs.get("aliYunSk")
+ String mc_catalog_name = "test_max_compute_complex_type"
+ sql """drop catalog if exists ${mc_catalog_name} """
+ sql """
+ CREATE CATALOG IF NOT EXISTS ${mc_catalog_name} PROPERTIES (
+ "type" = "max_compute",
+ "mc.default.project" = "mc_datalake",
+ "mc.region" = "cn-beijing",
+ "mc.access_key" = "${ak}",
+ "mc.secret_key" = "${sk}",
+ "mc.public_access" = "true"
+ );
+ """
+
+ logger.info("catalog " + mc_catalog_name + " created")
+ sql """switch ${mc_catalog_name};"""
+ logger.info("switched to catalog " + mc_catalog_name)
+ sql """ use mc_datalake """
+
+ qt_mc_q1 """ select id,arr3,arr1,arr5,arr2 from array_table order by id desc """
+ qt_mc_q2 """ select arr2,arr1 from map_table order by id limit 2 """
+ qt_mc_q3 """ select contact_info,user_info from struct_table order by id limit 2 """
+ qt_mc_q4 """ select user_id,activity_log from nested_complex_table order by user_id limit 2 """
+
+ sql """drop catalog ${mc_catalog_name};"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]