This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 67a80999915 [fix](multi-catalog)fix max compute array and map type read offset (#39822)
67a80999915 is described below

commit 67a809999152d458dc9703e839bf623487ed7c68
Author: slothever <[email protected]>
AuthorDate: Fri Aug 23 16:53:52 2024 +0800

    [fix](multi-catalog)fix max compute array and map type read offset (#39822)
    
    bp #39680
---
 .../doris/maxcompute/MaxComputeColumnValue.java    |  47 +++++-
 .../mc/test_max_compute_complex_type.out           |  17 ++
 .../mc/test_max_compute_complex_type.groovy        | 175 +++++++++++++++++++++
 3 files changed, 234 insertions(+), 5 deletions(-)

diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
index 65810163840..644caf80d97 100644
--- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
+++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
@@ -32,8 +32,12 @@ import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TimeStampNanoVector;
 import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
 import org.apache.log4j.Logger;
 
 import java.math.BigDecimal;
@@ -49,15 +53,22 @@ import java.util.List;
 public class MaxComputeColumnValue implements ColumnValue {
     private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class);
     private int idx;
-    private FieldVector column;
+    private int offset = 0; // for complex type
+    private ValueVector column;
 
     public MaxComputeColumnValue() {
         idx = 0;
     }
 
-    public void reset(FieldVector column) {
+    public MaxComputeColumnValue(ValueVector valueVector, int i) {
+        this.column = valueVector;
+        this.idx = i;
+    }
+
+    public void reset(ValueVector column) {
         this.column = column;
         this.idx = 0;
+        this.offset = 0;
     }
 
     @Override
@@ -222,16 +233,42 @@ public class MaxComputeColumnValue implements ColumnValue {
 
     @Override
     public void unpackArray(List<ColumnValue> values) {
-
+        skippedIfNull();
+        ListVector listCol = (ListVector) column;
+        int elemSize = listCol.getObject(idx).size();
+        for (int i = 0; i < elemSize; i++) {
+            MaxComputeColumnValue val = new MaxComputeColumnValue(listCol.getDataVector(), offset);
+            values.add(val);
+            offset++;
+        }
+        idx++;
     }
 
     @Override
     public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
-
+        skippedIfNull();
+        MapVector mapCol = (MapVector) column;
+        int elemSize = mapCol.getObject(idx).size();
+        FieldVector keyList = mapCol.getDataVector().getChildrenFromFields().get(0);
+        FieldVector valList = mapCol.getDataVector().getChildrenFromFields().get(1);
+        for (int i = 0; i < elemSize; i++) {
+            MaxComputeColumnValue key = new MaxComputeColumnValue(keyList, offset);
+            keys.add(key);
+            MaxComputeColumnValue val = new MaxComputeColumnValue(valList, offset);
+            values.add(val);
+            offset++;
+        }
+        idx++;
     }
 
     @Override
     public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
-
+        skippedIfNull();
+        StructVector structCol = (StructVector) column;
+        for (Integer fieldIndex : structFieldIndex) {
+            MaxComputeColumnValue val = new MaxComputeColumnValue(structCol.getChildByOrdinal(fieldIndex), idx);
+            values.add(val);
+        }
+        idx++;
     }
 }
diff --git a/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out
new file mode 100644
index 00000000000..86df55f16c5
--- /dev/null
+++ b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !mc_q1 --
+3      [1.3]   [1, 2, 3]       ["2023-05-23 05:55:12.000"]     ["a", "b", "c"]
+2      [1.2, 1.3]      [1, 2, 3]       ["2023-05-23 05:55:12.000"]     ["a", 
"b", "c"]
+1      [1.2, 1.3]      [1, 2, 3]       ["2023-05-23 05:55:12.000"]     ["a", 
"b", "c"]
+
+-- !mc_q2 --
+{1:"example1", 2:"example2"}   {1:2.5, 2:3.75}
+{349:"asd", 324:"uid"} {3:2.5, 99:3.75}
+
+-- !mc_q3 --
+{"phone_number":123450, "email":"[email protected]", "addr":"Addr1"}   
{"id":"user1", "age":25}
+{"phone_number":2345671, "email":"[email protected]", "addr":"Addr2"}  
{"id":"user2", "age":30}
+
+-- !mc_q4 --
+user1  [{"activity_date":"2024-08-01", 
"activities":{"cooking":{"details":"Made vegan meal", 
"metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched 
action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, 
{"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan 
meal", "metrics":{"time_spent":1.5, "calories":500}}, 
"movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, 
"calories":500}}}}]
+user2  [{"activity_date":"2024-08-01", 
"activities":{"cooking":{"details":"Made vegan meal", 
"metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched 
action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, 
{"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan 
meal", "metrics":{"time_spent":1.5, "calories":500}}, 
"movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, 
"calories":500}}}}]
diff --git a/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy
new file mode 100644
index 00000000000..a3de3715f91
--- /dev/null
+++ b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/*
+    // Test Case DDL
+    create table array_table (
+        id int,
+        arr1 ARRAY<BIGINT>,
+        arr2 ARRAY<VARCHAR(10)>,
+        arr3 ARRAY<DOUBLE>,
+        arr4 ARRAY<DATE>,
+        arr5 ARRAY<DATETIME>
+    );
+    INSERT INTO array_table VALUES(1, array(1, 2, 3), array('a', 'b', 'c'), 
array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 
13:55:12')));
+    INSERT INTO array_table VALUES(2, array(1, 2, 3), array('a', 'b', 'c'), 
array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 
13:55:12')));
+    INSERT INTO array_table VALUES(3, array(1, 2, 3), array('a', 'b', 'c'), 
array(1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
+
+    create table map_table (
+        arr1 MAP<BIGINT, DOUBLE>,
+        arr2 MAP<BIGINT, STRING>
+    );
+    INSERT INTO map_table (arr1, arr2)
+    VALUES (
+        MAP(1, 2.5, 2, 3.75),
+        MAP(1, 'example1', 2, 'example2')
+    );
+    INSERT INTO map_table (arr1, arr2)
+    VALUES (
+        MAP(3, 2.5, 99, 3.75),
+        MAP(349, 'asd', 324, 'uid')
+    );
+
+    create table struct_table (
+        user_info STRUCT<id: STRING,age: INT>,
+        contact_info STRUCT<phone_number: BIGINT, email: STRING, addr: 
VARCHAR(10)>
+    );
+
+    INSERT INTO struct_table VALUES
+    (
+        named_struct('id', 'user1', 'age', 25),
+        named_struct('phone_number', 123450, 'email', '[email protected]', 
'addr', 'Addr1')
+    ),
+    (
+        named_struct('id', 'user2', 'age', 30),
+        named_struct('phone_number', 2345671, 'email', '[email protected]', 
'addr', 'Addr2')
+    ),
+    (
+        named_struct('id', 'user3', 'age', 35),
+        named_struct('phone_number', 3456789, 'email', '[email protected]', 
'addr', 'Addr3')
+    );
+
+    CREATE TABLE nested_complex_table (
+        user_id STRING,
+        user_profile STRUCT<
+            name: STRING,
+            age: INT,
+            preferences: MAP<
+                STRING,
+                STRUCT<
+                    preference_id: INT,
+                    preference_values: ARRAY<STRING>
+                >
+            >
+        >,
+        activity_log ARRAY<
+            STRUCT<
+                activity_date: STRING,
+                activities: MAP<
+                    STRING,
+                    STRUCT<
+                        details: STRING,
+                        metrics: MAP<STRING, DOUBLE>
+                    >
+                >
+            >
+        >
+    );
+    INSERT INTO nested_complex_table VALUES
+    (
+        'user1',
+        named_struct('name', 'Alice', 'age', 28, 'preferences', map(
+            'sports', named_struct('preference_id', 101, 'preference_values', 
array('soccer', 'tennis')),
+            'music', named_struct('preference_id', 102, 'preference_values', 
array('rock', 'classical'))
+        )),
+        array(
+            named_struct('activity_date', '2024-08-01', 'activities', map(
+                'workout', named_struct('details', 'Morning run', 'metrics', 
map('duration', 30.5, 'calories', 200.0)),
+                'reading', named_struct('details', 'Read book on Hive', 
'metrics', map('pages', 50.0, 'time', 2.0))
+            )),
+            named_struct('activity_date', '2024-08-02', 'activities', map(
+                'travel', named_struct('details', 'Flight to NY', 'metrics', 
map('distance', 500.0, 'time', 3.0)),
+                'meeting', named_struct('details', 'Project meeting', 
'metrics', map('duration', 1.5, 'participants', 5.0))
+            ))
+        )
+    ),
+    (
+        'user2',
+        named_struct('name', 'Bob', 'age', 32, 'preferences', map(
+            'books', named_struct('preference_id', 201, 'preference_values', 
array('fiction', 'non-fiction')),
+            'travel', named_struct('preference_id', 202, 'preference_values', 
array('beaches', 'mountains'))
+        )),
+        array(
+            named_struct('activity_date', '2024-08-01', 'activities', map(
+                'hiking', named_struct('details', 'Mountain trail', 'metrics', 
map('distance', 10.0, 'elevation', 500.0)),
+                'photography', named_struct('details', 'Wildlife photoshoot', 
'metrics', map('photos_taken', 100.0, 'time', 4.0))
+            )),
+            named_struct('activity_date', '2024-08-02', 'activities', map(
+                'workshop', named_struct('details', 'Photography workshop', 
'metrics', map('duration', 3.0, 'participants', 15.0)),
+                'shopping', named_struct('details', 'Bought camera gear', 
'metrics', map('items', 5.0, 'cost', 1500.0))
+            ))
+        )
+    ),
+    (
+        'user3',
+        named_struct('name', 'Carol', 'age', 24, 'preferences', map(
+            'food', named_struct('preference_id', 301, 'preference_values', 
array('vegan', 'desserts')),
+            'movies', named_struct('preference_id', 302, 'preference_values', 
array('action', 'comedy'))
+        )),
+        array(
+            named_struct('activity_date', '2024-08-01', 'activities', map(
+                'cooking', named_struct('details', 'Made vegan meal', 
'metrics', map('time_spent', 1.5, 'calories', 500.0)),
+                'movie', named_struct('details', 'Watched action movie', 
'metrics', map('duration', 2.0, 'rating', 8.5))
+            )),
+            named_struct('activity_date', '2024-08-02', 'activities', map(
+                'gym', named_struct('details', 'Strength training', 'metrics', 
map('duration', 1.0, 'calories', 300.0)),
+                'shopping', named_struct('details', 'Bought groceries', 
'metrics', map('items', 10.0, 'cost', 100.0))
+            ))
+        )
+    );
+ */
+suite("test_max_compute_complex_type", "p0,external,doris,external_docker,external_docker_doris") {
+    String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String ak = context.config.otherConfigs.get("aliYunAk")
+        String sk = context.config.otherConfigs.get("aliYunSk")
+        String mc_catalog_name = "test_max_compute_complex_type"
+        sql """drop catalog if exists ${mc_catalog_name} """
+        sql """
+        CREATE CATALOG IF NOT EXISTS ${mc_catalog_name} PROPERTIES (
+                "type" = "max_compute",
+                "mc.default.project" = "mc_datalake",
+                "mc.region" = "cn-beijing",
+                "mc.access_key" = "${ak}",
+                "mc.secret_key" = "${sk}",
+                "mc.public_access" = "true"
+        );
+        """
+
+        logger.info("catalog " + mc_catalog_name + " created")
+        sql """switch ${mc_catalog_name};"""
+        logger.info("switched to catalog " + mc_catalog_name)
+        sql """ use mc_datalake """
+
+        qt_mc_q1 """ select id,arr3,arr1,arr5,arr2 from array_table order by id desc """
+        qt_mc_q2 """ select arr2,arr1 from map_table order by id limit 2 """
+        qt_mc_q3 """ select contact_info,user_info from struct_table order by id limit 2 """
+        qt_mc_q4 """ select user_id,activity_log from nested_complex_table order by user_id limit 2 """
+
+        sql """drop catalog ${mc_catalog_name};"""
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to