This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit a5d0df342d11ed6fca688c9b2ad3fa12873f0e69 Author: zhangdong <493738...@qq.com> AuthorDate: Tue Oct 17 22:07:13 2023 +0800 [improvement](catalog)compatible with paimon 0.5 (#24985) compatible with paimon 0.5 add p0 for paimon,need set enablePaimonTest=true --- .../docker-compose/hive/scripts/README | 5 + .../docker-compose/hive/scripts/hive-metastore.sh | 17 ++ fe/be-java-extensions/paimon-scanner/pom.xml | 47 +---- .../org/apache/doris/paimon/PaimonJniScanner.java | 2 +- .../org/apache/doris/paimon/PaimonTypeUtils.java | 229 +++++++++++++++++++++ fe/pom.xml | 10 +- regression-test/conf/regression-conf.groovy | 3 +- .../paimon/test_paimon_catalog.out | 68 ++++++ .../paimon/test_paimon_catalog.groovy | 61 ++++++ 9 files changed, 399 insertions(+), 43 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/README b/docker/thirdparties/docker-compose/hive/scripts/README index a50efc78f0b..4eaf3893486 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/README +++ b/docker/thirdparties/docker-compose/hive/scripts/README @@ -2,3 +2,8 @@ https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz 2. Unzip and name it to "tpch1.db" + +3. Download paimon file from: + https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz + +4. Unzip and name it to "paimon1" diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh index 3ac47e4c118..2d19c7aa1c3 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh +++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh @@ -40,6 +40,23 @@ hadoop fs -mkdir -p /user/doris/ echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/" hadoop fs -put /mnt/scripts/tpch1.db /user/doris/ + +# if you are testing locally, consider commenting out the paimon section below with # +if [[ ! 
-d "/mnt/scripts/paimon1" ]]; then + echo "/mnt/scripts/paimon1 does not exist" + cd /mnt/scripts/ + curl -O https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz + tar -zxf paimon1.tar.gz + rm -rf paimon1.tar.gz + cd - +else + echo "/mnt/scripts/paimon1 exist, continue !" +fi + +## put paimon1 +echo "hadoop fs -put /mnt/scripts/paimon1 /user/doris/" +hadoop fs -put /mnt/scripts/paimon1 /user/doris/ + ## put other preinstalled data echo "hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/" hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/ diff --git a/fe/be-java-extensions/paimon-scanner/pom.xml b/fe/be-java-extensions/paimon-scanner/pom.xml index 76da4288d07..0b513691303 100644 --- a/fe/be-java-extensions/paimon-scanner/pom.xml +++ b/fe/be-java-extensions/paimon-scanner/pom.xml @@ -49,55 +49,26 @@ under the License. <dependency> <groupId>org.apache.paimon</groupId> - <artifactId>paimon-bundle</artifactId> - <version>${paimon.version}</version> - </dependency> - <dependency> - <groupId>org.apache.paimon</groupId> - <artifactId>paimon-hive-connector-2.3</artifactId> + <artifactId>paimon-core</artifactId> <version>${paimon.version}</version> </dependency> + <dependency> <groupId>org.apache.paimon</groupId> - <artifactId>paimon-s3</artifactId> + <artifactId>paimon-common</artifactId> <version>${paimon.version}</version> </dependency> + <dependency> <groupId>org.apache.paimon</groupId> - <artifactId>paimon-oss-impl</artifactId> + <artifactId>paimon-format</artifactId> <version>${paimon.version}</version> </dependency> + <dependency> - <groupId>org.apache.thrift</groupId> - <artifactId>libthrift</artifactId> - <version>0.9.3</version> - </dependency> - <dependency> - <groupId>com.facebook.presto.hive</groupId> - <artifactId>hive-apache</artifactId> - <version>${presto.hive.version}</version> - <exclusions> - <exclusion> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - </exclusion> - 
</exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-hdfs</artifactId> - </dependency> - <dependency> - <groupId>commons-io</groupId> - <artifactId>commons-io</artifactId> + <groupId>org.apache.doris</groupId> + <artifactId>hive-catalog-shade</artifactId> + <version>${doris.hive.catalog.shade.version}</version> </dependency> </dependencies> diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index 49675627891..4e3cda8222f 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -118,7 +118,7 @@ public class PaimonJniScanner extends JniScanner { fields[i], paimonAllFieldNames)); } DataType dataType = table.rowType().getTypeAt(index); - columnTypes[i] = ColumnType.parseType(fields[i], dataType.toString()); + columnTypes[i] = PaimonTypeUtils.fromPaimonType(fields[i], dataType); } super.types = columnTypes; } diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java new file mode 100644 index 00000000000..b2a9450a6de --- /dev/null +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java @@ -0,0 +1,229 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.paimon; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnType.Type; + +import org.apache.paimon.types.ArrayType; +import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.BinaryType; +import org.apache.paimon.types.BooleanType; +import org.apache.paimon.types.CharType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypeDefaultVisitor; +import org.apache.paimon.types.DateType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.DoubleType; +import org.apache.paimon.types.FloatType; +import org.apache.paimon.types.IntType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.MapType; +import org.apache.paimon.types.MultisetType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.types.SmallIntType; +import org.apache.paimon.types.TimeType; +import org.apache.paimon.types.TimestampType; +import org.apache.paimon.types.TinyIntType; +import org.apache.paimon.types.VarBinaryType; +import org.apache.paimon.types.VarCharType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Convert paimon type to doris type. 
+ */ +public class PaimonTypeUtils { + private static final Logger LOG = LoggerFactory.getLogger(PaimonTypeUtils.class); + + private PaimonTypeUtils() { + } + + public static ColumnType fromPaimonType(String columnName, DataType type) { + PaimonColumnType paimonColumnType = type.accept(PaimonToDorisTypeVisitor.INSTANCE); + return new ColumnType(columnName, paimonColumnType.getType(), paimonColumnType.getLength(), + paimonColumnType.getPrecision(), + paimonColumnType.getScale()); + } + + private static class PaimonToDorisTypeVisitor extends DataTypeDefaultVisitor<PaimonColumnType> { + + private static final PaimonToDorisTypeVisitor INSTANCE = new PaimonToDorisTypeVisitor(); + + @Override + public PaimonColumnType visit(CharType charType) { + return new PaimonColumnType(Type.CHAR, charType.getLength()); + } + + @Override + public PaimonColumnType visit(VarCharType varCharType) { + return new PaimonColumnType(Type.VARCHAR, varCharType.getLength()); + } + + @Override + public PaimonColumnType visit(BooleanType booleanType) { + return new PaimonColumnType(Type.BOOLEAN); + } + + @Override + public PaimonColumnType visit(BinaryType binaryType) { + return new PaimonColumnType(Type.BINARY); + } + + @Override + public PaimonColumnType visit(VarBinaryType varBinaryType) { + return new PaimonColumnType(Type.BINARY); + } + + @Override + public PaimonColumnType visit(DecimalType decimalType) { + return new PaimonColumnType(Type.DECIMAL128, decimalType.getPrecision(), decimalType.getScale()); + } + + @Override + public PaimonColumnType visit(TinyIntType tinyIntType) { + return new PaimonColumnType(Type.TINYINT); + } + + @Override + public PaimonColumnType visit(SmallIntType smallIntType) { + return new PaimonColumnType(Type.SMALLINT); + } + + @Override + public PaimonColumnType visit(IntType intType) { + return new PaimonColumnType(Type.INT); + } + + @Override + public PaimonColumnType visit(BigIntType bigIntType) { + return new PaimonColumnType(Type.BIGINT); + } + + @Override + 
public PaimonColumnType visit(FloatType floatType) { + return new PaimonColumnType(Type.FLOAT); + } + + @Override + public PaimonColumnType visit(DoubleType doubleType) { + return new PaimonColumnType(Type.DOUBLE); + } + + @Override + public PaimonColumnType visit(DateType dateType) { + return new PaimonColumnType(Type.DATEV2); + } + + @Override + public PaimonColumnType visit(TimeType timeType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(timeType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(TimestampType timestampType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(timestampType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(LocalZonedTimestampType localZonedTimestampType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(localZonedTimestampType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(ArrayType arrayType) { + return this.defaultMethod(arrayType); + } + + @Override + public PaimonColumnType visit(MultisetType multisetType) { + return this.defaultMethod(multisetType); + } + + @Override + public PaimonColumnType visit(MapType mapType) { + return this.defaultMethod(mapType); + } + + @Override + public PaimonColumnType visit(RowType rowType) { + return this.defaultMethod(rowType); + } + + @Override + protected PaimonColumnType defaultMethod(DataType dataType) { + LOG.info("UNSUPPORTED type:" + dataType); + return new PaimonColumnType(Type.UNSUPPORTED); + } + } + + private static class PaimonColumnType { + private Type type; + // only used in char & varchar + private int length; + private int precision; + private int scale; + + public PaimonColumnType(Type type) { + this.type = type; + this.length = -1; + this.precision = -1; + 
this.scale = -1; + } + + public PaimonColumnType(Type type, int length) { + this.type = type; + this.length = length; + this.precision = -1; + this.scale = -1; + } + + public PaimonColumnType(Type type, int precision, int scale) { + this.type = type; + this.precision = precision; + this.scale = scale; + this.length = -1; + } + + public Type getType() { + return type; + } + + public int getLength() { + return length; + } + + public int getPrecision() { + return precision; + } + + public int getScale() { + return scale; + } + + public void setPrecision(int precision) { + this.precision = precision; + } + } +} diff --git a/fe/pom.xml b/fe/pom.xml index 32f495e65d4..5f1d8547972 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -195,7 +195,7 @@ under the License. <doris.home>${fe.dir}/../</doris.home> <revision>1.2-SNAPSHOT</revision> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - <doris.hive.catalog.shade.version>1.0.1</doris.hive.catalog.shade.version> + <doris.hive.catalog.shade.version>1.0.2</doris.hive.catalog.shade.version> <maven.compiler.source>1.8</maven.compiler.source> <maven.compiler.target>1.8</maven.compiler.target> <!--plugin parameters--> @@ -316,8 +316,12 @@ under the License. 
<!--todo waiting release--> <quartz.version>2.3.2</quartz.version> <!-- paimon --> - <paimon.version>0.4.0-incubating</paimon.version> - <disruptor.version>3.3.4</disruptor.version> + <paimon.version>0.5.0-incubating</paimon.version> + <disruptor.version>3.4.4</disruptor.version> + <trino.parser.version>395</trino.parser.version> + <!-- arrow flight sql --> + <arrow.vector.classifier>shade-format-flatbuffers</arrow.vector.classifier> + <flatbuffers.version>1.12.0</flatbuffers.version> </properties> <profiles> <profile> diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index f360de78851..be5971448ae 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -95,9 +95,10 @@ clickhouse_22_port=8123 doris_port=9030 // hive catalog test config -// To enable hive test, you need first start hive container. +// To enable hive/paimon test, you need first start hive container. // See `docker/thirdparties/start-thirdparties-docker.sh` enableHiveTest=false +enablePaimonTest=false hms_port=9183 hdfs_port=8120 hiveServerPort=10000 diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out new file mode 100644 index 00000000000..4918db4555c --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out @@ -0,0 +1,68 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !all -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 +10 20 30 40 50 60 70 80 90.1 100.1 0.00 2020-03-02 130str 140varchar b false bbbb 2023-08-14T08:32:52.821 + +-- !c1 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c2 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c3 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c4 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c5 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c6 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c7 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c8 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c9 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c10 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c11 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c12 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c13 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c14 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c15 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c16 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 
2023-08-13T09:32:38.530 + +-- !c18 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c19 -- +11 22 aa bb cc +1 2 a b c + +-- !c20 -- +1 2 a b c + +-- !c21 -- +1 2 a b c + +-- !c22 -- + diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy index a6e687b3e0e..ad72a47e64d 100644 --- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy @@ -51,4 +51,65 @@ suite("test_paimon_catalog", "p0,external,doris,external_docker,external_docker_ "hadoop.username"="hadoop" ); """ + + String enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + def all = """select * from all_table;""" + def c1 = """select * from all_table where c1=1;""" + def c2 = """select * from all_table where c2=2;""" + def c3 = """select * from all_table where c3=3;""" + def c4 = """select * from all_table where c4=4;""" + def c5 = """select * from all_table where c5=5;""" + def c6 = """select * from all_table where c6=6;""" + def c7 = """select * from all_table where c7=7;""" + def c8 = """select * from all_table where c8=8;""" + def c9 = """select * from all_table where c9<10;""" + def c10 = """select * from all_table where c10=10.1;""" + def c11 = """select * from all_table where c11=11.1;""" + def c12 = """select * from all_table where c12='2020-02-02';""" + def c13 = """select * from all_table where c13='13str';""" + def c14 = """select * from all_table where c14='14varchar';""" + def c15 = """select * from all_table where c15='a';""" + def c16 = """select * from all_table where c16=true;""" + def c18 = """select * from all_table where c18='2023-08-13 09:32:38.53';""" + def c19 = """select * from auto_bucket;""" + def c20 = """select * from auto_bucket where dt="b";""" + 
def c21 = """select * from auto_bucket where dt="b" and hh="c";""" + def c22 = """select * from auto_bucket where dt="d";""" + + String hdfs_port = context.config.otherConfigs.get("hdfs_port") + String catalog_name = "paimon1" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type" = "paimon", + "paimon.catalog.type"="filesystem", + "warehouse" = "hdfs://${externalEnvIp}:${hdfs_port}/user/doris/paimon1" + );""" + sql """use `${catalog_name}`.`db1`""" + + qt_all all + qt_c1 c1 + qt_c2 c2 + qt_c3 c3 + qt_c4 c4 + qt_c5 c5 + qt_c6 c6 + qt_c7 c7 + qt_c8 c8 + qt_c9 c9 + qt_c10 c10 + qt_c11 c11 + qt_c12 c12 + qt_c13 c13 + qt_c14 c14 + qt_c15 c15 + qt_c16 c16 + qt_c18 c18 + qt_c19 c19 + qt_c20 c20 + qt_c21 c21 + qt_c22 c22 + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org