This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 224bca3794 [docker](hudi) add hudi docker compose (#19048)
224bca3794 is described below

commit 224bca379470b3a396d40e680dd40436c9f6d2b0
Author: hechao <73096722+hechao-u...@users.noreply.github.com>
AuthorDate: Tue May 2 09:54:52 2023 +0800

    [docker](hudi) add hudi docker compose (#19048)
---
 docker/thirdparties/docker-compose/hudi/hadoop.env  |  52 ++++
 .../thirdparties/docker-compose/hudi/hudi.yaml.tpl  | 267 +++++++++++++++++++++
 .../hudi/scripts/config/base.properties             |  25 ++
 .../hudi/scripts/config/dfs-source.properties       |  31 +++
 .../hudi/scripts/config/hoodie-incr.properties      |  34 +++
 .../hudi/scripts/config/hoodie-schema.avsc          | 146 +++++++++++
 .../hudi/scripts/config/kafka-source.properties     |  30 +++
 .../hudi/scripts/config/log4j2.properties           |  61 +++++
 .../docker-compose/hudi/scripts/config/schema.avsc  |  59 +++++
 .../hudi/scripts/config/spark-defaults.conf         |  30 +++
 .../docker-compose/hudi/scripts/run_sync_tool.sh    |  56 +++++
 .../hudi/scripts/setup_demo_container_adhoc_1.sh    |  31 +++
 .../hudi/scripts/setup_demo_container_adhoc_2.sh    |  77 ++++++
 docker/thirdparties/run-thirdparties-docker.sh      |  31 ++-
 .../developer-guide/regression-testing.md           | 127 +++++++---
 15 files changed, 1021 insertions(+), 36 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hudi/hadoop.env b/docker/thirdparties/docker-compose/hudi/hadoop.env
new file mode 100644
index 0000000000..28ef46c3eb
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/hadoop.env
@@ -0,0 +1,52 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
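The hadoop.env entries that follow use the `<FILE>_CONF_<property>` naming convention of the apachehudi/hudi-hadoop images: the container entrypoint expands each variable into a property of the matching Hadoop/Hive XML config file. The sketch below is illustrative only and assumes the common bde2020-style mapping (`_` becomes `.`, `___` becomes `-`); it is not part of the committed file.

```bash
#!/usr/bin/env bash
# Illustrative sketch (assumed bde2020-style mapping used by these images):
# recover the XML property name encoded in a *_CONF_* environment variable.
conf_var_to_property() {
    local var="$1"
    local key="${var#*_CONF_}"   # drop the HIVE_SITE_ / HDFS_ / CORE_ / YARN_ prefix
    # "___" -> "-", then "_" -> "." (double-underscore escapes omitted; unused here)
    printf '%s\n' "${key}" | sed -e 's/___/@DASH@/g' -e 's/_/./g' -e 's/@DASH@/-/g'
}

conf_var_to_property CORE_CONF_fs_defaultFS
# -> fs.defaultFS                                            (core-site.xml)
conf_var_to_property HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check
# -> dfs.namenode.datanode.registration.ip-hostname-check    (hdfs-site.xml)
```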
+ +HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore +HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver +HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive +HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive +HIVE_SITE_CONF_datanucleus_autoCreateSchema=false +HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083 + +HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false +HDFS_CONF_dfs_webhdfs_enabled=true +HDFS_CONF_dfs_permissions_enabled=false +#HDFS_CONF_dfs_client_use_datanode_hostname=true +#HDFS_CONF_dfs_namenode_use_datanode_hostname=true +HDFS_CONF_dfs_replication=1 + +CORE_CONF_fs_defaultFS=hdfs://namenode:8020 +CORE_CONF_hadoop_http_staticuser_user=root +CORE_CONF_hadoop_proxyuser_hue_hosts=* +CORE_CONF_hadoop_proxyuser_hue_groups=* + +YARN_CONF_yarn_log___aggregation___enable=true +YARN_CONF_yarn_resourcemanager_recovery_enabled=true +YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore +YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate +YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs +YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ +YARN_CONF_yarn_timeline___service_enabled=true +YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true +YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true +YARN_CONF_yarn_resourcemanager_hostname=resourcemanager +YARN_CONF_yarn_timeline___service_hostname=historyserver +YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 +YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 +YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031 +YARN_CONF_yarn_nodemanager_vmem___check___enabled=false diff --git a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl new file mode 100644 index 0000000000..f0878e452b --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
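Before the compose template body below, a note on how it is consumed: the `doris--` prefix in the network name is a placeholder. Based on the run-thirdparties-docker.sh changes later in this same commit, the launcher copies the template, substitutes a per-user container UID, and brings the stack up. A minimal sketch, with a hypothetical UID value:

```bash
# Minimal sketch of how hudi.yaml.tpl is rendered and started; the real flow is in
# docker/thirdparties/run-thirdparties-docker.sh (also part of this commit).
CONTAINER_UID="doris-jenkins--"   # hypothetical value; normally taken from local settings

cd docker/thirdparties/docker-compose/hudi
cp hudi.yaml.tpl hudi.yaml
sed -i "s/doris--/${CONTAINER_UID}/g" hudi.yaml
sudo docker compose -f hudi.yaml --env-file hadoop.env up -d
```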
+ +version: "3.3" + +networks: + doris--hudi: + driver: bridge + +services: + + namenode: + image: apachehudi/hudi-hadoop_2.8.4-namenode:latest + hostname: namenode + container_name: namenode + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + ports: + - "50070:50070" + - "8020:8020" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + env_file: + - ./hadoop.env + healthcheck: + test: ["CMD", "curl", "-f", "http://namenode:50070"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - doris--hudi + + datanode1: + image: apachehudi/hudi-hadoop_2.8.4-datanode:latest + container_name: datanode1 + hostname: datanode1 + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + env_file: + - ./hadoop.env + ports: + - "50075:50075" + - "50010:50010" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + links: + - "namenode" + - "historyserver" + healthcheck: + test: ["CMD", "curl", "-f", "http://datanode1:50075"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - namenode + networks: + - doris--hudi + + historyserver: + image: apachehudi/hudi-hadoop_2.8.4-history:latest + hostname: historyserver + container_name: historyserver + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + depends_on: + - "namenode" + links: + - "namenode" + ports: + - "58188:8188" + healthcheck: + test: ["CMD", "curl", "-f", "http://historyserver:8188"] + interval: 30s + timeout: 10s + retries: 3 + env_file: + - ./hadoop.env + volumes: + - ./historyserver:/hadoop/yarn/timeline + networks: + - doris--hudi + + hive-metastore-postgresql: + image: bde2020/hive-metastore-postgresql:2.3.0 + volumes: + - ./hive-metastore-postgresql:/var/lib/postgresql + hostname: hive-metastore-postgresql + container_name: hive-metastore-postgresql + networks: + - doris--hudi + + hivemetastore: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + hostname: hivemetastore + container_name: hivemetastore + links: + - "hive-metastore-postgresql" + - "namenode" + env_file: + - ./hadoop.env + command: /opt/hive/bin/hive --service metastore + environment: + SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432" + ports: + - "9083:9083" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + healthcheck: + test: ["CMD", "nc", "-z", "hivemetastore", "9083"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + + hiveserver: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + hostname: hiveserver + container_name: hiveserver + env_file: + - ./hadoop.env + environment: + SERVICE_PRECONDITION: "hivemetastore:9083" + ports: + - "10000:10000" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + depends_on: + - "hivemetastore" + links: + - "hivemetastore" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi + + sparkmaster: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:latest + hostname: sparkmaster + container_name: sparkmaster + env_file: + - ./hadoop.env + ports: + - "8080:8080" + - "7077:7077" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + environment: + - INIT_DAEMON_STEP=setup_spark + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + + spark-worker-1: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:latest + 
hostname: spark-worker-1 + container_name: spark-worker-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "8081:8081" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + +# zookeeper: +# image: 'bitnami/zookeeper:3.4.12-r68' +# hostname: zookeeper +# container_name: zookeeper +# ports: +# - "2181:2181" +# environment: +# - ALLOW_ANONYMOUS_LOGIN=yes +# networks: +# - doris--hudi + +# kafka: +# image: 'bitnami/kafka:2.0.0' +# hostname: kafkabroker +# container_name: kafkabroker +# ports: +# - "9092:9092" +# environment: +# - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 +# - ALLOW_PLAINTEXT_LISTENER=yes +# networks: +# - doris--hudi + + adhoc-1: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + hostname: adhoc-1 + container_name: adhoc-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - '4040:4040' + # JVM debugging port (mapped to 5006 on the host) + - "5006:5005" + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi + + adhoc-2: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + hostname: adhoc-2 + container_name: adhoc-2 + env_file: + - ./hadoop.env + ports: + # JVM debugging port (mapped to 5005 on the host) + - "5005:5005" + depends_on: + - sparkmaster + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties new file mode 100644 index 0000000000..0666245758 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties @@ -0,0 +1,25 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +hoodie.upsert.shuffle.parallelism=2 +hoodie.insert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.embed.timeline.server=true +hoodie.filesystem.view.type=EMBEDDED_KV_STORE +hoodie.compact.inline=false diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties new file mode 100644 index 0000000000..04c16e272a --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties @@ -0,0 +1,31 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include=base.properties +# Key fields, for kafka example +hoodie.datasource.write.recordkey.field=key +hoodie.datasource.write.partitionpath.field=date +# NOTE: We have to duplicate configuration since this is being used +# w/ both Spark and DeltaStreamer +hoodie.table.recordkey.fields=key +hoodie.table.partition.fields=date +# Schema provider props (change to absolute path based on your installation) +hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc +# DFS Source +hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/ diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties new file mode 100644 index 0000000000..c796063ff1 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties @@ -0,0 +1,34 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +hoodie.upsert.shuffle.parallelism=2 +hoodie.insert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.partitionpath.field=partition +hoodie.deltastreamer.schemaprovider.source.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc +hoodie.deltastreamer.source.hoodieincr.partition.fields=partition +hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test +hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true +# hive sync +hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2 +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.partition_fields=partition +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc new file mode 100644 index 0000000000..f97742c947 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +{ + "type": "record", + "name": "triprec", + "fields": [ + { + "name": "timestamp", + "type": "double" + }, + { + "name": "_row_key", + "type": "string" + }, + { + "name": "rider", + "type": "string" + }, + { + "name": "driver", + "type": "string" + }, + { + "name": "begin_lat", + "type": "double" + }, + { + "name": "begin_lon", + "type": "double" + }, + { + "name": "end_lat", + "type": "double" + }, + { + "name": "end_lon", + "type": "double" + }, + { + "name": "distance_in_meters", + "type": "int" + }, + { + "name": "seconds_since_epoch", + "type": "long" + }, + { + "name": "weight", + "type": "float" + }, + { + "name": "nation", + "type": "bytes" + }, + { + "name": "current_date", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "current_ts", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "height", + "type": { + "type": "fixed", + "name": "abc", + "size": 5, + "logicalType": "decimal", + "precision": 10, + "scale": 6 + } + }, + { + "name": "city_to_state", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "fare", + "type": { + "type": "record", + "name": "fare", + "fields": [ + { + "name": "amount", + "type": "double" + }, + { + "name": "currency", + "type": "string" + } + ] + } + }, + { + "name": "tip_history", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "tip_history", + "fields": [ + { + "name": "amount", + "type": "double" + }, + { + "name": "currency", + "type": "string" + } + ] + } + } + }, + { + "name": "_hoodie_is_deleted", + "type": "boolean", + "default": false + } + ] +} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties new file mode 100644 index 0000000000..5ba5290ca6 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties @@ -0,0 +1,30 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
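The kafka-source.properties that follows mirrors the upstream Hudi docker demo's DeltaStreamer config. As a hypothetical usage sketch only (flag names as in the upstream Hudi demo; verify them against the Hudi version bundled in these images, and note that the kafka/zookeeper services are commented out in hudi.yaml.tpl, so this source only works if they are re-enabled):

```bash
# Hypothetical invocation from inside one of the adhoc containers, showing how a
# DeltaStreamer job would consume the kafka-source.properties below.
# HUDI_UTILITIES_BUNDLE is assumed to point at a hudi-utilities bundle jar.
spark-submit \
  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  "${HUDI_UTILITIES_BUNDLE}" \
  --table-type COPY_ON_WRITE \
  --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \
  --source-ordering-field ts \
  --target-base-path /user/hive/warehouse/stock_ticks_cow \
  --target-table stock_ticks_cow \
  --props /var/demo/config/kafka-source.properties \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider
```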
+ +include=base.properties +# Key fields, for kafka example +hoodie.datasource.write.recordkey.field=key +hoodie.datasource.write.partitionpath.field=date +# Schema provider props (change to absolute path based on your installation) +hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc +# Kafka Source +hoodie.deltastreamer.source.kafka.topic=stock_ticks +#Kafka props +bootstrap.servers=kafkabroker:9092 +auto.offset.reset=earliest diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties new file mode 100644 index 0000000000..86450ead3e --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties @@ -0,0 +1,61 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +### +status = warn +name = HudiConsoleLog + +# Set everything to be logged to the console +appender.console.type = Console +appender.console.name = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Root logger level +rootLogger.level = warn +# Root logger referring to console appender +rootLogger.appenderRef.stdout.ref = CONSOLE + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+logger.apache_spark_repl.name = org.apache.spark.repl.Main +logger.apache_spark_repl.level = warn +# Set logging of integration testsuite to INFO level +logger.hudi_integ.name = org.apache.hudi.integ.testsuite +logger.hudi_integ.level = info +# Settings to quiet third party logs that are too verbose +logger.apache_spark_jetty.name = org.spark_project.jetty +logger.apache_spark_jetty.level = warn +logger.apache_spark_jett_lifecycle.name = org.spark_project.jetty.util.component.AbstractLifeCycle +logger.apache_spark_jett_lifecycle.level = error +logger.apache_spark_repl_imain.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.apache_spark_repl_imain.level = info +logger.apache_spark_repl_iloop.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.apache_spark_repl_iloop.level = info +logger.parquet.name = org.apache.parquet +logger.parquet.level = error +logger.spark.name = org.apache.spark +logger.spark.level = warn +# Disabling Jetty logs +logger.jetty.name = org.apache.hudi.org.eclipse.jetty +logger.jetty.level = error +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +logger.hive_handler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.hive_handler.level = fatal +logger.hive_func_registry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.hive_func_registry.level = error diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc new file mode 100644 index 0000000000..aa8baaf44b --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +{ + "type":"record", + "name":"stock_ticks", + "fields":[{ + "name": "volume", + "type": "long" + }, { + "name": "ts", + "type": "string" + }, { + "name": "symbol", + "type": "string" + },{ + "name": "year", + "type": "int" + },{ + "name": "month", + "type": "string" + },{ + "name": "high", + "type": "double" + },{ + "name": "low", + "type": "double" + },{ + "name": "key", + "type": "string" + },{ + "name": "date", + "type":"string" + }, { + "name": "close", + "type": "double" + }, { + "name": "open", + "type": "double" + }, { + "name": "day", + "type":"string" + } +]} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf new file mode 100644 index 0000000000..d085bfe588 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +spark.master local[3] +spark.eventLog.dir hdfs://namenode:8020/tmp/spark-events +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator org.apache.spark.HoodieSparkKryoRegistrar + +#spark.executor.memory 4g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh new file mode 100755 index 0000000000..390d09f967 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +function error_exit { + echo "$1" >&2 ## Send message to stderr. Exclude >&2 if you don't want it that way. + exit "${2:-1}" ## Return a code specified by $2 or 1 by default. 
+} + +if [ -z "${HADOOP_HOME}" ]; then + error_exit "Please make sure the environment variable HADOOP_HOME is setup" +fi + +if [ -z "${HIVE_HOME}" ]; then + error_exit "Please make sure the environment variable HIVE_HOME is setup" +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +#Ensure we pick the right jar even for hive11 builds +HUDI_HIVE_UBER_JAR=`ls -c $DIR/./hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar | grep -v source | head -1` + +if [ -z "$HADOOP_CONF_DIR" ]; then + echo "setting hadoop conf dir" + HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" +fi + +## Include only specific packages from HIVE_HOME/lib to avoid version mismatches +HIVE_EXEC=`ls ${HIVE_HOME}/lib/hive-exec-*.jar | tr '\n' ':'` +HIVE_SERVICE=`ls ${HIVE_HOME}/lib/hive-service-*.jar | grep -v rpc | tr '\n' ':'` +HIVE_METASTORE=`ls ${HIVE_HOME}/lib/hive-metastore-*.jar | tr '\n' ':'` +HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | tr '\n' ':'` +if [ -z "${HIVE_JDBC}" ]; then + HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | grep -v handler | tr '\n' ':'` +fi +HIVE_JACKSON=`ls ${HIVE_HOME}/lib/jackson-*.jar | tr '\n' ':'` +HIVE_JARS=$HIVE_METASTORE:$HIVE_SERVICE:$HIVE_EXEC:$HIVE_JDBC:$HIVE_JACKSON + +HADOOP_HIVE_JARS=${HIVE_JARS}:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/* + +echo "Running Command : java -cp ${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR}:$HUDI_HIVE_UBER_JAR org.apache.hudi.hive.HiveSyncTool $@" +java -cp $HUDI_HIVE_UBER_JAR:${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR} org.apache.hudi.hive.HiveSyncTool "$@" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh new file mode 100755 index 0000000000..a5edb7676a --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Copying spark default config and setting up configs" +cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. +cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. +echo "sleep 10, wait hdfs start" +sleep 10 +echo "hadoop fs -mkdir -p /var/demo/" +hadoop fs -mkdir -p /var/demo/ +echo "hadoop fs -mkdir -p /tmp/spark-events" +hadoop fs -mkdir -p /tmp/spark-events +echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." +hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. 
+echo "chmod +x /var/scripts/run_sync_tool.sh" +chmod +x /var/scripts/run_sync_tool.sh diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh new file mode 100755 index 0000000000..a55dddd86d --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Copying spark default config and setting up configs" +cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. +cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. +echo "sleep 10, wait hdfs start" +sleep 10 +echo "hadoop fs -mkdir -p /var/demo/" +hadoop fs -mkdir -p /var/demo/ +echo "hadoop fs -mkdir -p /tmp/spark-events" +hadoop fs -mkdir -p /tmp/spark-events +echo "hadoop fs -mkdir -p /user/hive/" +hadoop fs -mkdir -p /user/hive/ +echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." +hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. 
+echo "hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/" +hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/ +echo "chmod +x /var/scripts/run_sync_tool.sh" +chmod +x /var/scripts/run_sync_tool.sh + +echo "Start synchronizing the stock_ticks_cow table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by date \ + --base-path /user/hive/warehouse/stock_ticks_cow \ + --database default \ + --table stock_ticks_cow \ + --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + +echo "Start synchronizing the stock_ticks_mor table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by date \ + --base-path /user/hive/warehouse/stock_ticks_mor \ + --database default \ + --table stock_ticks_mor \ + --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + +echo "Start synchronizing the hudi_cow_pt_tbl table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by dt \ + --base-path /user/hive/warehouse/hudi_cow_pt_tbl \ + --database default \ + --table hudi_cow_pt_tbl \ + --partition-value-extractor org.apache.hudi.hive.HiveStylePartitionValueExtractor + +echo "Start synchronizing the hudi_non_part_cow table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --base-path /user/hive/warehouse/hudi_non_part_cow \ + --database default \ + --table hudi_non_part_cow \ diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index 1851b03051..283ed7b35b 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -37,7 +37,7 @@ Usage: $0 <options> --stop stop the specified components All valid components: - mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg + mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg,hudi " exit 1 } @@ -60,7 +60,7 @@ STOP=0 if [[ "$#" == 1 ]]; then # default - COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg" + COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi" else while true; do case "$1" in @@ -92,7 +92,7 @@ else done if [[ "${COMPONENTS}"x == ""x ]]; then if [[ "${STOP}" -eq 1 ]]; then - COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg" + COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi" fi fi fi @@ -128,6 +128,7 @@ RUN_CLICKHOUSE=0 RUN_HIVE=0 RUN_ES=0 RUN_ICEBERG=0 +RUN_HUDI=0 for element in "${COMPONENTS_ARR[@]}"; do if [[ "${element}"x == "mysql"x ]]; then RUN_MYSQL=1 @@ -145,6 +146,8 @@ for element in "${COMPONENTS_ARR[@]}"; do RUN_HIVE=1 elif [[ "${element}"x == "iceberg"x ]]; then RUN_ICEBERG=1 + elif [[ "${element}"x == "hudi"x ]]; then + RUN_HUDI=1 else echo "Invalid component: ${element}" usage @@ -265,3 +268,25 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d fi fi + +if [[ "${RUN_HUDI}" -eq 1 ]]; then + # hudi + cp "${ROOT}"/docker-compose/hudi/hudi.yaml.tpl "${ROOT}"/docker-compose/hudi/hudi.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hudi/hudi.yaml + sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml 
--env-file "${ROOT}"/docker-compose/hudi/hadoop.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo rm -rf "${ROOT}"/docker-compose/hudi/historyserver + sudo mkdir "${ROOT}"/docker-compose/hudi/historyserver + sudo rm -rf "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql + sudo mkdir "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql + if [[ ! -d "${ROOT}/docker-compose/hudi/scripts/hudi_docker_compose_attached_file" ]]; then + echo "Attached files does not exist, please download the https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip file to the docker-compose/hudi/scripts/ directory and unzip it." + exit 1 + fi + sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env up -d + echo "sleep 15, wait server start" + sleep 15 + docker exec -it adhoc-1 /bin/bash /var/scripts/setup_demo_container_adhoc_1.sh + docker exec -it adhoc-2 /bin/bash /var/scripts/setup_demo_container_adhoc_2.sh + fi +fi diff --git a/docs/zh-CN/community/developer-guide/regression-testing.md b/docs/zh-CN/community/developer-guide/regression-testing.md index 48c6de8f7c..3617b4d769 100644 --- a/docs/zh-CN/community/developer-guide/regression-testing.md +++ b/docs/zh-CN/community/developer-guide/regression-testing.md @@ -605,10 +605,10 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 1. 启动 Container - Doris 目前支持 es, mysql, pg, hive, sqlserver, oracle, iceberg 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 + Doris 目前支持 es, mysql, pg, hive, sqlserver, oracle, iceberg, hudi 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 默认情况下,可以直接通过以下命令启动所有外部数据源的 Docker container: - (注意,hive container 需要下载预制的数据文件,请参阅下面 hive 相关的文档。) + (注意,hive和hudi container 需要下载预制的数据文件,请参阅下面 hive和hudi 相关的文档。) ``` cd docker/thirdparties && sh run-thirdparties-docker.sh @@ -692,48 +692,109 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 * `clickhouse.yaml.tpl`:Docker compose 文件模板。无需修改。 * `clickhouse.env`:配置 ClickHouse 对外端口,默认为 8123。 - 8. Iceberg + 8. 
Iceberg - 提供 Iceberg + Spark + Minio 镜像组合。存放在 docker/thirdparties/docker-compose/iceberg/ 下。 + 提供 Iceberg + Spark + Minio 镜像组合。存放在 docker/thirdparties/docker-compose/iceberg/ 下。 - * `iceberg.yaml.tpl`:Docker compose 文件模板。无需修改。 - * `entrypoint.sh.tpl`:镜像启动后的初始化脚本模板。无需修改。 - * `spark-defaults.conf.tpl`:Spark 的配置文件模板。无需修改。 - * `iceberg.env`:对外端口配置文件,需修改各个对外端口,避免端口冲突。 + * `iceberg.yaml.tpl`:Docker compose 文件模板。无需修改。 + * `entrypoint.sh.tpl`:镜像启动后的初始化脚本模板。无需修改。 + * `spark-defaults.conf.tpl`:Spark 的配置文件模板。无需修改。 + * `iceberg.env`:对外端口配置文件,需修改各个对外端口,避免端口冲突。 - 启动后,可以通过如下命令启动 spark-sql + 启动后,可以通过如下命令启动 spark-sql - `docker exec -it doris-xx-spark-iceberg spark-sql` + `docker exec -it doris-xx-spark-iceberg spark-sql` - 其中 `doris-xx-spark-iceberg` 为 container 名称。 + 其中 `doris-xx-spark-iceberg` 为 container 名称。 - spark-sql iceberg 操作示例: + spark-sql iceberg 操作示例: - ``` - create database db1; - show databases; - create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1); - insert into db1.test1 values(1,2,'abc'); - select * from db1.test1; - quit; - ``` + ``` + create database db1; + show databases; + create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1); + insert into db1.test1 values(1,2,'abc'); + select * from db1.test1; + quit; + ``` - 也可以通过 spark-shell 进行访问: + 也可以通过 spark-shell 进行访问: - ``` - docker exec -it doris-xx-spark-iceberg spark-shell - - spark.sql(s"create database db1") - spark.sql(s"show databases").show() - spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show() - spark.sql(s"show tables from db1").show() - spark.sql(s"insert into db1.test1 values(1,2,'abc')").show() - spark.sql(s"select * from db1.test1").show() - :q - ``` + ``` + docker exec -it doris-xx-spark-iceberg spark-shell + + spark.sql(s"create database db1") + spark.sql(s"show databases").show() + spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show() + spark.sql(s"show tables from db1").show() + spark.sql(s"insert into db1.test1 values(1,2,'abc')").show() + spark.sql(s"select * from db1.test1").show() + :q + ``` + + 更多使用方式可参阅 [Tabular 官方文档](https://tabular.io/blog/docker-spark-and-iceberg/)。 + 9. 
Hudi - 更多使用方式可参阅 [Tabular 官方文档](https://tabular.io/blog/docker-spark-and-iceberg/)。 + Hudi 相关的 Docker compose 文件存放在 docker/thirdparties/docker-compose/hudi 下。 + * `hudi.yaml.tpl`:Docker compose 文件模板,无需修改。 + * `hadoop.env`:配置文件的模板,无需修改。 + * `scripts/` 目录会在 container 启动后挂载到 container 中。其中的文件内容无需修改。但须注意,在启动 container 之前,需要先下载预制文件: + 将 `https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip` 文件下载到 `scripts/` 目录并解压即可。 + + * + 启动前,可以将以下设置添加到`/etc/hosts`中,以避免出现`UnknownHostException`错误 + ``` + 127.0.0.1 adhoc-1 + 127.0.0.1 adhoc-2 + 127.0.0.1 namenode + 127.0.0.1 datanode1 + 127.0.0.1 hiveserver + 127.0.0.1 hivemetastore + 127.0.0.1 sparkmaster + ``` + + 启动后,可以通过如下命令启动 hive query + + ``` + docker exec -it adhoc-2 /bin/bash + + beeline -u jdbc:hive2://hiveserver:10000 \ + --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \ + --hiveconf hive.stats.autogather=false + + show tables; + show partitions stock_ticks_mor_rt; + select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'; + select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = 'GOOG'; + exit; + ``` + + 也可以通过 spark-shell 进行访问: + + ``` + docker exec -it adhoc-1 /bin/bash + + $SPARK_INSTALL/bin/spark-shell \ + --jars /var/scripts/hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar \ + --master local[2] \ + --driver-class-path $HADOOP_CONF_DIR \ + --conf spark.sql.hive.convertMetastoreParquet=false \ + --deploy-mode client \ + --driver-memory 1G \ + --executor-memory 3G \ + --num-executors 1 + + spark.sql("show tables").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG'").show(100, false) + :q + ``` + + 更多使用方式可参阅 [Hudi 官方文档](https://hudi.apache.org/docs/docker_demo)。 2. 运行回归测试 外表相关的回归测试默认是关闭的,可以修改 `regression-test/conf/regression-conf.groovy` 中的以下配置来开启: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
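For convenience, here is a sketch that pulls together the Hudi-related setup steps described in the regression-testing.md changes above; it only restates commands and values already given in this commit.

```bash
# One-time: fetch and unpack the pre-built attachments expected by the hudi compose setup.
cd docker/thirdparties/docker-compose/hudi/scripts
wget https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip
unzip hudi_docker_compose_attached_file.zip

# Optional: map the compose hostnames locally to avoid UnknownHostException.
sudo tee -a /etc/hosts <<'EOF'
127.0.0.1 adhoc-1
127.0.0.1 adhoc-2
127.0.0.1 namenode
127.0.0.1 datanode1
127.0.0.1 hiveserver
127.0.0.1 hivemetastore
127.0.0.1 sparkmaster
EOF

# Start the third-party containers (hudi is included by default after this commit).
cd ../../..
sh run-thirdparties-docker.sh
```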