This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 224bca3794 [docker](hudi) add hudi docker compose (#19048)
224bca3794 is described below

commit 224bca379470b3a396d40e680dd40436c9f6d2b0
Author: hechao <73096722+hechao-u...@users.noreply.github.com>
AuthorDate: Tue May 2 09:54:52 2023 +0800

    [docker](hudi) add hudi docker compose (#19048)
---
 docker/thirdparties/docker-compose/hudi/hadoop.env |  52 ++++
 .../thirdparties/docker-compose/hudi/hudi.yaml.tpl | 267 +++++++++++++++++++++
 .../hudi/scripts/config/base.properties            |  25 ++
 .../hudi/scripts/config/dfs-source.properties      |  31 +++
 .../hudi/scripts/config/hoodie-incr.properties     |  34 +++
 .../hudi/scripts/config/hoodie-schema.avsc         | 146 +++++++++++
 .../hudi/scripts/config/kafka-source.properties    |  30 +++
 .../hudi/scripts/config/log4j2.properties          |  61 +++++
 .../docker-compose/hudi/scripts/config/schema.avsc |  59 +++++
 .../hudi/scripts/config/spark-defaults.conf        |  30 +++
 .../docker-compose/hudi/scripts/run_sync_tool.sh   |  56 +++++
 .../hudi/scripts/setup_demo_container_adhoc_1.sh   |  31 +++
 .../hudi/scripts/setup_demo_container_adhoc_2.sh   |  77 ++++++
 docker/thirdparties/run-thirdparties-docker.sh     |  31 ++-
 .../developer-guide/regression-testing.md          | 127 +++++++---
 15 files changed, 1021 insertions(+), 36 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hudi/hadoop.env b/docker/thirdparties/docker-compose/hudi/hadoop.env
new file mode 100644
index 0000000000..28ef46c3eb
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/hadoop.env
@@ -0,0 +1,52 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
+HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
+HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
+HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
+HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
+HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083
+
+HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
+HDFS_CONF_dfs_webhdfs_enabled=true
+HDFS_CONF_dfs_permissions_enabled=false
+#HDFS_CONF_dfs_client_use_datanode_hostname=true
+#HDFS_CONF_dfs_namenode_use_datanode_hostname=true
+HDFS_CONF_dfs_replication=1
+
+CORE_CONF_fs_defaultFS=hdfs://namenode:8020
+CORE_CONF_hadoop_http_staticuser_user=root
+CORE_CONF_hadoop_proxyuser_hue_hosts=*
+CORE_CONF_hadoop_proxyuser_hue_groups=*
+
+YARN_CONF_yarn_log___aggregation___enable=true
+YARN_CONF_yarn_resourcemanager_recovery_enabled=true
+YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
+YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
+YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
+YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
+YARN_CONF_yarn_timeline___service_enabled=true
+YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
+YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
+YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
+YARN_CONF_yarn_timeline___service_hostname=historyserver
+YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
+YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
+YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
+YARN_CONF_yarn_nodemanager_vmem___check___enabled=false
diff --git a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl
new file mode 100644
index 0000000000..f0878e452b
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl
@@ -0,0 +1,267 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+version: "3.3"
+
+networks:
+  doris--hudi:
+    driver: bridge
+
+services:
+
+  namenode:
+    image: apachehudi/hudi-hadoop_2.8.4-namenode:latest
+    hostname: namenode
+    container_name: namenode
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    ports:
+      - "50070:50070"
+      - "8020:8020"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    env_file:
+      - ./hadoop.env
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://namenode:50070"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    networks:
+      - doris--hudi
+
+  datanode1:
+    image: apachehudi/hudi-hadoop_2.8.4-datanode:latest
+    container_name: datanode1
+    hostname: datanode1
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "50075:50075"
+      - "50010:50010"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    links:
+      - "namenode"
+      - "historyserver"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://datanode1:50075"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - namenode
+    networks:
+      - doris--hudi
+
+  historyserver:
+    image: apachehudi/hudi-hadoop_2.8.4-history:latest
+    hostname: historyserver
+    container_name: historyserver
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    depends_on:
+      - "namenode"
+    links:
+      - "namenode"
+    ports:
+      - "58188:8188"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://historyserver:8188"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    env_file:
+      - ./hadoop.env
+    volumes:
+      - ./historyserver:/hadoop/yarn/timeline
+    networks:
+      - doris--hudi
+
+  hive-metastore-postgresql:
+    image: bde2020/hive-metastore-postgresql:2.3.0
+    volumes:
+      - ./hive-metastore-postgresql:/var/lib/postgresql
+    hostname: hive-metastore-postgresql
+    container_name: hive-metastore-postgresql
+    networks:
+      - doris--hudi
+
+  hivemetastore:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest
+    hostname: hivemetastore
+    container_name: hivemetastore
+    links:
+      - "hive-metastore-postgresql"
+      - "namenode"
+    env_file:
+      - ./hadoop.env
+    command: /opt/hive/bin/hive --service metastore
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432"
+    ports:
+      - "9083:9083"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    healthcheck:
+      test: ["CMD", "nc", "-z", "hivemetastore", "9083"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - "hive-metastore-postgresql"
+      - "namenode"
+    networks:
+      - doris--hudi
+
+  hiveserver:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest
+    hostname: hiveserver
+    container_name: hiveserver
+    env_file:
+      - ./hadoop.env
+    environment:
+      SERVICE_PRECONDITION: "hivemetastore:9083"
+    ports:
+      - "10000:10000"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    depends_on:
+      - "hivemetastore"
+    links:
+      - "hivemetastore"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ./scripts:/var/scripts
+    networks:
+      - doris--hudi
+
+  sparkmaster:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:latest
+    hostname: sparkmaster
+    container_name: sparkmaster
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "8080:8080"
+      - "7077:7077"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - INIT_DAEMON_STEP=setup_spark
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    networks:
+      - doris--hudi
+
+  spark-worker-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:latest
+    hostname: spark-worker-1
+    container_name: spark-worker-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - "8081:8081"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    networks:
+      - doris--hudi
+
+#  zookeeper:
+#    image: 'bitnami/zookeeper:3.4.12-r68'
+#    hostname: zookeeper
+#    container_name: zookeeper
+#    ports:
+#      - "2181:2181"
+#    environment:
+#      - ALLOW_ANONYMOUS_LOGIN=yes
+#    networks:
+#      - doris--hudi
+
+#  kafka:
+#    image: 'bitnami/kafka:2.0.0'
+#    hostname: kafkabroker
+#    container_name: kafkabroker
+#    ports:
+#      - "9092:9092"
+#    environment:
+#      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+#      - ALLOW_PLAINTEXT_LISTENER=yes
+#    networks:
+#      - doris--hudi
+
+  adhoc-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest
+    hostname: adhoc-1
+    container_name: adhoc-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - '4040:4040'
+      # JVM debugging port (mapped to 5006 on the host)
+      - "5006:5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ./scripts:/var/scripts
+    networks:
+      - doris--hudi
+
+  adhoc-2:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest
+    hostname: adhoc-2
+    container_name: adhoc-2
+    env_file:
+      - ./hadoop.env
+    ports:
+      # JVM debugging port (mapped to 5005 on the host)
+      - "5005:5005"
+    depends_on:
+      - sparkmaster
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ./scripts:/var/scripts
+    networks:
+      - doris--hudi
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties
new file mode 100644
index 0000000000..0666245758
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties
@@ -0,0 +1,25 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+hoodie.upsert.shuffle.parallelism=2
+hoodie.insert.shuffle.parallelism=2
+hoodie.delete.shuffle.parallelism=2
+hoodie.bulkinsert.shuffle.parallelism=2
+hoodie.embed.timeline.server=true
+hoodie.filesystem.view.type=EMBEDDED_KV_STORE
+hoodie.compact.inline=false
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties
new file mode 100644
index 0000000000..04c16e272a
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties
@@ -0,0 +1,31 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include=base.properties
+# Key fields, for kafka example
+hoodie.datasource.write.recordkey.field=key
+hoodie.datasource.write.partitionpath.field=date
+# NOTE: We have to duplicate configuration since this is being used
+#       w/ both Spark and DeltaStreamer
+hoodie.table.recordkey.fields=key
+hoodie.table.partition.fields=date
+# Schema provider props (change to absolute path based on your installation)
+hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
+hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc
+# DFS Source
+hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties
new file mode 100644
index 0000000000..c796063ff1
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties
@@ -0,0 +1,34 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+hoodie.upsert.shuffle.parallelism=2
+hoodie.insert.shuffle.parallelism=2
+hoodie.delete.shuffle.parallelism=2
+hoodie.bulkinsert.shuffle.parallelism=2
+hoodie.datasource.write.recordkey.field=_row_key
+hoodie.datasource.write.partitionpath.field=partition
+hoodie.deltastreamer.schemaprovider.source.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc
+hoodie.deltastreamer.schemaprovider.target.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc
+hoodie.deltastreamer.source.hoodieincr.partition.fields=partition
+hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test
+hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true
+# hive sync
+hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2
+hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
+hoodie.datasource.hive_sync.partition_fields=partition
+hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc
new file mode 100644
index 0000000000..f97742c947
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+{
+    "type": "record",
+    "name": "triprec",
+    "fields": [
+        {
+            "name": "timestamp",
+            "type": "double"
+        },
+        {
+            "name": "_row_key",
+            "type": "string"
+        },
+        {
+            "name": "rider",
+            "type": "string"
+        },
+        {
+            "name": "driver",
+            "type": "string"
+        },
+        {
+            "name": "begin_lat",
+            "type": "double"
+        },
+        {
+            "name": "begin_lon",
+            "type": "double"
+        },
+        {
+            "name": "end_lat",
+            "type": "double"
+        },
+        {
+            "name": "end_lon",
+            "type": "double"
+        },
+        {
+            "name": "distance_in_meters",
+            "type": "int"
+        },
+        {
+            "name": "seconds_since_epoch",
+            "type": "long"
+        },
+        {
+            "name": "weight",
+            "type": "float"
+        },
+        {
+            "name": "nation",
+            "type": "bytes"
+        },
+        {
+            "name": "current_date",
+            "type": {
+                "type": "int",
+                "logicalType": "date"
+            }
+        },
+        {
+            "name": "current_ts",
+            "type": {
+                "type": "long",
+                "logicalType": "timestamp-micros"
+            }
+        },
+        {
+            "name": "height",
+            "type": {
+                "type": "fixed",
+                "name": "abc",
+                "size": 5,
+                "logicalType": "decimal",
+                "precision": 10,
+                "scale": 6
+            }
+        },
+        {
+            "name": "city_to_state",
+            "type": {
+                "type": "map",
+                "values": "string"
+            }
+        },
+        {
+            "name": "fare",
+            "type": {
+                "type": "record",
+                "name": "fare",
+                "fields": [
+                    {
+                        "name": "amount",
+                        "type": "double"
+                    },
+                    {
+                        "name": "currency",
+                        "type": "string"
+                    }
+                ]
+            }
+        },
+        {
+            "name": "tip_history",
+            "type": {
+                "type": "array",
+                "items": {
+                    "type": "record",
+                    "name": "tip_history",
+                    "fields": [
+                        {
+                            "name": "amount",
+                            "type": "double"
+                        },
+                        {
+                            "name": "currency",
+                            "type": "string"
+                        }
+                    ]
+                }
+            }
+        },
+        {
+            "name": "_hoodie_is_deleted",
+            "type": "boolean",
+            "default": false
+        }
+    ]
+}
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties
new file mode 100644
index 0000000000..5ba5290ca6
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties
@@ -0,0 +1,30 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include=base.properties
+# Key fields, for kafka example
+hoodie.datasource.write.recordkey.field=key
+hoodie.datasource.write.partitionpath.field=date
+# Schema provider props (change to absolute path based on your installation)
+hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
+hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc
+# Kafka Source
+hoodie.deltastreamer.source.kafka.topic=stock_ticks
+#Kafka props
+bootstrap.servers=kafkabroker:9092
+auto.offset.reset=earliest
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties
new file mode 100644
index 0000000000..86450ead3e
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties
@@ -0,0 +1,61 @@
+###
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+###
+status = warn
+name = HudiConsoleLog
+
+# Set everything to be logged to the console
+appender.console.type = Console
+appender.console.name = CONSOLE
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Root logger level
+rootLogger.level = warn
+# Root logger referring to console appender
+rootLogger.appenderRef.stdout.ref = CONSOLE
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+logger.apache_spark_repl.name = org.apache.spark.repl.Main
+logger.apache_spark_repl.level = warn
+# Set logging of integration testsuite to INFO level
+logger.hudi_integ.name = org.apache.hudi.integ.testsuite
+logger.hudi_integ.level = info
+# Settings to quiet third party logs that are too verbose
+logger.apache_spark_jetty.name = org.spark_project.jetty
+logger.apache_spark_jetty.level = warn
+logger.apache_spark_jett_lifecycle.name = org.spark_project.jetty.util.component.AbstractLifeCycle
+logger.apache_spark_jett_lifecycle.level = error
+logger.apache_spark_repl_imain.name = org.apache.spark.repl.SparkIMain$exprTyper
+logger.apache_spark_repl_imain.level = info
+logger.apache_spark_repl_iloop.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
+logger.apache_spark_repl_iloop.level = info
+logger.parquet.name = org.apache.parquet
+logger.parquet.level = error
+logger.spark.name = org.apache.spark
+logger.spark.level = warn
+# Disabling Jetty logs
+logger.jetty.name = org.apache.hudi.org.eclipse.jetty
+logger.jetty.level = error
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+logger.hive_handler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
+logger.hive_handler.level = fatal
+logger.hive_func_registry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
+logger.hive_func_registry.level = error
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc
new file mode 100644
index 0000000000..aa8baaf44b
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+{
+  "type":"record",
+  "name":"stock_ticks",
+  "fields":[{
+     "name": "volume",
+     "type": "long"
+  }, {
+     "name": "ts", 
+     "type": "string"
+  }, {
+     "name": "symbol", 
+     "type": "string"
+  },{
+     "name": "year", 
+     "type": "int"
+  },{
+     "name": "month", 
+     "type": "string"
+  },{
+     "name": "high", 
+     "type": "double"
+  },{
+     "name": "low", 
+     "type": "double"
+  },{
+     "name": "key", 
+     "type": "string"
+  },{
+     "name": "date", 
+     "type":"string"
+  }, {
+     "name": "close", 
+     "type": "double"
+  }, {
+     "name": "open", 
+     "type": "double"
+  }, {
+     "name": "day", 
+     "type":"string"
+  }
+]}
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf
new file mode 100644
index 0000000000..d085bfe588
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+spark.master                     local[3]
+spark.eventLog.dir               hdfs://namenode:8020/tmp/spark-events
+spark.serializer                 org.apache.spark.serializer.KryoSerializer
+spark.kryo.registrator           org.apache.spark.HoodieSparkKryoRegistrar
+
+#spark.executor.memory            4g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh
new file mode 100755
index 0000000000..390d09f967
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function error_exit {
+    echo "$1" >&2   ## Send message to stderr. Exclude >&2 if you don't want it that way.
+    exit "${2:-1}"  ## Return a code specified by $2 or 1 by default.
+}
+
+if [ -z "${HADOOP_HOME}" ]; then
+  error_exit "Please make sure the environment variable HADOOP_HOME is setup"
+fi
+
+if [ -z "${HIVE_HOME}" ]; then
+  error_exit "Please make sure the environment variable HIVE_HOME is setup"
+fi
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+#Ensure we pick the right jar even for hive11 builds
+HUDI_HIVE_UBER_JAR=`ls -c $DIR/./hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar | grep -v source | head -1`
+
+if [ -z "$HADOOP_CONF_DIR" ]; then
+  echo "setting hadoop conf dir"
+  HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
+fi
+
+## Include only specific packages from HIVE_HOME/lib to avoid version mismatches
+HIVE_EXEC=`ls ${HIVE_HOME}/lib/hive-exec-*.jar | tr '\n' ':'`
+HIVE_SERVICE=`ls ${HIVE_HOME}/lib/hive-service-*.jar | grep -v rpc | tr '\n' ':'`
+HIVE_METASTORE=`ls ${HIVE_HOME}/lib/hive-metastore-*.jar | tr '\n' ':'`
+HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | tr '\n' ':'`
+if [ -z "${HIVE_JDBC}" ]; then
+  HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | grep -v handler | tr '\n' ':'`
+fi
+HIVE_JACKSON=`ls ${HIVE_HOME}/lib/jackson-*.jar | tr '\n' ':'`
+HIVE_JARS=$HIVE_METASTORE:$HIVE_SERVICE:$HIVE_EXEC:$HIVE_JDBC:$HIVE_JACKSON
+
+HADOOP_HIVE_JARS=${HIVE_JARS}:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*
+
+echo "Running Command : java -cp 
${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR}:$HUDI_HIVE_UBER_JAR 
org.apache.hudi.hive.HiveSyncTool $@"
+java -cp $HUDI_HIVE_UBER_JAR:${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR} 
org.apache.hudi.hive.HiveSyncTool "$@"
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh
new file mode 100755
index 0000000000..a5edb7676a
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo "Copying spark default config and setting up configs"
+cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/.
+cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/.
+echo "sleep 10, wait hdfs start"
+sleep 10
+echo "hadoop fs -mkdir -p /var/demo/"
+hadoop fs -mkdir -p /var/demo/
+echo "hadoop fs -mkdir -p /tmp/spark-events"
+hadoop fs -mkdir -p /tmp/spark-events
+echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/."
+hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/.
+echo "chmod +x /var/scripts/run_sync_tool.sh"
+chmod +x /var/scripts/run_sync_tool.sh
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh
new file mode 100755
index 0000000000..a55dddd86d
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo "Copying spark default config and setting up configs"
+cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/.
+cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/.
+echo "sleep 10, wait hdfs start"
+sleep 10
+echo "hadoop fs -mkdir -p /var/demo/"
+hadoop fs -mkdir -p /var/demo/
+echo "hadoop fs -mkdir -p /tmp/spark-events"
+hadoop fs -mkdir -p /tmp/spark-events
+echo "hadoop fs -mkdir -p /user/hive/"
+hadoop fs -mkdir -p /user/hive/
+echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/."
+hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/.
+echo "hadoop fs -copyFromLocal -f 
/var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/"
+hadoop fs -copyFromLocal -f 
/var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/
+echo "chmod +x /var/scripts/run_sync_tool.sh"
+chmod +x /var/scripts/run_sync_tool.sh
+
+echo "Start synchronizing the stock_ticks_cow table"
+/var/scripts/run_sync_tool.sh \
+  --jdbc-url jdbc:hive2://hiveserver:10000 \
+  --user hive \
+  --pass hive \
+  --partitioned-by date \
+  --base-path /user/hive/warehouse/stock_ticks_cow \
+  --database default \
+  --table stock_ticks_cow \
+  --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
+
+echo "Start synchronizing the stock_ticks_mor table"
+/var/scripts/run_sync_tool.sh \
+  --jdbc-url jdbc:hive2://hiveserver:10000 \
+  --user hive \
+  --pass hive \
+  --partitioned-by date \
+  --base-path /user/hive/warehouse/stock_ticks_mor \
+  --database default \
+  --table stock_ticks_mor \
+  --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
+
+echo "Start synchronizing the hudi_cow_pt_tbl table"
+/var/scripts/run_sync_tool.sh \
+  --jdbc-url jdbc:hive2://hiveserver:10000 \
+  --user hive \
+  --pass hive \
+  --partitioned-by dt \
+  --base-path /user/hive/warehouse/hudi_cow_pt_tbl \
+  --database default \
+  --table hudi_cow_pt_tbl \
+  --partition-value-extractor org.apache.hudi.hive.HiveStylePartitionValueExtractor
+
+echo "Start synchronizing the hudi_non_part_cow table"
+/var/scripts/run_sync_tool.sh \
+  --jdbc-url jdbc:hive2://hiveserver:10000 \
+  --user hive \
+  --pass hive \
+  --base-path /user/hive/warehouse/hudi_non_part_cow \
+  --database default \
+  --table hudi_non_part_cow \
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index 1851b03051..283ed7b35b 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -37,7 +37,7 @@ Usage: $0 <options>
      --stop             stop the specified components
 
   All valid components:
-    mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg
+    mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg,hudi
   "
     exit 1
 }
@@ -60,7 +60,7 @@ STOP=0
 
 if [[ "$#" == 1 ]]; then
     # default
-    COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg"
+    COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi"
 else
     while true; do
         case "$1" in
@@ -92,7 +92,7 @@ else
     done
     if [[ "${COMPONENTS}"x == ""x ]]; then
         if [[ "${STOP}" -eq 1 ]]; then
-            COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg"
+            COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi"
         fi
     fi
 fi
@@ -128,6 +128,7 @@ RUN_CLICKHOUSE=0
 RUN_HIVE=0
 RUN_ES=0
 RUN_ICEBERG=0
+RUN_HUDI=0
 for element in "${COMPONENTS_ARR[@]}"; do
     if [[ "${element}"x == "mysql"x ]]; then
         RUN_MYSQL=1
@@ -145,6 +146,8 @@ for element in "${COMPONENTS_ARR[@]}"; do
         RUN_HIVE=1
     elif [[ "${element}"x == "iceberg"x ]]; then
         RUN_ICEBERG=1
+    elif [[ "${element}"x == "hudi"x ]]; then
+        RUN_HUDI=1
     else
         echo "Invalid component: ${element}"
         usage
@@ -265,3 +268,25 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
         sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
     fi
 fi
+
+if [[ "${RUN_HUDI}" -eq 1 ]]; then
+    # hudi
+    cp "${ROOT}"/docker-compose/hudi/hudi.yaml.tpl "${ROOT}"/docker-compose/hudi/hudi.yaml
+    sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hudi/hudi.yaml
+    sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env down
+    if [[ "${STOP}" -ne 1 ]]; then
+        sudo rm -rf "${ROOT}"/docker-compose/hudi/historyserver
+        sudo mkdir "${ROOT}"/docker-compose/hudi/historyserver
+        sudo rm -rf "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql
+        sudo mkdir "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql
+        if [[ ! -d "${ROOT}/docker-compose/hudi/scripts/hudi_docker_compose_attached_file" ]]; then
+            echo "Attached files do not exist, please download the https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip file to the docker-compose/hudi/scripts/ directory and unzip it."
+            exit 1
+        fi
+        sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env up -d
+        echo "sleep 15, wait server start"
+        sleep 15
+        docker exec -it adhoc-1 /bin/bash /var/scripts/setup_demo_container_adhoc_1.sh
+        docker exec -it adhoc-2 /bin/bash /var/scripts/setup_demo_container_adhoc_2.sh
+    fi
+fi
diff --git a/docs/zh-CN/community/developer-guide/regression-testing.md b/docs/zh-CN/community/developer-guide/regression-testing.md
index 48c6de8f7c..3617b4d769 100644
--- a/docs/zh-CN/community/developer-guide/regression-testing.md
+++ b/docs/zh-CN/community/developer-guide/regression-testing.md
@@ -605,10 +605,10 @@ Doris supports querying some external data sources, so the regression framework also provides
 
 1. Start Containers
 
-    Doris currently provides Docker compose setups for data sources such as es, mysql, pg, hive, sqlserver, oracle, and iceberg. The related files are stored in the `docker/thirdparties/docker-compose` directory.
+    Doris currently provides Docker compose setups for data sources such as es, mysql, pg, hive, sqlserver, oracle, iceberg, and hudi. The related files are stored in the `docker/thirdparties/docker-compose` directory.
 
     By default, the Docker containers of all external data sources can be started directly with the following command:
-    (Note: the hive container needs pre-built data files to be downloaded first; see the hive-related documentation below.)
+    (Note: the hive and hudi containers need pre-built data files to be downloaded first; see the hive- and hudi-related documentation below.)
 
     ```
     cd docker/thirdparties && sh run-thirdparties-docker.sh
@@ -692,48 +692,109 @@ Doris supports querying some external data sources, so the regression framework also provides
        * `clickhouse.yaml.tpl`: Docker compose file template. No changes needed.
        * `clickhouse.env`: Configures the ClickHouse external port, 8123 by default.
 
-    8. Iceberg
+   8. Iceberg
 
-        Provides an Iceberg + Spark + Minio image combination, stored under docker/thirdparties/docker-compose/iceberg/.
+       Provides an Iceberg + Spark + Minio image combination, stored under docker/thirdparties/docker-compose/iceberg/.
 
-        * `iceberg.yaml.tpl`: Docker compose file template. No changes needed.
-        * `entrypoint.sh.tpl`: Template of the initialization script run after the image starts. No changes needed.
-        * `spark-defaults.conf.tpl`: Spark configuration file template. No changes needed.
-        * `iceberg.env`: External port configuration file; change each exposed port to avoid port conflicts.
+       * `iceberg.yaml.tpl`: Docker compose file template. No changes needed.
+       * `entrypoint.sh.tpl`: Template of the initialization script run after the image starts. No changes needed.
+       * `spark-defaults.conf.tpl`: Spark configuration file template. No changes needed.
+       * `iceberg.env`: External port configuration file; change each exposed port to avoid port conflicts.
 
-        After startup, spark-sql can be started with the following command:
+       After startup, spark-sql can be started with the following command:
 
-        `docker exec -it doris-xx-spark-iceberg spark-sql`        
+       `docker exec -it doris-xx-spark-iceberg spark-sql`        
 
-        where `doris-xx-spark-iceberg` is the container name.
+       where `doris-xx-spark-iceberg` is the container name.
 
-        Example spark-sql operations on iceberg:
+       Example spark-sql operations on iceberg:
 
-        ```
-        create database db1;
-        show databases;
-        create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1);
-        insert into db1.test1 values(1,2,'abc');
-        select * from db1.test1;
-        quit;
-        ```
+       ```
+       create database db1;
+       show databases;
+       create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1);
+       insert into db1.test1 values(1,2,'abc');
+       select * from db1.test1;
+       quit;
+       ```
 
-        You can also access it via spark-shell:
+       You can also access it via spark-shell:
 
-        ```
-        docker exec -it doris-xx-spark-iceberg spark-shell
-        
-        spark.sql(s"create database db1")
-        spark.sql(s"show databases").show()
-        spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show()
-        spark.sql(s"show tables from db1").show()
-        spark.sql(s"insert into db1.test1 values(1,2,'abc')").show()
-        spark.sql(s"select * from db1.test1").show()
-        :q
-        ```
+       ```
+       docker exec -it doris-xx-spark-iceberg spark-shell
+       
+       spark.sql(s"create database db1")
+       spark.sql(s"show databases").show()
+       spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show()
+       spark.sql(s"show tables from db1").show()
+       spark.sql(s"insert into db1.test1 values(1,2,'abc')").show()
+       spark.sql(s"select * from db1.test1").show()
+       :q
+       ```
+
+       For more usage, see the [Tabular official documentation](https://tabular.io/blog/docker-spark-and-iceberg/).
+   9. Hudi
 
-        For more usage, see the [Tabular official documentation](https://tabular.io/blog/docker-spark-and-iceberg/).
+      The Hudi-related Docker compose files are stored under docker/thirdparties/docker-compose/hudi.
 
+      * `hudi.yaml.tpl`: Docker compose file template. No changes needed.
+      * `hadoop.env`: Configuration file template. No changes needed.
+      * `scripts/`: this directory is mounted into the containers after they start. Its contents need no changes, but note that before starting the containers, the pre-built files must be downloaded first:
+        download `https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip` into the `scripts/` directory and unzip it (see the sketch below).
+
+      * Before starting, you can add the following entries to `/etc/hosts` to avoid `UnknownHostException` errors:
+      ```
+      127.0.0.1 adhoc-1
+      127.0.0.1 adhoc-2
+      127.0.0.1 namenode
+      127.0.0.1 datanode1
+      127.0.0.1 hiveserver
+      127.0.0.1 hivemetastore
+      127.0.0.1 sparkmaster
+      ```
+         
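+      For illustration, fetching and unpacking the attached files could look like the following minimal sketch (assuming `wget` and `unzip` are available on the host):
+
+      ```
+      # download the pre-built demo files into the scripts/ directory and unpack them in place
+      cd docker/thirdparties/docker-compose/hudi/scripts
+      wget https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip
+      unzip hudi_docker_compose_attached_file.zip
+      ```
+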
+      After startup, hive queries can be run with the following commands:
+      
+      ```
+      docker exec -it adhoc-2 /bin/bash
+      
+      beeline -u jdbc:hive2://hiveserver:10000 \
+      --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \
+      --hiveconf hive.stats.autogather=false
+      
+      show tables;
+      show partitions stock_ticks_mor_rt;
+      select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG';
+      select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = 'GOOG';
+      exit;
+      ```
+
+      You can also access it via spark-shell:
+
+      ```
+      docker exec -it adhoc-1 /bin/bash
+      
+      $SPARK_INSTALL/bin/spark-shell \
+        --jars /var/scripts/hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar \
+        --master local[2] \
+        --driver-class-path $HADOOP_CONF_DIR \
+        --conf spark.sql.hive.convertMetastoreParquet=false \
+        --deploy-mode client \
+        --driver-memory 1G \
+        --executor-memory 3G \
+        --num-executors 1
+      
+      spark.sql("show tables").show(100, false)
+      spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol 
HAVING symbol = 'GOOG'").show(100, false)
+      spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close 
 from stock_ticks_cow where  symbol = 'GOOG'").show(100, false)
+      spark.sql("select symbol, max(ts) from stock_ticks_mor_ro group by 
symbol HAVING symbol = 'GOOG'").show(100, false)
+      spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by 
symbol HAVING symbol = 'GOOG'").show(100, false)
+      spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close 
 from stock_ticks_mor_ro where  symbol = 'GOOG'").show(100, false)
+      :q
+      ```
+
+      For more usage, see the [Hudi official documentation](https://hudi.apache.org/docs/docker_demo).
 2. Run regression tests
 
     The regression tests for external tables are disabled by default. They can be enabled by modifying the following settings in `regression-test/conf/regression-conf.groovy`:

