This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this
push:
new e6565f8 KYLIN-4452 Kylin 4 with Docker
e6565f8 is described below
commit e6565f87fdeaa9e6bb8e2a5951ff7f3046997b96
Author: Zhichao Zhang <[email protected]>
AuthorDate: Tue Sep 15 13:36:27 2020 +0800
KYLIN-4452 Kylin 4 with Docker
Docker image for Apache Kylin 4 Alpha
---
docker/Dockerfile | 29 +--
docker/Dockerfile_dev | 43 ----
docker/Dockerfile_hadoop | 43 ++--
docker/README.md | 15 +-
docker/build_image.sh | 4 +-
docker/conf/hadoop/capacity-scheduler.xml | 134 +++++++++++
docker/conf/hbase/hbase-site.xml | 32 ---
docker/conf/kylin/kylin.properties | 371 ++++++++++++++++++++++++++++++
docker/conf/spark/spark-defaults.conf | 55 +++++
docker/conf/spark/spark-env.sh | 77 +++++++
docker/conf/zk/zoo.cfg | 45 ++++
docker/entrypoint.sh | 66 ++++--
docker/run_container.sh | 5 +-
13 files changed, 760 insertions(+), 159 deletions(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 4ca399f..a168e6c 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -16,26 +16,19 @@
#
# Docker image for apache kylin, based on the Hadoop image
-FROM hadoop2.7-all-in-one
+FROM hadoop2.7-all-in-one-for-kylin4
-ENV KYLIN_VERSION 3.0.1
-ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x
-
-# Download released Kylin
-RUN wget
https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz
\
- && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz \
- && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz
-
-RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar
>> $KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics-
[...]
- && echo kylin.source.hive.quote-enabled=false >>
$KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history
>> $KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history
>> $KYLIN_HOME/conf/kylin.properties \
- && echo kylin.source.hive.redistribute-flat-table=false >>
$KYLIN_HOME/conf/kylin.properties
+ENV KYLIN_VERSION 4.0.0-alpha
+ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2
+# Download Kylin
+RUN wget
https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz
\
+ && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \
+ && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz
+RUN rm -f $KYLIN_HOME/conf/kylin.properties
+COPY conf/kylin/* $KYLIN_HOME/conf/
+RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/
+RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh
COPY ./entrypoint.sh /home/admin/entrypoint.sh
RUN chmod u+x /home/admin/entrypoint.sh
diff --git a/docker/Dockerfile_dev b/docker/Dockerfile_dev
deleted file mode 100644
index c0a8933..0000000
--- a/docker/Dockerfile_dev
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Docker image for apache kylin, based on the Hadoop image
-FROM hadoop2.7-all-in-one
-
-## for dev
-ENV KYLIN_VERSION 4.0.0-SNAPSHOT
-ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin
-# Copy compiled Kylin from local
-COPY apache-kylin-$KYLIN_VERSION-bin.tar.gz /home/admin/
-RUN tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \
- && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz
-
-RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >>
$KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar
>> $KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics-
[...]
- && echo kylin.source.hive.quote-enabled=false >>
$KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history
>> $KYLIN_HOME/conf/kylin.properties \
- && echo
kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history
>> $KYLIN_HOME/conf/kylin.properties \
- && echo kylin.source.hive.redistribute-flat-table=false >>
$KYLIN_HOME/conf/kylin.properties
-
-
-COPY ./entrypoint.sh /home/admin/entrypoint.sh
-RUN chmod u+x /home/admin/entrypoint.sh
-
-ENTRYPOINT ["/home/admin/entrypoint.sh"]
diff --git a/docker/Dockerfile_hadoop b/docker/Dockerfile_hadoop
index a0312f4..8e76855 100644
--- a/docker/Dockerfile_hadoop
+++ b/docker/Dockerfile_hadoop
@@ -15,16 +15,14 @@
# limitations under the License.
#
-# Docker image with Hadoop/Hive/HBase/Spark/ZK/Kafka/Livy installed
+# Docker image with Hadoop/Spark/Hive/ZK/Kafka installed
FROM centos:6.9
ENV HIVE_VERSION 1.2.1
ENV HADOOP_VERSION 2.7.0
-ENV HBASE_VERSION 1.1.2
-ENV SPARK_VERSION 2.3.1
+ENV SPARK_VERSION 2.4.6
ENV ZK_VERSION 3.4.6
ENV KAFKA_VERSION 1.1.1
-ENV LIVY_VERSION 0.6.0
ENV JAVA_HOME /home/admin/jdk1.8.0_141
ENV MVN_HOME /home/admin/apache-maven-3.6.1
@@ -32,13 +30,11 @@ ENV HADOOP_HOME /home/admin/hadoop-$HADOOP_VERSION
ENV HIVE_HOME /home/admin/apache-hive-$HIVE_VERSION-bin
ENV HADOOP_CONF $HADOOP_HOME/etc/hadoop
ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
-ENV HBASE_HOME /home/admin/hbase-$HBASE_VERSION
-ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6
-ENV SPARK_CONF_DIR /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6/conf
+ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7
+ENV SPARK_CONF_DIR $SPARK_HOME/conf
ENV ZK_HOME /home/admin/zookeeper-$ZK_VERSION
ENV KAFKA_HOME /home/admin/kafka_2.11-$KAFKA_VERSION
-ENV LIVY_HOME /home/admin/apache-livy-$LIVY_VERSION-incubating-bin
-ENV PATH
$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$HBASE_HOME/bin:$MVN_HOME/bin:spark-$SPARK_VERSION-bin-hadoop2.6/bin:$KAFKA_HOME/bin
+ENV PATH
$PATH:$JAVA_HOME/bin:$ZK_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$MVN_HOME/bin:$KAFKA_HOME/bin
USER root
@@ -69,37 +65,32 @@ RUN wget
https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hado
&& mkdir -p /data/hadoop
COPY conf/hadoop/* $HADOOP_CONF/
-# setup hbase
-RUN wget
https://archive.apache.org/dist/hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz
\
- && tar -zxvf /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \
- && rm -f /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \
- && mkdir -p /data/hbase \
- && mkdir -p /data/zookeeper
-COPY conf/hbase/hbase-site.xml $HBASE_HOME/conf
-
# setup hive
RUN wget
https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
\
&& tar -zxvf /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \
&& rm -f /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \
&& wget -P $HIVE_HOME/lib
https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.24/mysql-connector-java-5.1.24.jar
COPY conf/hive/hive-site.xml $HIVE_HOME/conf
+COPY conf/hive/hive-site.xml $HADOOP_CONF/
# setup spark
-RUN wget
https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.6.tgz
\
- && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \
- && rm -f /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \
+RUN wget
https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz
\
+ && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \
+ && rm -f /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \
&& cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf \
&& cp $SPARK_HOME/yarn/*.jar $HADOOP_HOME/share/hadoop/yarn/lib
RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $SPARK_HOME/jars
-RUN echo spark.sql.catalogImplementation=hive >
$SPARK_HOME/conf/spark-defaults.conf
+RUN cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-1.2.1.jar
$SPARK_HOME/jars/
+COPY conf/spark/* $SPARK_CONF_DIR/
# setup kafka
RUN wget
https://archive.apache.org/dist/kafka/$KAFKA_VERSION/kafka_2.11-$KAFKA_VERSION.tgz
\
&& tar -zxvf /home/admin/kafka_2.11-$KAFKA_VERSION.tgz \
&& rm -f /home/admin/kafka_2.11-$KAFKA_VERSION.tgz
-# setup livy
-RUN wget
https://www.apache.org/dist/incubator/livy/$LIVY_VERSION-incubating/apache-livy-$LIVY_VERSION-incubating-bin.zip
\
- && unzip /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip \
- && rm -f /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip
-
+# setup zk
+RUN wget
https://archive.apache.org/dist/zookeeper/zookeeper-$ZK_VERSION/zookeeper-$ZK_VERSION.tar.gz
\
+ && tar -zxvf /home/admin/zookeeper-$ZK_VERSION.tar.gz \
+ && rm -f /home/admin/zookeeper-$ZK_VERSION.tar.gz \
+ && mkdir -p /data/zookeeper
+COPY conf/zk/zoo.cfg $ZK_HOME/conf
diff --git a/docker/README.md b/docker/README.md
index 376547f..d137c8b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -4,8 +4,7 @@ In order to allow users to easily try Kylin, and to facilitate
developers to ver
- Jdk 1.8
- Hadoop 2.7.0
- Hive 1.2.1
-- Hbase 1.1.2
-- Spark 2.3.1
+- Spark 2.4.6
- Zookeeper 3.4.6
- Kafka 1.1.1
- MySQL 5.1.73
@@ -16,7 +15,7 @@ In order to allow users to easily try Kylin, and to
facilitate developers to ver
We have pushed the Kylin images to the [docker
hub](https://hub.docker.com/r/apachekylin/apache-kylin-standalone). You do not
need to build the image locally, just pull the image from remote (you can
browse docker hub to check the available versions):
```
-docker pull apachekylin/apache-kylin-standalone:3.0.1
+docker pull apachekylin/apache-kylin-standalone:4.0.0-alpha
```
After the pull is successful, execute "sh run_container.sh" or the following
command to start the container:
@@ -29,16 +28,14 @@ docker run -d \
-p 50070:50070 \
-p 8032:8032 \
-p 8042:8042 \
--p 16010:16010 \
-apache-kylin-standalone
+-p 2181:2181 \
+apachekylin/apache-kylin-standalone:4.0.0-alpha
```
The following services are automatically started when the container starts:
- NameNode, DataNode
- ResourceManager, NodeManager
-- HBase
-- Kafka
- Kylin
and run automatically `$KYLIN_HOME/bin/sample.sh `, create a
kylin_streaming_topic topic in Kafka and continue to send data to this topic.
This is to let the users start the container and then experience the batch and
streaming way to build the cube and query.
@@ -48,7 +45,6 @@ After the container is started, we can enter the container
through the `docker e
- Kylin Web UI:
[http://127.0.0.1:7070/kylin/login](http://127.0.0.1:7070/kylin/login)
- HDFS NameNode Web UI: [http://127.0.0.1:50070](http://127.0.0.1:50070/)
- YARN ResourceManager Web UI: [http://127.0.0.1:8088](http://127.0.0.1:8088/)
-- HBase Web UI: [http://127.0.0.1:16010](http://127.0.0.1:16010/)
In the container, the relevant environment variables are as follows:
@@ -56,8 +52,7 @@ In the container, the relevant environment variables are as
follows:
JAVA_HOME=/home/admin/jdk1.8.0_141
HADOOP_HOME=/home/admin/hadoop-2.7.0
KAFKA_HOME=/home/admin/kafka_2.11-1.1.1
-SPARK_HOME=/home/admin/spark-2.3.1-bin-hadoop2.6
-HBASE_HOME=/home/admin/hbase-1.1.2
+SPARK_HOME=/home/admin/spark-2.4.6-bin-hadoop2.7
HIVE_HOME=/home/admin/apache-hive-1.2.1-bin
```
diff --git a/docker/build_image.sh b/docker/build_image.sh
old mode 100644
new mode 100755
index 19fbec5..9c0b925
--- a/docker/build_image.sh
+++ b/docker/build_image.sh
@@ -23,5 +23,5 @@ echo "build image in dir "${DIR}
echo "start build Hadoop docker image"
-docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one .
-docker build -f Dockerfile -t apache-kylin-standalone .
+docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 .
+docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha .
diff --git a/docker/conf/hadoop/capacity-scheduler.xml
b/docker/conf/hadoop/capacity-scheduler.xml
new file mode 100644
index 0000000..503ee3e
--- /dev/null
+++ b/docker/conf/hadoop/capacity-scheduler.xml
@@ -0,0 +1,134 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+ <name>yarn.scheduler.capacity.maximum-applications</name>
+ <value>2</value>
+ <description>
+ Maximum number of applications that can be pending and running.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+ <value>0.3</value>
+ <description>
+ Maximum percent of resources in the cluster which can be used to run
+ application masters i.e. controls number of concurrent running
+ applications.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.resource-calculator</name>
+
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+ <description>
+ The ResourceCalculator implementation to be used to compare
+ Resources in the scheduler.
+ The default i.e. DefaultResourceCalculator only uses Memory while
+ DominantResourceCalculator uses dominant-resource to compare
+ multi-dimensional resources such as Memory, CPU etc.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.queues</name>
+ <value>default</value>
+ <description>
+ The queues at this level (root is the root queue).
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.capacity</name>
+ <value>100</value>
+ <description>Default queue target capacity.</description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+ <value>1</value>
+ <description>
+ Default queue user limit as a percentage from 0.0 to 1.0.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+ <value>100</value>
+ <description>
+ The maximum capacity of the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.state</name>
+ <value>RUNNING</value>
+ <description>
+ The state of the default queue. State can be one of RUNNING or STOPPED.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+ <value>*</value>
+ <description>
+ The ACL of who can submit jobs to the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+ <value>*</value>
+ <description>
+ The ACL of who can administer jobs on the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.node-locality-delay</name>
+ <value>40</value>
+ <description>
+ Number of missed scheduling opportunities after which the
CapacityScheduler
+ attempts to schedule rack-local containers.
+ Typically this should be set to number of nodes in the cluster, By
default is setting
+ approximately number of nodes in one rack which is 40.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.queue-mappings</name>
+ <value></value>
+ <description>
+ A list of mappings that will be used to assign jobs to queues
+ The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
+ Typically this list will be used to map users to queues,
+ for example, u:%user:%user maps all users to queues with the same name
+ as the user.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
+ <value>false</value>
+ <description>
+ If a queue mapping is present, will it override the value specified
+ by the user? This can be used by administrators to place jobs in queues
+ that are different than the one specified by the user.
+ The default is false.
+ </description>
+ </property>
+
+</configuration>
diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml
deleted file mode 100644
index 0c77926..0000000
--- a/docker/conf/hbase/hbase-site.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!--
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. See accompanying LICENSE file.
--->
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
- <property>
- <name>hbase.rootdir</name>
- <value>file:///data/hbase</value>
- </property>
- <property>
- <name>hbase.zookeeper.property.dataDir</name>
- <value>/data/zookeeper</value>
- </property>
- <property>
- <name>hbase.master.info.port</name>
- <value>16010</value>
- </property>
-</configuration>
\ No newline at end of file
diff --git a/docker/conf/kylin/kylin.properties
b/docker/conf/kylin/kylin.properties
new file mode 100644
index 0000000..7054d83
--- /dev/null
+++ b/docker/conf/kylin/kylin.properties
@@ -0,0 +1,371 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
+# The below commented values will take effect as default settings
+# Uncomment and override them if necessary
+
+
+
+#
+#### METADATA | ENV ###
+#
+## The metadata store has two implementations(RDBMS/HBase), while RDBMS is
recommended in Kylin 4.X
+## Please refer to
https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0
if you prefer HBase
+#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10
+#
+## metadata cache sync retry times
+#kylin.metadata.sync-retries=3
+#
+## Working folder in HDFS, better be qualified absolute path, make sure user
has the right permission to this directory
+#kylin.env.hdfs-working-dir=/kylin
+#
+## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no
difference in terms of functions.
+#kylin.env=QA
+#
+## kylin zk base path
+#kylin.env.zookeeper-base-path=/kylin
+#
+## Run a TestingServer for curator locally
+#kylin.env.zookeeper-is-local=false
+#
+## Connect to a remote zookeeper with the url, should set
kylin.env.zookeeper-is-local to false
+#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com
+#
+#### SERVER | WEB | RESTCLIENT ###
+#
+## Kylin server mode, valid value [all, query, job]
+#kylin.server.mode=all
+#
+## List of web servers in use, this enables one web server instance to sync up
with other servers.
+#kylin.server.cluster-servers=localhost:7070
+#
+## Display timezone on UI,format like[GMT+N or GMT-N]
+#kylin.web.timezone=
+#
+## Timeout value for the queries submitted through the Web UI, in milliseconds
+#kylin.web.query-timeout=300000
+#
+#kylin.web.cross-domain-enabled=true
+#
+##allow user to export query result
+#kylin.web.export-allow-admin=true
+#kylin.web.export-allow-other=true
+#
+## Hide measures in measure list of cube designer, separate by comma
+#kylin.web.hide-measures=RAW
+#
+##max connections of one route
+#kylin.restclient.connection.default-max-per-route=20
+#
+##max connections of one rest-client
+#kylin.restclient.connection.max-total=200
+#
+#### PUBLIC CONFIG ###
+#kylin.engine.default=6
+#kylin.storage.default=4
+#kylin.web.hive-limit=20
+#kylin.web.help.length=4
+#kylin.web.help.0=start|Getting
Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html
+#kylin.web.help.1=odbc|ODBC
Driver|http://kylin.apache.org/docs/tutorial/odbc.html
+#kylin.web.help.2=tableau|Tableau
Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html
+#kylin.web.help.3=onboard|Cube Design
Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html
+#kylin.web.link-streaming-guide=http://kylin.apache.org/
+#kylin.htrace.show-gui-trace-toggle=false
+#kylin.web.link-hadoop=
+#kylin.web.link-diagnostic=
+#kylin.web.contact-mail=
+#kylin.server.external-acl-provider=
+#
+## Default time filter for job list, 0->current day, 1->last one day, 2->last
one week, 3->last one year, 4->all
+#kylin.web.default-time-filter=1
+#
+#### SOURCE ###
+## Define how to access to hive metadata
+## When user deploy kylin on AWS EMR and Glue is used as external metadata,
use gluecatalog instead
+#kylin.source.hive.metadata-type=hcatalog
+#
+## Hive client, valid value [cli, beeline]
+#kylin.source.hive.client=cli
+#
+## Absolute path to beeline shell, can be set to spark beeline instead of the
default hive beeline on PATH
+#kylin.source.hive.beeline-shell=beeline
+#
+## Hive database name for putting the intermediate flat tables
+#kylin.source.hive.database-for-flat-table=default
+#
+#### STORAGE ###
+#
+## The storage for final cube file in hbase
+#kylin.storage.url=hbase
+#
+## clean real storage after delete operation
+## if you want to delete the real storage like htable of deleting segment, you
can set it to true
+#kylin.storage.clean-after-delete-operation=false
+#
+#### JOB ###
+#
+## Max job retry on error, default 0: no retry
+#kylin.job.retry=0
+#
+## Max count of concurrent jobs running
+#kylin.job.max-concurrent-jobs=10
+#
+## The percentage of the sampling, default 100%
+#kylin.job.sampling-percentage=100
+#
+## If true, will send email notification on job complete
+##kylin.job.notification-enabled=true
+##kylin.job.notification-mail-enable-starttls=true
+##kylin.job.notification-mail-host=smtp.office365.com
+##kylin.job.notification-mail-port=587
+##[email protected]
+##kylin.job.notification-mail-password=mypassword
+##[email protected]
+#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler
+#kylin.job.scheduler.default=0
+#
+#### CUBE | DICTIONARY ###
+#
+#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
+#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
+#
+## 'auto', 'inmem', 'layer' or 'random' for testing
+#kylin.cube.algorithm=layer
+#
+## A smaller threshold prefers layer, a larger threshold prefers in-mem
+#kylin.cube.algorithm.layer-or-inmem-threshold=7
+#
+## auto use inmem algorithm:
+## 1, cube planner optimize job
+## 2, no source record
+#kylin.cube.algorithm.inmem-auto-optimize=true
+#
+#kylin.cube.aggrgroup.max-combination=32768
+#
+#kylin.cube.cubeplanner.enabled=false
+#kylin.cube.cubeplanner.enabled-for-existing-cube=false
+#kylin.cube.cubeplanner.expansion-threshold=15.0
+#kylin.cube.cubeplanner.recommend-cache-max-size=200
+#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
+#kylin.cube.cubeplanner.algorithm-threshold-greedy=8
+#kylin.cube.cubeplanner.algorithm-threshold-genetic=23
+#
+#### QUERY ###
+#
+## Controls the maximum number of bytes a query is allowed to scan storage.
+## The default value 0 means no limit.
+## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per
coprocessor.
+#kylin.query.max-scan-bytes=0
+#
+#kylin.query.cache-enabled=true
+#kylin.query.cache-threshold-scan-count=10240
+#kylin.query.cache-threshold-duration=2000
+#kylin.query.cache-threshold-scan-bytes=1048576
+#kylin.query.large-query-threshold=1000000
+#
+## Controls extras properties for Calcite jdbc driver
+## all extras properties should be under prefix
"kylin.query.calcite.extras-props."
+## case sensitive, default: true, to enable case insensitive set it to false
+## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE
+#kylin.query.calcite.extras-props.caseSensitive=true
+## how to handle unquoted identity, default: TO_UPPER, available options:
UNCHANGED, TO_UPPER, TO_LOWER
+## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING
+#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER
+## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE,
BACK_TICK, BRACKET
+## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING
+#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE
+## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal
+## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum
+#kylin.query.calcite.extras-props.conformance=LENIENT
+#
+## TABLE ACL
+#kylin.query.security.table-acl-enabled=true
+#
+## Usually should not modify this
+#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
+#
+#kylin.query.escape-default-keyword=false
+#
+## Usually should not modify this
+#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
+#
+#### SECURITY ###
+#
+## Spring security profile, options: testing, ldap, saml
+## with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN
to login
+#kylin.security.profile=testing
+#
+## Admin roles in LDAP, for ldap and saml
+#kylin.security.acl.admin-role=admin
+#
+## LDAP authentication configuration
+#kylin.security.ldap.connection-server=ldap://ldap_server:389
+#kylin.security.ldap.connection-username=
+#kylin.security.ldap.connection-password=
+## When you use the customized CA certificate library for user authentication
based on LDAPs, you need to configure this item.
+## The value of this item will be added to the JVM parameter
javax.net.ssl.trustStore.
+#kylin.security.ldap.connection-truststore=
+#
+## LDAP user account directory;
+#kylin.security.ldap.user-search-base=
+#kylin.security.ldap.user-search-pattern=
+#kylin.security.ldap.user-group-search-base=
+#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
+#
+## LDAP service account directory
+#kylin.security.ldap.service-search-base=
+#kylin.security.ldap.service-search-pattern=
+#kylin.security.ldap.service-group-search-base=
+#
+### SAML configurations for SSO
+## SAML IDP metadata file location
+#kylin.security.saml.metadata-file=classpath:sso_metadata.xml
+#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
+#kylin.security.saml.keystore-file=classpath:samlKeystore.jks
+#kylin.security.saml.context-scheme=https
+#kylin.security.saml.context-server-name=hostname
+#kylin.security.saml.context-server-port=443
+#kylin.security.saml.context-path=/kylin
+#
+#### SPARK BUILD ENGINE CONFIGS ###
+#
+## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run
spark-submit
+## This must contain site xmls of core, yarn, hive, and hbase in one folder
+##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
+#
+## Spark conf (default is in spark/conf/spark-defaults.conf)
+#kylin.engine.spark-conf.spark.master=yarn
+##kylin.engine.spark-conf.spark.submit.deployMode=client
+#kylin.engine.spark-conf.spark.yarn.queue=default
+#kylin.engine.spark-conf.spark.executor.cores=1
+#kylin.engine.spark-conf.spark.executor.memory=4G
+#kylin.engine.spark-conf.spark.executor.instances=1
+#kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M
+#kylin.engine.spark-conf.spark.driver.cores=1
+#kylin.engine.spark-conf.spark.driver.memory=1G
+#kylin.engine.spark-conf.spark.shuffle.service.enabled=true
+#kylin.engine.spark-conf.spark.eventLog.enabled=true
+#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
+#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
+#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
+##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1
+#
+## manually upload spark-assembly jar to HDFS and then set this property will
avoid repeatedly uploading jar at runtime
+##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
+#
+## uncomment for HDP
+##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
+#
+#### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+## Enlarge cores and memory to improve query performance in production env,
please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+#
+#kylin.query.spark-conf.spark.master=yarn
+##kylin.query.spark-conf.spark.submit.deployMode=client
+#kylin.query.spark-conf.spark.driver.cores=1
+#kylin.query.spark-conf.spark.driver.memory=4G
+#kylin.query.spark-conf.spark.driver.memoryOverhead=1G
+#kylin.query.spark-conf.spark.executor.cores=1
+#kylin.query.spark-conf.spark.executor.instances=1
+#kylin.query.spark-conf.spark.executor.memory=4G
+#kylin.query.spark-conf.spark.executor.memoryOverhead=1G
+#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
+##kylin.query.spark-conf.spark.sql.shuffle.partitions=40
+##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+#
+## uncomment for HDP
+##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+##kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
+#
+#### QUERY PUSH DOWN ###
+#
+##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl
+##kylin.query.pushdown.update-enabled=false
+
+kylin.env=QA
+kylin.server.mode=all
+kylin.server.host-address=127.0.0.1:7777
+# Display timezone on UI,format like[GMT+N or GMT-N]
+kylin.web.timezone=GMT+8
+
+kylin.source.hive.client=cli
+kylin.source.hive.database-for-flat-table=kylin4
+
+kylin.engine.spark-conf.spark.eventLog.enabled=true
+kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history
+kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history
+kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
+
+kylin.engine.spark-conf.spark.yarn.submit.file.replication=1
+kylin.engine.spark-conf.spark.master=yarn
+kylin.engine.spark-conf.spark.driver.memory=512M
+kylin.engine.spark-conf.spark.driver.memoryOverhead=512M
+kylin.engine.spark-conf.spark.executor.memory=1G
+kylin.engine.spark-conf.spark.executor.instances=1
+kylin.engine.spark-conf.spark.executor.memoryOverhead=512M
+kylin.engine.spark-conf.spark.executor.cores=1
+kylin.engine.spark-conf.spark.sql.shuffle.partitions=1
+kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+
+kylin.storage.columnar.shard-rowcount=2500000
+kylin.storage.columnar.shard-countdistinct-rowcount=1000000
+kylin.storage.columnar.repartition-threshold-size-mb=128
+kylin.storage.columnar.shard-size-mb=128
+
+kylin.query.auto-sparder-context=true
+kylin.query.spark-conf.spark.master=yarn
+kylin.query.spark-conf.spark.driver.memory=512M
+kylin.query.spark-conf.spark.driver.memoryOverhead=512M
+kylin.query.spark-conf.spark.executor.memory=1G
+kylin.query.spark-conf.spark.executor.instances=1
+kylin.query.spark-conf.spark.executor.memoryOverhead=512M
+kylin.query.spark-conf.spark.executor.cores=1
+kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
+kylin.query.spark-conf.spark.sql.shuffle.partitions=1
+kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+kylin.query.spark-conf.spark.eventLog.enabled=true
+kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history
+kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history
+
+# for local cache
+kylin.query.cache-enabled=false
+
+# for pushdown query
+kylin.query.pushdown.update-enabled=false
+kylin.query.pushdown.enabled=true
+kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl
+
+# for Cube Planner
+kylin.cube.cubeplanner.enabled=false
+kylin.server.query-metrics2-enabled=false
+kylin.metrics.reporter-query-enabled=false
+kylin.metrics.reporter-job-enabled=false
+kylin.metrics.monitor-enabled=false
+kylin.web.dashboard-enabled=false
+kylin.web.set-config-enable=true
+
+# metadata for mysql
+kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10
+kylin.env.hdfs-working-dir=/kylin4_metadata
+kylin.env.zookeeper-base-path=/kylin4
+kylin.env.zookeeper-connect-string=127.0.0.1
diff --git a/docker/conf/spark/spark-defaults.conf
b/docker/conf/spark/spark-defaults.conf
new file mode 100644
index 0000000..dac2e3c
--- /dev/null
+++ b/docker/conf/spark/spark-defaults.conf
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master spark://master:7077
+# spark.eventLog.enabled true
+# spark.eventLog.dir hdfs://namenode:8021/directory
+# spark.serializer org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory 5g
+# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+
+spark.sql.catalogImplementation hive
+spark.driver.maxResultSize 1g
+spark.sql.hive.thriftServer.singleSession false
+
+spark.serializer org.apache.spark.serializer.JavaSerializer
+
+spark.memory.useLegacyMode false
+spark.memory.fraction 0.3
+spark.memory.storageFraction 0.3
+
+spark.rdd.compress true
+spark.io.compression.codec snappy
+
+spark.locality.wait 100ms
+spark.speculation false
+
+spark.task.maxFailures 4
+
+spark.scheduler.minRegisteredResourcesRatio 1.0
+spark.scheduler.maxRegisteredResourcesWaitingTime 60s
+
+spark.yarn.jars hdfs://localhost:9000/spark2_jars/*
+
+
+
+
+
diff --git a/docker/conf/spark/spark-env.sh b/docker/conf/spark/spark-env.sh
new file mode 100755
index 0000000..3c5837d
--- /dev/null
+++ b/docker/conf/spark/spark-env.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read when launching programs locally with
+# ./bin/run-example or ./bin/spark-submit
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
+
+# Options read by executors and drivers running inside the cluster
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
+
+# Options read in YARN client/cluster mode
+# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
+
+# Options for the daemons used in the standalone deploy mode
+# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
+# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)
+# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)
+# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
+# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file.
+# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
+# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
+# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL
+# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS
+
+export JAVA_HOME=/home/admin/jdk1.8.0_141
+export CLASSPATH=.:$JAVA_HOME/lib
+export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native
+
+export SPARK_PID_DIR=${SPARK_HOME}/
+
diff --git a/docker/conf/zk/zoo.cfg b/docker/conf/zk/zoo.cfg
new file mode 100644
index 0000000..1a576de
--- /dev/null
+++ b/docker/conf/zk/zoo.cfg
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+# the directory where the snapshot is stored.
+# do not use /tmp for storage, /tmp here is just
+# example sakes.
+dataDir=/data/zookeeper
+# the port at which the clients will connect
+clientPort=2181
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index edeefec..31664f7 100644
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -16,19 +16,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-echo "127.0.0.1 sandbox.hortonworks.com" >> /etc/hosts
+echo "127.0.0.1 sandbox sandbox.hortonworks.com" >> /etc/hosts
+
+# clean pid files
+rm -f /tmp/*.pid
# start mysql
-service mysqld start
-mysqladmin -uroot password 123456
-mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';"
+if [ ! -f "/home/admin/first_run" ]
+then
+ service mysqld start
+ mysqladmin -uroot password 123456
+  mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS kylin4 default charset utf8 COLLATE utf8_general_ci;"
+  mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';"
+fi
+service mysqld restart
# start hdfs
if [ ! -f "/home/admin/first_run" ]
then
hdfs namenode -format
fi
-touch /home/admin/first_run
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode
@@ -39,33 +46,42 @@ $HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager
# start mr jobhistory
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
-# start hbase
+# start zk
rm -rf /data/zookeeper/*
-$HBASE_HOME/bin/start-hbase.sh
+rm -f /data/zookeeper/zookeeper_server.pid
+$ZK_HOME/bin/zkServer.sh start
# start kafka
-rm -rf /tmp/kafka-logs
-nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties &
+# rm -rf /tmp/kafka-logs
+# nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties &
+
+sleep 10s
+mkdir -p ${KYLIN_HOME}/logs
+# check hive usability first, this operation will insert one version record into VERSION table
+$KYLIN_HOME/bin/check-hive-usability.sh > ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1
-# start livy
-hdfs dfs -mkdir -p /kylin/livy
-hdfs dfs -put -f $HBASE_HOME/lib/hbase-client-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/hbase-common-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop2-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/hbase-server-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/htrace-core-*-incubating.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $HBASE_HOME/lib/metrics-core-*.jar hdfs://localhost:9000/kylin/livy/
-hdfs dfs -put -f $KYLIN_HOME/lib/kylin-job-$KYLIN_VERSION.jar hdfs://localhost:9000/kylin/livy/
-$LIVY_HOME/bin/livy-server start
+if [ ! -f "/home/admin/first_run" ]
+then
+ hdfs dfs -mkdir -p /kylin4/spark-history
+ hdfs dfs -mkdir -p /spark2_jars
+ hdfs dfs -put -f $SPARK_HOME/jars/* hdfs://localhost:9000/spark2_jars/
+fi
# prepare kafka topic and data
-$KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic
-nohup $KYLIN_HOME/bin/kylin.sh org.apache.kylin.source.kafka.util.KafkaSampleProducer --topic kylin_streaming_topic --broker localhost:9092 < /dev/null 2>&1 > /tmp/kafka-sample.log &
-# create sample cube
-sh $KYLIN_HOME/bin/sample.sh
+# if [ ! -f "/home/admin/first_run" ]
+# then
+#   $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic
+# fi
+
+# create sample data at the first time
+if [ ! -f "/home/admin/first_run" ]
+then
+ sh $KYLIN_HOME/bin/sample.sh >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1
+fi
+touch /home/admin/first_run
+sleep 10s
# start kylin
-$KYLIN_HOME/bin/kylin.sh start
+$KYLIN_HOME/bin/kylin.sh -v start >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1
while :
do
diff --git a/docker/run_container.sh b/docker/run_container.sh
index 8f03d75..3ed32ce 100755
--- a/docker/run_container.sh
+++ b/docker/run_container.sh
@@ -22,6 +22,5 @@ docker run -d \
-p 50070:50070 \
-p 8032:8032 \
-p 8042:8042 \
--p 16010:16010 \
--p 8998:8998 \
-apache-kylin-standalone
\ No newline at end of file
+-p 2181:2181 \
+apachekylin/apache-kylin-standalone:4.0.0-alpha