This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this
push:
new e1f7157 KYLIN-4913 Update docker image for Kylin 4.0 Beta
e1f7157 is described below
commit e1f7157dd5dd52b8f6e3e76d7dc873fe9e973f67
Author: Zhichao Zhang <[email protected]>
AuthorDate: Wed Feb 24 19:42:09 2021 +0800
KYLIN-4913 Update docker image for Kylin 4.0 Beta
---
docker/build_standalone_image.sh | 4 +-
docker/dockerfile/standalone/Dockerfile | 15 +-
.../standalone}/build_standalone_image.sh | 4 +-
docker/dockerfile/standalone/conf/bin/kylin.sh | 504 +++++++++++++++++++++
.../standalone/conf/hadoop/capacity-scheduler.xml | 134 ++++++
.../dockerfile/standalone/conf/hive/hive-site.xml | 6 +-
.../standalone/conf/kylin/kylin.properties | 377 +++++++++++++++
.../standalone/conf/spark/spark-defaults.conf | 55 +++
.../dockerfile/standalone/conf/spark/spark-env.sh | 77 ++++
docker/dockerfile/standalone/conf/zk/zoo.cfg | 45 ++
docker/setup_standalone.sh | 2 +-
11 files changed, 1211 insertions(+), 12 deletions(-)
diff --git a/docker/build_standalone_image.sh b/docker/build_standalone_image.sh
index 9c0b925..749ebbc 100755
--- a/docker/build_standalone_image.sh
+++ b/docker/build_standalone_image.sh
@@ -23,5 +23,5 @@ echo "build image in dir "${DIR}
echo "start build Hadoop docker image"
-docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 .
-docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha .
+docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta .
+docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta .
diff --git a/docker/dockerfile/standalone/Dockerfile
b/docker/dockerfile/standalone/Dockerfile
index a168e6c..1d1ee3b 100644
--- a/docker/dockerfile/standalone/Dockerfile
+++ b/docker/dockerfile/standalone/Dockerfile
@@ -16,17 +16,20 @@
#
# Docker image for apache kylin, based on the Hadoop image
-FROM hadoop2.7-all-in-one-for-kylin4
+FROM hadoop2.7-all-in-one-for-kylin4-beta
-ENV KYLIN_VERSION 4.0.0-alpha
-ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2
+ENV KYLIN_VERSION 4.0.0-beta
+ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin
# Download Kylin
-RUN wget
https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz
\
- && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \
- && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz
+RUN wget
https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin.tar.gz
\
+ && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \
+ && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz
RUN rm -f $KYLIN_HOME/conf/kylin.properties
COPY conf/kylin/* $KYLIN_HOME/conf/
+RUN rm -f $KYLIN_HOME/bin/kylin.sh
+COPY conf/bin/kylin.sh $KYLIN_HOME/bin/
+RUN chmod +x $KYLIN_HOME/bin/kylin.sh
RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/
RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh
diff --git a/docker/build_standalone_image.sh
b/docker/dockerfile/standalone/build_standalone_image.sh
similarity index 97%
copy from docker/build_standalone_image.sh
copy to docker/dockerfile/standalone/build_standalone_image.sh
index 9c0b925..749ebbc 100755
--- a/docker/build_standalone_image.sh
+++ b/docker/dockerfile/standalone/build_standalone_image.sh
@@ -23,5 +23,5 @@ echo "build image in dir "${DIR}
echo "start build Hadoop docker image"
-docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 .
-docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha .
+docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta .
+docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta .
diff --git a/docker/dockerfile/standalone/conf/bin/kylin.sh
b/docker/dockerfile/standalone/conf/bin/kylin.sh
new file mode 100755
index 0000000..a691cec
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/bin/kylin.sh
@@ -0,0 +1,504 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# set verbose=true to print more logs during start up
+
+
+
+
+source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd
-P)/../"}/bin/header.sh $@
+if [ "$verbose" = true ]; then
+ shift
+fi
+
+mkdir -p ${KYLIN_HOME}/logs
+mkdir -p ${KYLIN_HOME}/ext
+
+source ${dir}/set-java-home.sh
+
+function retrieveDependency() {
+    #retrieve $hive_dependency and $hbase_dependency
+ if [[ -z $reload_dependency && `ls -1 ${dir}/cached-* 2>/dev/null | wc -l`
-eq 6 ]]
+ then
+ echo "Using cached dependency..."
+ source ${dir}/cached-hive-dependency.sh
+        #retrieve $hbase_dependency
+ metadataUrl=`${dir}/get-properties.sh kylin.metadata.url`
+ if [[ "${metadataUrl##*@}" == "hbase" ]]
+ then
+ source ${dir}/cached-hbase-dependency.sh
+ fi
+ source ${dir}/cached-hadoop-conf-dir.sh
+ # source ${dir}/cached-kafka-dependency.sh
+ source ${dir}/cached-spark-dependency.sh
+ # source ${dir}/cached-flink-dependency.sh
+ else
+ source ${dir}/find-hive-dependency.sh
+        #retrieve $hbase_dependency
+ metadataUrl=`${dir}/get-properties.sh kylin.metadata.url`
+ if [[ "${metadataUrl##*@}" == "hbase" ]]
+ then
+ source ${dir}/find-hbase-dependency.sh
+ fi
+ source ${dir}/find-hadoop-conf-dir.sh
+ # source ${dir}/find-kafka-dependency.sh
+ source ${dir}/find-spark-dependency.sh
+ # source ${dir}/find-flink-dependency.sh
+ fi
+
+ # Replace jars for different hadoop dist
+ bash ${dir}/replace-jars-under-spark.sh
+
+ # get hdp_version
+ if [ -z "${hdp_version}" ]; then
+ hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export
HDP_VERSION=\(.*\)/\2/"p`
+ verbose "hdp_version is ${hdp_version}"
+ fi
+
+ # Replace jars for HDI
+ KYLIN_SPARK_JARS_HOME="${KYLIN_HOME}/spark/jars"
+ if [[ -d "/usr/hdp/current/hdinsight-zookeeper" && $hdp_version == "2"* ]]
+ then
+ echo "The current Hadoop environment is HDI3, will replace some jars
package for ${KYLIN_HOME}/spark/jars"
+ if [[ -f ${KYLIN_HOME}/tomcat/webapps/kylin.war ]]
+ then
+ if [[ ! -d ${KYLIN_HOME}/tomcat/webapps/kylin ]]
+ then
+ mkdir ${KYLIN_HOME}/tomcat/webapps/kylin
+ fi
+ mv ${KYLIN_HOME}/tomcat/webapps/kylin.war
${KYLIN_HOME}/tomcat/webapps/kylin
+ cd ${KYLIN_HOME}/tomcat/webapps/kylin
+ jar -xf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war
+ if [[ -f
${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar ]]
+ then
+ echo "Remove
${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar to avoid version
conflicts"
+ rm -rf
${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar
+ rm -rf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war
+ cd ${KYLIN_HOME}/
+ fi
+ fi
+
+ if [[ -d "${KYLIN_SPARK_JARS_HOME}" ]]
+ then
+ if [[ -f ${KYLIN_HOME}/hdi3_spark_jars_flag ]]
+ then
+ echo "Required jars have been added to ${KYLIN_HOME}/spark/jars,
skip this step."
+ else
+ rm -rf ${KYLIN_HOME}/spark/jars/hadoop-*
+ cp /usr/hdp/current/spark2-client/jars/hadoop-*
$KYLIN_SPARK_JARS_HOME
+ cp /usr/hdp/current/spark2-client/jars/azure-*
$KYLIN_SPARK_JARS_HOME
+ cp
/usr/hdp/current/hadoop-client/lib/microsoft-log4j-etwappender-1.0.jar
$KYLIN_SPARK_JARS_HOME
+ cp
/usr/hdp/current/hadoop-client/lib/hadoop-lzo-0.6.0.${hdp_version}.jar
$KYLIN_SPARK_JARS_HOME
+
+ rm -rf $KYLIN_HOME/spark/jars/guava-14.0.1.jar
+ cp /usr/hdp/current/spark2-client/jars/guava-24.1.1-jre.jar
$KYLIN_SPARK_JARS_HOME
+
+ echo "Upload spark jars to HDFS"
+ hdfs dfs -test -d /spark2_jars
+ if [ $? -eq 1 ]
+ then
+ hdfs dfs -mkdir /spark2_jars
+ fi
+ hdfs dfs -put $KYLIN_SPARK_JARS_HOME/* /spark2_jars
+
+ touch ${KYLIN_HOME}/hdi3_spark_jars_flag
+ fi
+ else
+            echo "${KYLIN_HOME}/spark/jars does not exist. You can run
${KYLIN_HOME}/download-spark.sh to download spark."
+ fi
+ fi
+
+ tomcat_root=${dir}/../tomcat
+ export tomcat_root
+
+ # get KYLIN_REST_ADDRESS
+ if [ -z "$KYLIN_REST_ADDRESS" ]
+ then
+ KYLIN_REST_ADDRESS=`hostname -f`":"`grep "<Connector port="
${tomcat_root}/conf/server.xml |grep protocol=\"HTTP/1.1\" | cut -d '=' -f 2 |
cut -d \" -f 2`
+ export KYLIN_REST_ADDRESS
+ verbose "KYLIN_REST_ADDRESS is ${KYLIN_REST_ADDRESS}"
+ fi
+ # the number of Spring active profiles can be greater than 1. Additional
profiles
+ # can be added by setting kylin.security.additional-profiles
+ additional_security_profiles=`bash ${dir}/get-properties.sh
kylin.security.additional-profiles`
+ if [[ "x${additional_security_profiles}" != "x" ]]; then
+ spring_profile="${spring_profile},${additional_security_profiles}"
+ fi
+
+ # compose hadoop_dependencies
+ hadoop_dependencies=${hadoop_dependencies}:`hadoop classpath`
+# if [ -n "${hbase_dependency}" ]; then
+# hadoop_dependencies=${hadoop_dependencies}:${hbase_dependency}
+# fi
+ if [ -n "${hive_dependency}" ]; then
+ #hadoop_dependencies=${hadoop_dependencies}:${hive_dependency}
+ hadoop_dependencies=${hive_dependency}:${hadoop_dependencies}
+ fi
+ if [ -n "${kafka_dependency}" ]; then
+ hadoop_dependencies=${hadoop_dependencies}:${kafka_dependency}
+ fi
+ if [ -n "${spark_dependency}" ]; then
+ #hadoop_dependencies=${hadoop_dependencies}:${spark_dependency}
+ hadoop_dependencies=${spark_dependency}:${hadoop_dependencies}
+ fi
+
+ # compose KYLIN_TOMCAT_CLASSPATH
+
tomcat_classpath=${tomcat_root}/bin/bootstrap.jar:${tomcat_root}/bin/tomcat-juli.jar:${tomcat_root}/lib/*
+ export
KYLIN_TOMCAT_CLASSPATH=${tomcat_classpath}:${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies}:${flink_dependency}
+
+ # compose KYLIN_TOOL_CLASSPATH
+ export
KYLIN_TOOL_CLASSPATH=${KYLIN_HOME}/conf:${KYLIN_HOME}/tool/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies}
+
+ # compose kylin_common_opts
+ kylin_common_opts="-Dkylin.hive.dependency=${hive_dependency} \
+ -Dkylin.kafka.dependency=${kafka_dependency} \
+ -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \
+ -Dkylin.server.host-address=${KYLIN_REST_ADDRESS} \
+ -Dspring.profiles.active=${spring_profile} \
+ -Dhdp.version=${hdp_version}"
+
+ # compose KYLIN_TOMCAT_OPTS
+
KYLIN_TOMCAT_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-server-log4j.properties
\
+ -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager \
+ -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true \
+ -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \
+ -Djava.endorsed.dirs=${tomcat_root}/endorsed \
+ -Dcatalina.base=${tomcat_root} \
+ -Dcatalina.home=${tomcat_root} \
+ -Djava.io.tmpdir=${tomcat_root}/temp ${kylin_common_opts}"
+ export KYLIN_TOMCAT_OPTS
+
+ # compose KYLIN_TOOL_OPTS
+
KYLIN_TOOL_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties
${kylin_common_opts}"
+ export KYLIN_TOOL_OPTS
+}
+
+function checkBasicKylinProps() {
+ spring_profile=`${dir}/get-properties.sh kylin.security.profile`
+ if [ -z "$spring_profile" ]
+ then
+ quit 'Please set kylin.security.profile in kylin.properties, options
are: testing, ldap, saml.'
+ else
+ verbose "kylin.security.profile is $spring_profile"
+ fi
+}
+
+function prepareFairScheduler() {
+ cat > ${KYLIN_HOME}/conf/fairscheduler.xml <<EOL
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<allocations>
+ <pool name="query_pushdown">
+ <schedulingMode>FAIR</schedulingMode>
+ <weight>1</weight>
+ <minShare>1</minShare>
+ </pool>
+ <pool name="heavy_tasks">
+ <schedulingMode>FAIR</schedulingMode>
+ <weight>5</weight>
+ <minShare>1</minShare>
+ </pool>
+ <pool name="lightweight_tasks">
+ <schedulingMode>FAIR</schedulingMode>
+ <weight>10</weight>
+ <minShare>1</minShare>
+ </pool>
+ <pool name="vip_tasks">
+ <schedulingMode>FAIR</schedulingMode>
+ <weight>15</weight>
+ <minShare>1</minShare>
+ </pool>
+</allocations>
+EOL
+}
+
+function checkRestPort() {
+ kylin_rest_address_arr=(${KYLIN_REST_ADDRESS//:/ })
+ inuse=`netstat -tlpn | grep "\b${kylin_rest_address_arr[1]}\b"`
+ [[ -z ${inuse} ]] || quit "Port ${kylin_rest_address_arr[1]} is not
available. Another kylin server is running?"
+}
+
+
+function classpathDebug() {
+ if [ "${KYLIN_CLASSPATH_DEBUG}" != "" ]; then
+ echo "Finding ${KYLIN_CLASSPATH_DEBUG} on classpath" $@
+ $JAVA -classpath $@ org.apache.kylin.common.util.ClasspathScanner
${KYLIN_CLASSPATH_DEBUG}
+ fi
+}
+
+function runTool() {
+
+ retrieveDependency
+
+ # get KYLIN_EXTRA_START_OPTS
+ if [ -f "${KYLIN_HOME}/conf/setenv-tool.sh" ]; then
+ source ${KYLIN_HOME}/conf/setenv-tool.sh
+ fi
+
+ verbose "java opts for tool is ${KYLIN_EXTRA_START_OPTS}
${KYLIN_TOOL_OPTS}"
+ verbose "java classpath for tool is ${KYLIN_TOOL_CLASSPATH}"
+ classpathDebug ${KYLIN_TOOL_CLASSPATH}
+
+ exec $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOOL_OPTS} -classpath
${KYLIN_TOOL_CLASSPATH} "$@"
+}
+
+if [ "$2" == "--reload-dependency" ]
+then
+ reload_dependency=1
+fi
+
+# start command
+if [ "$1" == "start" ]
+then
+ if [ -f "${KYLIN_HOME}/pid" ]
+ then
+ PID=`cat $KYLIN_HOME/pid`
+ if ps -p $PID > /dev/null
+ then
+ quit "Kylin is running, stop it first"
+ fi
+ fi
+
+ checkBasicKylinProps
+
+ source ${dir}/check-env.sh
+
+ retrieveDependency
+
+ checkRestPort
+
+ prepareFairScheduler
+
+ ${KYLIN_HOME}/bin/check-migration-acl.sh || { exit 1; }
+
+ # get KYLIN_EXTRA_START_OPTS
+ if [ -f "${KYLIN_HOME}/conf/setenv.sh" ]; then
+ source ${KYLIN_HOME}/conf/setenv.sh
+ fi
+
+ security_ldap_truststore=`bash ${dir}/get-properties.sh
kylin.security.ldap.connection-truststore`
+ if [ -f "${security_ldap_truststore}" ]; then
+ KYLIN_EXTRA_START_OPTS="$KYLIN_EXTRA_START_OPTS
-Djavax.net.ssl.trustStore=$security_ldap_truststore"
+ fi
+
+ verbose "java opts is ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS}"
+ verbose "java classpath is ${KYLIN_TOMCAT_CLASSPATH}"
+ classpathDebug ${KYLIN_TOMCAT_CLASSPATH}
+ $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS} -classpath
${KYLIN_TOMCAT_CLASSPATH} org.apache.catalina.startup.Bootstrap start >>
${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid &
+
+ echo ""
+ echo "A new Kylin instance is started by $USER. To stop it, run 'kylin.sh
stop'"
+ echo "Check the log at ${KYLIN_HOME}/logs/kylin.log"
+ echo "Web UI is at http://${KYLIN_REST_ADDRESS}/kylin"
+ exit 0
+
+# run command
+elif [ "$1" == "run" ]
+then
+ retrieveStartCommand
+ ${start_command}
+
+# stop command
+elif [ "$1" == "stop" ]
+then
+ if [ -f "${KYLIN_HOME}/pid" ]
+ then
+ PID=`cat $KYLIN_HOME/pid`
+ WAIT_TIME=2
+ LOOP_COUNTER=10
+ if ps -p $PID > /dev/null
+ then
+ echo "Stopping Kylin: $PID"
+ kill $PID
+
+ for ((i=0; i<$LOOP_COUNTER; i++))
+ do
+                # wait for the process to stop
+ sleep $WAIT_TIME
+ if ps -p $PID > /dev/null ; then
+ echo "Stopping in progress. Will check after $WAIT_TIME
secs again..."
+ continue;
+ else
+ break;
+ fi
+ done
+
+ # if process is still around, use kill -9
+ if ps -p $PID > /dev/null
+ then
+ echo "Initial kill failed, getting serious now..."
+ kill -9 $PID
+                sleep 1 #give kill -9 some time to "kill"
+ if ps -p $PID > /dev/null
+ then
+ quit "Warning, even kill -9 failed, giving up! Sorry..."
+ fi
+ fi
+
+ # process is killed , remove pid file
+ rm -rf ${KYLIN_HOME}/pid
+ echo "Kylin with pid ${PID} has been stopped."
+ exit 0
+ else
+ quit "Kylin with pid ${PID} is not running"
+ fi
+ else
+ quit "Kylin is not running"
+ fi
+
+# streaming command
+elif [ "$1" == "streaming" ]
+then
+ if [ $# -lt 2 ]
+ then
+ echo "Invalid input args $@"
+ exit -1
+ fi
+ if [ "$2" == "start" ]
+ then
+ if [ -f "${KYLIN_HOME}/streaming_receiver_pid" ]
+ then
+ PID=`cat $KYLIN_HOME/streaming_receiver_pid`
+ if ps -p $PID > /dev/null
+ then
+ echo "Kylin streaming receiver is running, stop it first"
+ exit 1
+ fi
+ fi
+        #retrieve $hbase_dependency
+ metadataUrl=`${dir}/get-properties.sh kylin.metadata.url`
+ if [[ "${metadataUrl##*@}" == "hbase" ]]
+ then
+ source ${dir}/find-hbase-dependency.sh
+ fi
+        #retrieve $KYLIN_EXTRA_START_OPTS
+ if [ -f "${KYLIN_HOME}/conf/setenv.sh" ]
+ then source ${KYLIN_HOME}/conf/setenv.sh
+ fi
+
+ mkdir -p ${KYLIN_HOME}/ext
+ HBASE_CLASSPATH=`hbase classpath`
+ #echo "hbase class path:"$HBASE_CLASSPATH
+
STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH}
+
+ # KYLIN_EXTRA_START_OPTS is for customized settings, checkout
bin/setenv.sh
+ $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \
+ -Dlog4j.configuration=stream-receiver-log4j.properties\
+ -DKYLIN_HOME=${KYLIN_HOME}\
+ -Dkylin.hbase.dependency=${hbase_dependency} \
+ org.apache.kylin.stream.server.StreamingReceiver $@ >
${KYLIN_HOME}/logs/streaming_receiver.out 2>&1 & echo $! >
${KYLIN_HOME}/streaming_receiver_pid &
+ exit 0
+ elif [ "$2" == "stop" ]
+ then
+ if [ ! -f "${KYLIN_HOME}/streaming_receiver_pid" ]
+ then
+ echo "Streaming receiver is not running, please check"
+ exit 1
+ fi
+ PID=`cat ${KYLIN_HOME}/streaming_receiver_pid`
+ if [ "$PID" = "" ]
+ then
+ echo "Streaming receiver is not running, please check"
+ exit 1
+ else
+ echo "Stopping streaming receiver: $PID"
+ WAIT_TIME=2
+ LOOP_COUNTER=20
+ if ps -p $PID > /dev/null
+ then
+ kill $PID
+
+ for ((i=0; i<$LOOP_COUNTER; i++))
+ do
+                    # wait for the process to stop
+ sleep $WAIT_TIME
+ if ps -p $PID > /dev/null ; then
+ echo "Stopping in progress. Will check after
$WAIT_TIME secs again..."
+ continue;
+ else
+ break;
+ fi
+ done
+
+ # if process is still around, use kill -9
+ if ps -p $PID > /dev/null
+ then
+ echo "Initial kill failed, getting serious now..."
+ kill -9 $PID
+                    sleep 1 #give kill -9 some time to "kill"
+ if ps -p $PID > /dev/null
+ then
+ quit "Warning, even kill -9 failed, giving up! Sorry..."
+ fi
+ fi
+
+ # process is killed , remove pid file
+ rm -rf ${KYLIN_HOME}/streaming_receiver_pid
+ echo "Kylin streaming receiver with pid ${PID} has been
stopped."
+ exit 0
+ else
+ quit "Kylin streaming receiver with pid ${PID} is not running"
+ fi
+ fi
+ elif [[ "$2" = org.apache.kylin.* ]]
+ then
+ source ${KYLIN_HOME}/conf/setenv.sh
+ HBASE_CLASSPATH=`hbase classpath`
+ #echo "hbase class path:"$HBASE_CLASSPATH
+
STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH}
+
+ shift
+ # KYLIN_EXTRA_START_OPTS is for customized settings, checkout
bin/setenv.sh
+ $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \
+ -Dlog4j.configuration=stream-receiver-log4j.properties\
+ -DKYLIN_HOME=${KYLIN_HOME}\
+ -Dkylin.hbase.dependency=${hbase_dependency} \
+ "$@"
+ exit 0
+ fi
+
+elif [ "$1" = "version" ]
+then
+ runTool org.apache.kylin.common.KylinVersion
+
+elif [ "$1" = "diag" ]
+then
+ echo "'kylin.sh diag' no longer supported, use diag.sh instead"
+ exit 0
+
+# tool command
+elif [[ "$1" = org.apache.kylin.* ]]
+then
+ runTool "$@"
+else
+ quit "Usage: 'kylin.sh [-v] start' or 'kylin.sh [-v] stop'"
+fi
diff --git a/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml
b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml
new file mode 100644
index 0000000..8f016e2
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml
@@ -0,0 +1,134 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+ <name>yarn.scheduler.capacity.maximum-applications</name>
+ <value>4</value>
+ <description>
+ Maximum number of applications that can be pending and running.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+ <value>0.5</value>
+ <description>
+ Maximum percent of resources in the cluster which can be used to run
+ application masters i.e. controls number of concurrent running
+ applications.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.resource-calculator</name>
+
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+ <description>
+ The ResourceCalculator implementation to be used to compare
+ Resources in the scheduler.
+ The default i.e. DefaultResourceCalculator only uses Memory while
+ DominantResourceCalculator uses dominant-resource to compare
+ multi-dimensional resources such as Memory, CPU etc.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.queues</name>
+ <value>default</value>
+ <description>
+      The queues at this level (root is the root queue).
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.capacity</name>
+ <value>100</value>
+ <description>Default queue target capacity.</description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+ <value>1</value>
+ <description>
+ Default queue user limit a percentage from 0.0 to 1.0.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+ <value>100</value>
+ <description>
+ The maximum capacity of the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.state</name>
+ <value>RUNNING</value>
+ <description>
+ The state of the default queue. State can be one of RUNNING or STOPPED.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+ <value>*</value>
+ <description>
+ The ACL of who can submit jobs to the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+ <value>*</value>
+ <description>
+ The ACL of who can administer jobs on the default queue.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.node-locality-delay</name>
+ <value>40</value>
+ <description>
+ Number of missed scheduling opportunities after which the
CapacityScheduler
+ attempts to schedule rack-local containers.
+ Typically this should be set to number of nodes in the cluster, By
default is setting
+ approximately number of nodes in one rack which is 40.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.queue-mappings</name>
+ <value></value>
+ <description>
+ A list of mappings that will be used to assign jobs to queues
+ The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
+ Typically this list will be used to map users to queues,
+ for example, u:%user:%user maps all users to queues with the same name
+ as the user.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
+ <value>false</value>
+ <description>
+ If a queue mapping is present, will it override the value specified
+ by the user? This can be used by administrators to place jobs in queues
+ that are different than the one specified by the user.
+ The default is false.
+ </description>
+ </property>
+
+</configuration>
diff --git a/docker/dockerfile/standalone/conf/hive/hive-site.xml
b/docker/dockerfile/standalone/conf/hive/hive-site.xml
index fc51985..589e40f 100644
--- a/docker/dockerfile/standalone/conf/hive/hive-site.xml
+++ b/docker/dockerfile/standalone/conf/hive/hive-site.xml
@@ -37,4 +37,8 @@
<value>123456</value>
<description>password to use against metastore database</description>
</property>
-</configuration>
\ No newline at end of file
+ <property>
+ <name>hive.metastore.schema.verification</name>
+ <value>false</value>
+ </property>
+</configuration>
diff --git a/docker/dockerfile/standalone/conf/kylin/kylin.properties
b/docker/dockerfile/standalone/conf/kylin/kylin.properties
new file mode 100644
index 0000000..280b846
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/kylin/kylin.properties
@@ -0,0 +1,377 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
+# The below commented values will effect as default settings
+# Uncomment and override them if necessary
+
+
+
+#
+#### METADATA | ENV ###
+#
+## The metadata store has two implementations(RDBMS/HBase), while RDBMS is
recommended in Kylin 4.X
+## Please refer to
https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0
if you prefer HBase
+#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10
+#
+## metadata cache sync retry times
+#kylin.metadata.sync-retries=3
+#
+## Working folder in HDFS, better be qualified absolute path, make sure user
has the right permission to this directory
+#kylin.env.hdfs-working-dir=/kylin
+#
+## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no
difference in terms of functions.
+#kylin.env=QA
+#
+## kylin zk base path
+#kylin.env.zookeeper-base-path=/kylin
+#
+## Run a TestingServer for curator locally
+#kylin.env.zookeeper-is-local=false
+#
+## Connect to a remote zookeeper with the url, should set
kylin.env.zookeeper-is-local to false
+#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com
+#
+#### SERVER | WEB | RESTCLIENT ###
+#
+## Kylin server mode, valid value [all, query, job]
+#kylin.server.mode=all
+#
+### Kylin server port
+#server.port=7070
+#
+## List of web servers in use, this enables one web server instance to sync up
with other servers.
+#kylin.server.cluster-servers=localhost:7070
+#
+## Display timezone on UI,format like[GMT+N or GMT-N]
+#kylin.web.timezone=
+#
+## Timeout value for the queries submitted through the Web UI, in milliseconds
+#kylin.web.query-timeout=300000
+#
+#kylin.web.cross-domain-enabled=true
+#
+##allow user to export query result
+#kylin.web.export-allow-admin=true
+#kylin.web.export-allow-other=true
+#
+## Hide measures in measure list of cube designer, separate by comma
+#kylin.web.hide-measures=RAW
+#
+##max connections of one route
+#kylin.restclient.connection.default-max-per-route=20
+#
+##max connections of one rest-client
+#kylin.restclient.connection.max-total=200
+#
+#### PUBLIC CONFIG ###
+#kylin.engine.default=6
+#kylin.storage.default=4
+#kylin.web.hive-limit=20
+#kylin.web.help.length=4
+#kylin.web.help.0=start|Getting
Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html
+#kylin.web.help.1=odbc|ODBC
Driver|http://kylin.apache.org/docs/tutorial/odbc.html
+#kylin.web.help.2=tableau|Tableau
Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html
+#kylin.web.help.3=onboard|Cube Design
Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html
+#kylin.web.link-streaming-guide=http://kylin.apache.org/
+#kylin.htrace.show-gui-trace-toggle=false
+#kylin.web.link-hadoop=
+#kylin.web.link-diagnostic=
+#kylin.web.contact-mail=
+#kylin.server.external-acl-provider=
+#
+## Default time filter for job list, 0->current day, 1->last one day, 2->last
one week, 3->last one year, 4->all
+#kylin.web.default-time-filter=1
+#
+#### SOURCE ###
+## Define how to access to hive metadata
+## When user deploy kylin on AWS EMR and Glue is used as external metadata,
use gluecatalog instead
+#kylin.source.hive.metadata-type=hcatalog
+#
+## Hive client, valid value [cli, beeline]
+#kylin.source.hive.client=cli
+#
+## Absolute path to beeline shell, can be set to spark beeline instead of the
default hive beeline on PATH
+#kylin.source.hive.beeline-shell=beeline
+#
+## Hive database name for putting the intermediate flat tables
+#kylin.source.hive.database-for-flat-table=default
+#
+#### STORAGE ###
+#
+## The storage for final cube file in hbase
+#kylin.storage.url=hbase
+#
+## clean real storage after delete operation
+## if you want to delete the real storage like htable of deleting segment, you
can set it to true
+#kylin.storage.clean-after-delete-operation=false
+#
+#### JOB ###
+#
+## Max job retry on error, default 0: no retry
+#kylin.job.retry=0
+#
+## Max count of concurrent jobs running
+#kylin.job.max-concurrent-jobs=10
+#
+## The percentage of the sampling, default 100%
+#kylin.job.sampling-percentage=100
+#
+## If true, will send email notification on job complete
+##kylin.job.notification-enabled=true
+##kylin.job.notification-mail-enable-starttls=true
+##kylin.job.notification-mail-host=smtp.office365.com
+##kylin.job.notification-mail-port=587
+##[email protected]
+##kylin.job.notification-mail-password=mypassword
+##[email protected]
+#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler
+#kylin.job.scheduler.default=0
+#
+#### CUBE | DICTIONARY ###
+#
+#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
+#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
+#
+## 'auto', 'inmem', 'layer' or 'random' for testing
+#kylin.cube.algorithm=layer
+#
+## A smaller threshold prefers layer, a larger threshold prefers in-mem
+#kylin.cube.algorithm.layer-or-inmem-threshold=7
+#
+## auto use inmem algorithm:
+## 1, cube planner optimize job
+## 2, no source record
+#kylin.cube.algorithm.inmem-auto-optimize=true
+#
+#kylin.cube.aggrgroup.max-combination=32768
+#
+#kylin.cube.cubeplanner.enabled=false
+#kylin.cube.cubeplanner.enabled-for-existing-cube=false
+#kylin.cube.cubeplanner.expansion-threshold=15.0
+#kylin.cube.cubeplanner.recommend-cache-max-size=200
+#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
+#kylin.cube.cubeplanner.algorithm-threshold-greedy=8
+#kylin.cube.cubeplanner.algorithm-threshold-genetic=23
+#
+#### QUERY ###
+#
+## Controls the maximum number of bytes a query is allowed to scan storage.
+## The default value 0 means no limit.
+## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per
coprocessor.
+#kylin.query.max-scan-bytes=0
+#
+#kylin.query.cache-enabled=true
+#kylin.query.cache-threshold-scan-count=10240
+#kylin.query.cache-threshold-duration=2000
+#kylin.query.cache-threshold-scan-bytes=1048576
+#kylin.query.large-query-threshold=1000000
+#
+## Controls extras properties for Calcite jdbc driver
+## all extras properties should be under the prefix
"kylin.query.calcite.extras-props."
+## case sensitive, default: true, to enable case insensitive set it to false
+## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE
+#kylin.query.calcite.extras-props.caseSensitive=true
+## how to handle unquoted identifiers, default: TO_UPPER, available options:
UNCHANGED, TO_UPPER, TO_LOWER
+## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING
+#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER
+## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE,
BACK_TICK, BRACKET
+## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING
+#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE
+## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal
+## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum
+#kylin.query.calcite.extras-props.conformance=LENIENT
+#
+## TABLE ACL
+#kylin.query.security.table-acl-enabled=true
+#
+## Usually should not modify this
+#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
+#
+#kylin.query.escape-default-keyword=false
+#
+## Usually should not modify this
+#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
+#
+#### SECURITY ###
+#
+## Spring security profile, options: testing, ldap, saml
+## with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN
to login
+#kylin.security.profile=testing
+#
+## Admin roles in LDAP, for ldap and saml
+#kylin.security.acl.admin-role=admin
+#
+## LDAP authentication configuration
+#kylin.security.ldap.connection-server=ldap://ldap_server:389
+#kylin.security.ldap.connection-username=
+#kylin.security.ldap.connection-password=
+## When you use the customized CA certificate library for user authentication
based on LDAPs, you need to configure this item.
+## The value of this item will be added to the JVM parameter
javax.net.ssl.trustStore.
+#kylin.security.ldap.connection-truststore=
+#
+## LDAP user account directory;
+#kylin.security.ldap.user-search-base=
+#kylin.security.ldap.user-search-pattern=
+#kylin.security.ldap.user-group-search-base=
+#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
+#
+## LDAP service account directory
+#kylin.security.ldap.service-search-base=
+#kylin.security.ldap.service-search-pattern=
+#kylin.security.ldap.service-group-search-base=
+#
+### SAML configurations for SSO
+## SAML IDP metadata file location
+#kylin.security.saml.metadata-file=classpath:sso_metadata.xml
+#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
+#kylin.security.saml.keystore-file=classpath:samlKeystore.jks
+#kylin.security.saml.context-scheme=https
+#kylin.security.saml.context-server-name=hostname
+#kylin.security.saml.context-server-port=443
+#kylin.security.saml.context-path=/kylin
+#
+#### SPARK BUILD ENGINE CONFIGS ###
+#
+## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run
spark-submit
+## This must contain site xmls of core, yarn, hive, and hbase in one folder
+##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
+#
+## Spark conf (default is in spark/conf/spark-defaults.conf)
+#kylin.engine.spark-conf.spark.master=yarn
+##kylin.engine.spark-conf.spark.submit.deployMode=client
+#kylin.engine.spark-conf.spark.yarn.queue=default
+##kylin.engine.spark-conf.spark.executor.cores=1
+##kylin.engine.spark-conf.spark.executor.memory=4G
+##kylin.engine.spark-conf.spark.executor.instances=1
+##kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M
+#kylin.engine.spark-conf.spark.driver.cores=1
+#kylin.engine.spark-conf.spark.driver.memory=1G
+#kylin.engine.spark-conf.spark.shuffle.service.enabled=true
+#kylin.engine.spark-conf.spark.eventLog.enabled=true
+#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
+#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
+#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
+#kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8
-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties
-Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir}
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier}
-Dkylin.spark.category=job -Dkylin.spark.project=${job.project}
-Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId}
-Duser.timezone=${user.timezone}
+##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1
+#
+## manually upload spark-assembly jar to HDFS and then set this property will
avoid repeatedly uploading jar at runtime
+##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
+#
+## uncomment for HDP
+##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+#
+#### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+## Enlarge cores and memory to improve query performance in production env,
please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+#
+#kylin.query.spark-conf.spark.master=yarn
+##kylin.query.spark-conf.spark.submit.deployMode=client
+#kylin.query.spark-conf.spark.driver.cores=1
+#kylin.query.spark-conf.spark.driver.memory=4G
+#kylin.query.spark-conf.spark.driver.memoryOverhead=1G
+#kylin.query.spark-conf.spark.executor.cores=1
+#kylin.query.spark-conf.spark.executor.instances=1
+#kylin.query.spark-conf.spark.executor.memory=4G
+#kylin.query.spark-conf.spark.executor.memoryOverhead=1G
+#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
+##kylin.query.spark-conf.spark.sql.shuffle.partitions=40
+##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+#
+#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
-Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug
-Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir}
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier}
-Dkylin.spark.category=sparder -Dkylin.spark.project=${job.project}
+## uncomment for HDP
+##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+#
+#### QUERY PUSH DOWN ###
+#
+##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl
+##kylin.query.pushdown.update-enabled=false
+
+kylin.env=QA
+kylin.server.mode=all
+kylin.server.host-address=127.0.0.1:7070
+server.port=7070
+# Display timezone on UI, format like [GMT+N or GMT-N]
+kylin.web.timezone=GMT+8
+
+kylin.source.hive.client=cli
+kylin.source.hive.database-for-flat-table=kylin4
+
+kylin.engine.spark-conf.spark.eventLog.enabled=true
+kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history
+kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history
+kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
+
+kylin.engine.spark-conf.spark.yarn.submit.file.replication=1
+kylin.engine.spark-conf.spark.master=yarn
+kylin.engine.spark-conf.spark.driver.memory=512M
+kylin.engine.spark-conf.spark.driver.memoryOverhead=512M
+kylin.engine.spark-conf.spark.executor.memory=1G
+kylin.engine.spark-conf.spark.executor.instances=1
+kylin.engine.spark-conf.spark.executor.memoryOverhead=512M
+kylin.engine.spark-conf.spark.executor.cores=1
+kylin.engine.spark-conf.spark.sql.shuffle.partitions=1
+kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+
+kylin.storage.columnar.shard-rowcount=2500000
+kylin.storage.columnar.shard-countdistinct-rowcount=1000000
+kylin.storage.columnar.repartition-threshold-size-mb=128
+kylin.storage.columnar.shard-size-mb=128
+
+kylin.query.auto-sparder-context=true
+kylin.query.sparder-context.app-name=sparder_on_docker
+kylin.query.spark-conf.spark.master=yarn
+kylin.query.spark-conf.spark.driver.memory=512M
+kylin.query.spark-conf.spark.driver.memoryOverhead=512M
+kylin.query.spark-conf.spark.executor.memory=1G
+kylin.query.spark-conf.spark.executor.instances=1
+kylin.query.spark-conf.spark.executor.memoryOverhead=512M
+kylin.query.spark-conf.spark.executor.cores=1
+kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
+kylin.query.spark-conf.spark.sql.shuffle.partitions=1
+kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
+kylin.query.spark-conf.spark.eventLog.enabled=true
+kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history
+kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history
+
+# for local cache
+kylin.query.cache-enabled=false
+
+# for pushdown query
+kylin.query.pushdown.update-enabled=false
+kylin.query.pushdown.enabled=true
+kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl
+
+# for Cube Planner
+kylin.cube.cubeplanner.enabled=true
+kylin.server.query-metrics2-enabled=false
+kylin.metrics.reporter-query-enabled=false
+kylin.metrics.reporter-job-enabled=false
+kylin.metrics.monitor-enabled=false
+kylin.web.dashboard-enabled=false
+
+# metadata for mysql
+kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10
+kylin.env.hdfs-working-dir=/kylin4_metadata
+kylin.env.zookeeper-base-path=/kylin4
+kylin.env.zookeeper-connect-string=127.0.0.1
+
+kylin.storage.clean-after-delete-operation=true
diff --git a/docker/dockerfile/standalone/conf/spark/spark-defaults.conf
b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf
new file mode 100644
index 0000000..dac2e3c
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master spark://master:7077
+# spark.eventLog.enabled true
+# spark.eventLog.dir hdfs://namenode:8021/directory
+# spark.serializer org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory 5g
+# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value
-Dnumbers="one two three"
+
+spark.sql.catalogImplementation hive
+spark.driver.maxResultSize 1g
+spark.sql.hive.thriftServer.singleSession false
+
+spark.serializer
org.apache.spark.serializer.JavaSerializer
+
+spark.memory.useLegacyMode false
+spark.memory.fraction 0.3
+spark.memory.storageFraction 0.3
+
+spark.rdd.compress true
+spark.io.compression.codec snappy
+
+spark.locality.wait 100ms
+spark.speculation false
+
+spark.task.maxFailures 4
+
+spark.scheduler.minRegisteredResourcesRatio 1.0
+spark.scheduler.maxRegisteredResourcesWaitingTime 60s
+
+spark.yarn.jars hdfs://localhost:9000/spark2_jars/*
+
+
+
+
+
diff --git a/docker/dockerfile/standalone/conf/spark/spark-env.sh
b/docker/dockerfile/standalone/conf/spark/spark-env.sh
new file mode 100755
index 0000000..3c5837d
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/spark/spark-env.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read when launching programs locally with
+# ./bin/run-example or ./bin/spark-submit
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
+
+# Options read by executors and drivers running inside the cluster
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and
RDD data
+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
+
+# Options read in YARN client/cluster mode
+# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you
use YARN
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
+
+# Options for the daemons used in the standalone deploy mode
+# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for
the master
+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g.
"-Dx=y")
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give
executors (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for
the worker
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g.
"-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server
themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server
(e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle
service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g.
"-Dx=y")
+# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR Where log files are stored. (Default:
${SPARK_HOME}/logs)
+# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING A string representing this instance of spark.
(Default: $USER)
+# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
+# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will
not output a PID file.
+# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
+# You might get better performance to enable these options if using native
BLAS (see SPARK-21305).
+# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL
+# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS
+
+export JAVA_HOME=/home/admin/jdk1.8.0_141
+export CLASSPATH=.:$JAVA_HOME/lib
+export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native
+
+export SPARK_PID_DIR=${SPARK_HOME}/
+
diff --git a/docker/dockerfile/standalone/conf/zk/zoo.cfg
b/docker/dockerfile/standalone/conf/zk/zoo.cfg
new file mode 100644
index 0000000..1a576de
--- /dev/null
+++ b/docker/dockerfile/standalone/conf/zk/zoo.cfg
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+# the directory where the snapshot is stored.
+# do not use /tmp for storage, /tmp here is just
+# example sakes.
+dataDir=/data/zookeeper
+# the port at which the clients will connect
+clientPort=2181
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
diff --git a/docker/setup_standalone.sh b/docker/setup_standalone.sh
index 3ed32ce..a6289af 100755
--- a/docker/setup_standalone.sh
+++ b/docker/setup_standalone.sh
@@ -23,4 +23,4 @@ docker run -d \
-p 8032:8032 \
-p 8042:8042 \
-p 2181:2181 \
-apachekylin/apache-kylin-standalone:4.0.0-alpha
+apachekylin/apache-kylin-standalone:4.0.0-beta