This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this
push:
new 6f4e356 KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)
6f4e356 is described below
commit 6f4e3562d9061bee4ef3288f35e9e53133a9e9cd
Author: Xiaoxiang Yu <[email protected]>
AuthorDate: Mon Jan 4 22:50:13 2021 +0800
KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)
* KYLIN-4858 Support Kylin4 deployment on CDH 6.X
* KYLIN-4858 Support Kylin4 deployment on CDH 6.X
---
build/bin/find-hive-dependency.sh | 5 +-
build/bin/kylin.sh | 3 +
build/bin/replace-jars-under-spark.sh | 140 ++++++++++++++++++++++++++++++++++
3 files changed, 147 insertions(+), 1 deletion(-)
diff --git a/build/bin/find-hive-dependency.sh
b/build/bin/find-hive-dependency.sh
index 22ee8f4..31530e5 100755
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -197,7 +197,10 @@ else
fi
hive_lib_dir="$HIVE_LIB"
fi
-hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name
'*slf4j*' ! -name '*avatica*' ! -name '*calcite*' ! -name
'*jackson-datatype-joda*' ! -name '*derby*' -printf '%p:' | sed 's/:$//'`
+
+hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name
'*slf4j*' ! -name '*avatica*' ! -name '*calcite*' \
+ ! -name '*jackson-datatype-joda*' ! -name '*derby*' ! -name "*jetty*" !
-name "*jsp*" ! -name "*servlet*" ! -name "*hbase*" ! -name "*websocket*" \
+ -printf '%p:' | sed 's/:$//'`
validateDirectory ${hive_conf_path}
checkFileExist hive_lib ${hive_lib}
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index c6048e2..c62fb47 100755
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -62,6 +62,9 @@ function retrieveDependency() {
# source ${dir}/find-flink-dependency.sh
fi
+ # Replace jars for different hadoop dist
+ bash ${dir}/replace-jars-under-spark.sh
+
# get hdp_version
if [ -z "${hdp_version}" ]; then
hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export
HDP_VERSION=\(.*\)/\2/"p`
diff --git a/build/bin/replace-jars-under-spark.sh
b/build/bin/replace-jars-under-spark.sh
new file mode 100644
index 0000000..0c5a8cc
--- /dev/null
+++ b/build/bin/replace-jars-under-spark.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
# Marker file recording that jar replacement already completed; its
# presence makes this script a no-op on subsequent Kylin startups.
BYPASS=${KYLIN_HOME}/spark/jars/replace-jars-bypass
# Default locations of the CDH parcel and the EMR hadoop libs.
cdh_mapreduce_path="/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce"
hadoop_lib_path="/usr/lib/hadoop"

# Jars were replaced on an earlier run: nothing to do.
if [ -f "${BYPASS}" ]; then
    exit 0
fi

# Only touch a Spark that Kylin itself bundles under $KYLIN_HOME/spark;
# an externally managed SPARK_HOME must not be modified.
if [ ! -d "$KYLIN_HOME/spark" ]; then
    echo "Skip spark which not owned by kylin. SPARK_HOME is $SPARK_HOME and KYLIN_HOME is $KYLIN_HOME ."
    exit 0
fi

echo "Start replacing hadoop jars under ${SPARK_HOME}/jars."
+
#######################################
# Detect a CDH Hadoop installation and flag its major version.
# Globals:  cdh_mapreduce_path (read) - CDH hadoop-mapreduce parcel dir
# Exports:  is_cdh6=1/0, is_cdh5=1/0
#######################################
function check_cdh_hadoop() {
    # Default both flags to 0 so consumers never see them unset.
    export is_cdh5=0
    export is_cdh6=0

    # No CDH parcel on this host: leave both flags at 0.
    # (Guard added for consistency with check_aws_emr.)
    if [ ! -d "${cdh_mapreduce_path}/../hadoop" ]; then
        return 0
    fi

    # e.g. hadoop-common-3.0.0-cdh6.2.0.jar -> suffix after last '-' is "cdh6.2.0.jar"
    hadoop_common_file=$(find "${cdh_mapreduce_path}/../hadoop/" -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
    cdh_version=${hadoop_common_file##*-}

    if [[ "${cdh_version}" == cdh6.* ]]; then
        export is_cdh6=1
    fi
    if [[ "${cdh_version}" == cdh5.* ]]; then
        export is_cdh5=1
    fi
}
+
#######################################
# Detect an AWS EMR Hadoop installation and flag its major version.
# Globals:  hadoop_lib_path (read) - EMR hadoop lib dir, e.g. /usr/lib/hadoop
# Exports:  is_emr6=1/0, is_emr5=1/0
# Outputs:  echoes the detected version string to stdout
#######################################
function check_aws_emr() {
    # Default both flags to 0 so consumers never see them unset
    # (the original early return left them undefined on non-EMR hosts).
    export is_emr5=0
    export is_emr6=0

    # Not an EMR host: leave both flags at 0.
    if [ ! -d "$hadoop_lib_path" ]; then
        return 0
    fi

    # e.g. hadoop-common-3.2.1-amzn-0.jar -> "3.2.1-amzn-0.jar"
    hadoop_common_file=$(find "$hadoop_lib_path" -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
    emr_version_1=${hadoop_common_file##*common-}
    echo "$emr_version_1"
    # Split "3.2.1-amzn-0.jar" on '-' into (3.2.1 amzn 0.jar);
    # intentionally unquoted so word-splitting fills the array.
    arrVersion=(${emr_version_1//-/ })

    if [[ "${arrVersion[0]}" == 3.* && "${arrVersion[1]}" == *amzn* ]]; then
        export is_emr6=1
    fi
    if [[ "${arrVersion[0]}" == 2.* && "${arrVersion[1]}" == *amzn* ]]; then
        export is_emr5=1
    fi
}
+
# Detect the local Hadoop distribution, then swap the Hadoop jars bundled
# with Kylin's Spark for the distribution-specific ones.
check_cdh_hadoop
check_aws_emr

common_jars=
hdfs_jars=
mr_jars=
yarn_jars=
other_jars=

if [[ "${is_cdh6}" == "1" ]]; then
    common_jars=$(find "${cdh_mapreduce_path}/../hadoop" -maxdepth 2 \
        -name "hadoop-annotations-*.jar" -not -name "*test*" \
        -o -name "hadoop-auth-*.jar" -not -name "*test*" \
        -o -name "hadoop-common-*.jar" -not -name "*test*")

    hdfs_jars=$(find "${cdh_mapreduce_path}/../hadoop-hdfs" -maxdepth 1 -name "hadoop-hdfs-*" -not -name "*test*" -not -name "*nfs*")

    mr_jars=$(find "${cdh_mapreduce_path}" -maxdepth 1 \
        -name "hadoop-mapreduce-client-app-*.jar" -not -name "*test*" \
        -o -name "hadoop-mapreduce-client-common-*.jar" -not -name "*test*" \
        -o -name "hadoop-mapreduce-client-jobclient-*.jar" -not -name "*test*" \
        -o -name "hadoop-mapreduce-client-shuffle-*.jar" -not -name "*test*" \
        -o -name "hadoop-mapreduce-client-core-*.jar" -not -name "*test*")

    yarn_jars=$(find "${cdh_mapreduce_path}/../hadoop-yarn" -maxdepth 1 \
        -name "hadoop-yarn-api-*.jar" -not -name "*test*" \
        -o -name "hadoop-yarn-client-*.jar" -not -name "*test*" \
        -o -name "hadoop-yarn-common-*.jar" -not -name "*test*" \
        -o -name "hadoop-yarn-server-common-*.jar" -not -name "*test*" \
        -o -name "hadoop-yarn-server-web-proxy-*.jar" -not -name "*test*")

    # htrace lives in a different place depending on the parcel layout.
    other_jars=$(find "${cdh_mapreduce_path}/../../jars" -maxdepth 1 -name "htrace-core4*" || find "${cdh_mapreduce_path}/../hadoop" -maxdepth 2 -name "htrace-core4*")

    # Hadoop 3 dependencies missing from Spark's bundled Hadoop 2.
    # (The original wrapped this in a second, always-true `is_cdh6` check.)
    cdh6_jars=$(find "${cdh_mapreduce_path}/../../jars" -maxdepth 1 \
        -name "woodstox-core-*.jar" -o -name "commons-configuration2-*.jar" -o -name "re2j-*.jar")
fi

jar_list="${common_jars} ${hdfs_jars} ${mr_jars} ${yarn_jars} ${other_jars} ${cdh6_jars}"

echo "Find platform specific jars:${jar_list}, will replace with these jars under ${SPARK_HOME}/jars."

if [[ "${is_cdh6}" == "1" ]]; then
    # Remove the Hadoop 2 jars Spark ships with; the CDH6 jars replace them.
    find "${KYLIN_HOME}/spark/jars" -name "hadoop-hdfs-*.jar" -exec rm -f {} \;
    find "${KYLIN_HOME}/spark/jars" -name "hadoop-yarn-*.jar" -exec rm -f {} \;
    find "${KYLIN_HOME}/spark/jars" -name "hadoop-mapreduce-*.jar" -exec rm -f {} \;
    find "${KYLIN_HOME}/spark/jars" -name "hive-exec-*.jar" -exec rm -f {} \;
#    cp ${KYLIN_HOME}/bin/hadoop3_jars/cdh6/*.jar ${SPARK_HOME}/jars
fi

# jar_list is deliberately expanded unquoted: it is a whitespace-separated
# list of paths produced by find.
for jar_file in ${jar_list}; do
    # Run cp directly; the original wrapped it in a useless $( ) command
    # substitution that spawned an extra subshell per jar.
    cp "${jar_file}" "${KYLIN_HOME}/spark/jars"
done

# Remove all spaces so an "all empty" jar_list collapses to "".
jar_list=${jar_list// /}

if [ -z "${jar_list}" ]; then
    echo "Please confirm that the corresponding hadoop jars have been replaced. The automatic replacement program cannot be executed correctly."
else
    echo "Replace jars under SPARK_HOME/jars finished."
    # Drop the marker so later startups skip this replacement entirely.
    touch "${BYPASS}"
fi

echo "Done hadoop jars replacement under ${SPARK_HOME}/jars."
\ No newline at end of file