This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-3.4.2 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8e9c5a5d17dbcb275653d6e10b9dc719b965ed5d Author: stiga-huang <[email protected]> AuthorDate: Mon Sep 5 13:13:11 2022 +0800 IMPALA-10262: RPM/DEB Packaging Support This patch is based on a previous patch contributed by Shant Hovsepian: https://gerrit.cloudera.org/c/16612/ It adds a new option, -package, to buildall.sh for building a package for the current OS type (e.g. CentOS/Ubuntu). You can also use "make/ninja package" to build the package. Scripts for launching the services and the required configuration files are also added. Tests: - Built on Ubuntu 18.04/20.04 and CentOS 7 using ./buildall.sh -noclean -skiptests -release -package - Deployed the RPM package on a CDP cluster. Verified the scripts. - Deployed the DEB package on a docker container. Verified the scripts. Resolved trivial backport conflicts in: - CMakeLists.txt - bin/bootstrap_system.sh - bin/jenkins/build-all-flag-combinations.sh - buildall.sh - docker/install_os_packages.sh Non-trivial backport notes: CMake function cmake_host_system_information does not recognize the keys DISTRIB_ID and DISTRIB_VERSION_ID (they require CMake >= 3.22). The version currently used in branch-3.4 is 3.14.3. Details of how their usages are removed: - One usage of DISTRIB_ID is to skip packaging impala-shell on redhat8. Removes it since redhat8 is not supported on branch-3.4. - Another usage of DISTRIB_ID is to determine the package file type (DEB vs RPM) based on the OS. Replaces it with the content of the os-release files. 
- Removes the usage of OS_DISTRIB_VERSION_ID in the package file name Tests: - Built on Ubuntu 18.04 Change-Id: I64419fd400fe8d233dac016b6306157fe9461d82 Reviewed-on: http://gerrit.cloudera.org:8080/18939 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Reviewed-on: http://gerrit.cloudera.org:8080/21130 Tested-by: Quanlong Huang <[email protected]> Reviewed-by: Zihao Ye <[email protected]> --- CMakeLists.txt | 105 ++++++++++++++ be/src/service/CMakeLists.txt | 4 + bin/bootstrap_system.sh | 2 +- bin/impala-config.sh | 3 + bin/jenkins/build-all-flag-combinations.sh | 2 + bin/rat_exclude_files.txt | 3 + buildall.sh | 12 ++ docker/install_os_packages.sh | 212 +++++++++++++++++++++++++++++ package/bin/impala-env.sh | 85 ++++++++++++ package/bin/start-catalogd.sh | 38 ++++++ package/bin/start-impalad.sh | 49 +++++++ package/bin/start-statestored.sh | 37 +++++ package/bin/stop-catalogd.sh | 23 ++++ package/bin/stop-impalad.sh | 23 ++++ package/bin/stop-statestored.sh | 23 ++++ package/conf/catalogd_flags | 13 ++ package/conf/core-site.xml | 20 +++ package/conf/fair-scheduler.xml | 6 + package/conf/hdfs-site.xml | 40 ++++++ package/conf/hive-site.xml | 20 +++ package/conf/impalad_flags | 16 +++ package/conf/llama-site.xml | 3 + package/conf/statestore_flags | 7 + 23 files changed, 745 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 52a60eef6..6828e5f04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,8 @@ set(NO_TESTS 1) # initialized. 
project(Impala) +option(BUILD_PACKAGES "Build deployment packages") + include(cmake_modules/kudu_cmake_fns.txt) if (NOT DEFINED BUILD_SHARED_LIBS) @@ -399,6 +401,10 @@ endif() find_package(kuduClient REQUIRED NO_DEFAULT_PATH) include_directories(SYSTEM ${KUDU_CLIENT_INCLUDE_DIR}) +## installation path +set(CMAKE_INSTALL_PREFIX "/opt") +set(IMPALA_INSTALLDIR "impala" CACHE INTERNAL "") + # compile these subdirs using their own CMakeLists.txt add_subdirectory(common/function-registry) add_subdirectory(common/thrift) @@ -413,6 +419,36 @@ add_subdirectory(impala-parent) add_subdirectory(ext-data-source) add_subdirectory(query-event-hook-api) +install(DIRECTORY "www/" DESTINATION ${IMPALA_INSTALLDIR}/www) +install(FILES fe/target/impala-frontend-$ENV{IMPALA_VERSION}.jar + DESTINATION ${IMPALA_INSTALLDIR}/jar) + +set(IMPALA_GCC_HOME $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/gcc-$ENV{IMPALA_GCC_VERSION}) +FILE(GLOB gcc_lib ${IMPALA_GCC_HOME}/lib64/libgcc_s.so.1*) +install(FILES ${gcc_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib) + +FILE(GLOB cpp_lib ${IMPALA_GCC_HOME}/lib64/libstdc++.so.6*) +install(FILES ${cpp_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib) + +set(KUDU_HOME $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-$ENV{IMPALA_KUDU_VERSION}/release) +# The parent folder is lib64 on centos/redhat, while on ubuntu it's lib. 
+FILE(GLOB kudu_lib ${KUDU_HOME}/lib*/libkudu_client.so*) +install(FILES ${kudu_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib) + +FILE(GLOB hadoop_lib $ENV{HADOOP_LIB_DIR}/native/libhadoop.so*) +install(FILES ${hadoop_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib) + +install(DIRECTORY fe/target/dependency/ DESTINATION ${IMPALA_INSTALLDIR}/jar + FILES_MATCHING PATTERN "*.jar") + +install(DIRECTORY shell/build/impala-shell-$ENV{IMPALA_VERSION}/ + DESTINATION ${IMPALA_INSTALLDIR}/shell + USE_SOURCE_PERMISSIONS PATTERN "*.pyc" EXCLUDE) + +install(DIRECTORY package/bin/ DESTINATION ${IMPALA_INSTALLDIR}/bin + USE_SOURCE_PERMISSIONS FILES_MATCHING PATTERN "*.sh") +install(DIRECTORY package/conf/ DESTINATION ${IMPALA_INSTALLDIR}/conf) + # Build target for all generated files which most backend code depends on add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps) @@ -447,3 +483,72 @@ if (DUMP_INCLUDE_PATHS) endif(DUMP_INCLUDE_PATHS) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libstdc++") + +# packaging +if (BUILD_PACKAGES) + if (EXISTS "/etc/redhat-release") + set(IMPALA_LINUX_ID_FILE "/etc/redhat-release") + elseif (EXISTS "/etc/os-release") + set(IMPALA_LINUX_ID_FILE "/etc/os-release") + elseif (EXISTS "/etc/issue") + set(IMPALA_LINUX_ID_FILE "/etc/issue") + else() + message(FATAL_ERROR "Can not determine OS version!") + endif() + file(READ ${IMPALA_LINUX_ID_FILE} IMPALA_OS_INFO) + + string(TOLOWER ${IMPALA_OS_INFO} IMPALA_OS_INFO) + if (${IMPALA_OS_INFO} MATCHES "(redhat|centos)") + set(CPACK_GENERATOR "RPM") + elseif (${IMPALA_OS_INFO} MATCHES "ubuntu") + set(CPACK_GENERATOR "DEB") + else() + message(WARNING "OS is ${IMPALA_OS_INFO}. 
No packages will be generated.") + endif() + + if (CPACK_GENERATOR) + message(STATUS "Packaging enabled: ${CPACK_GENERATOR}") + set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) + set(CPACK_PACKAGE_VENDOR "Apache") + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "An open source massively parallel processing SQL query engine.") + set(CPACK_PACKAGE_CONTACT "[email protected]") + set(CPACK_PACKAGE_HOMEPAGE_URL "https://impala.apache.org") + set(CPACK_PACKAGE_VERSION "$ENV{IMPALA_VERSION}") + set(CPACK_RPM_PACKAGE_LICENSE "ASL-2.0") + # Set a meaningful package name, e.g. Impala-4.3.0-SNAPSHOT_hive-3.1.3000.7.2.18.0-41-x86_64 + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}") + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}_hive-$ENV{IMPALA_HIVE_VERSION}") + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-${CMAKE_SYSTEM_PROCESSOR}") + message(STATUS "Package name: ${CPACK_PACKAGE_FILE_NAME}") + + if ($ENV{STRIP_DEPLOYMENT_IMPALAD}) + set(CPACK_STRIP_FILES ${IMPALA_INSTALLDIR}/bin/impalad) + message("Binaries in the package will be stripped") + endif() + + set(CPACK_PACKAGING_INSTALL_PREFIX "/opt") + set(CPACK_PACKAGE_INSTALL_DIRECTORY "impala") + + execute_process( + COMMAND bash -c "${CMAKE_SOURCE_DIR}/docker/install_os_packages.sh --dry-run | tail -n1" + OUTPUT_VARIABLE PKG_LIST + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if ("${PKG_LIST}" STREQUAL "") + message(FATAL_ERROR "Package list is empty: '${PKG_LIST}'") + else() + message(STATUS "Get required package list: '${PKG_LIST}'") + endif() + + if (${CPACK_GENERATOR} MATCHES "RPM") + set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") + set(CPACK_RPM_PACKAGE_RELOCATABLE TRUE) + set(CPACK_RPM_PACKAGE_REQUIRES ${PKG_LIST}) + else() + set(CPACK_DEBIAN_PACKAGE_DEPENDS ${PKG_LIST}) + endif() + endif() + +include(CPack) + +endif() diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt index 939df2226..9371cbc80 100644 --- a/be/src/service/CMakeLists.txt +++ 
b/be/src/service/CMakeLists.txt @@ -110,6 +110,10 @@ target_link_libraries(unifiedbetests ${JAVA_JSIG_LIBRARY} ${UNIFIED_TEST_LINK_LIBS}) ADD_DEPENDENCIES(unified-be-test-executable unifiedbetests) +install(FILES ${STATESTORED_SYMLINK} DESTINATION ${IMPALA_INSTALLDIR}/bin) +install(FILES ${CATALOGD_SYMLINK} DESTINATION ${IMPALA_INSTALLDIR}/bin) +install(TARGETS impalad DESTINATION ${IMPALA_INSTALLDIR}/bin) + # Exception to unified be tests: Custom main() due to leak ADD_BE_TEST(session-expiry-test session-expiry-test.cc) # TODO: this leaks thrift server ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test "StitchNullsTest.*:PrintTColumnValueTest.*") diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh index 9f8d8f481..22d9bd44b 100755 --- a/bin/bootstrap_system.sh +++ b/bin/bootstrap_system.sh @@ -228,7 +228,7 @@ fi redhat sudo yum install -y curl gcc gcc-c++ git krb5-devel krb5-server krb5-workstation \ libevent-devel libffi-devel make ntp ntpdate ntp-perl openssl-devel cyrus-sasl \ cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \ - python-devel python-setuptools postgresql postgresql-server \ + python-devel python-setuptools postgresql postgresql-server rpm-build \ wget vim-common nscd cmake lzo-devel fuse-devel snappy-devel zlib-devel \ psmisc lsof openssh-server redhat-lsb java-1.8.0-openjdk-devel \ java-1.8.0-openjdk-src python-argparse diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 4218fa6e7..a29f698d1 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -802,6 +802,9 @@ CLASSPATH="$LZO_JAR_PATH:$CLASSPATH" # A marker in the environment to prove that we really did source this file export IMPALA_CONFIG_SOURCED=1 +# Whether to strip the impalad binary when generating deployment package +export STRIP_DEPLOYMENT_IMPALAD=true + echo "IMPALA_VERSION = $IMPALA_VERSION" echo "IMPALA_HOME = $IMPALA_HOME" echo "HADOOP_HOME = $HADOOP_HOME" diff --git a/bin/jenkins/build-all-flag-combinations.sh b/bin/jenkins/build-all-flag-combinations.sh 
index cbbc7c493..b1412d867 100755 --- a/bin/jenkins/build-all-flag-combinations.sh +++ b/bin/jenkins/build-all-flag-combinations.sh @@ -37,6 +37,7 @@ CONFIGS=( # Test gcc builds with and without -so: "-skiptests -noclean" "-skiptests -noclean -release" + "-skiptests -noclean -release -package" "-skiptests -noclean -release -so -ninja" # clang sanitizer builds: "-skiptests -noclean -asan" @@ -44,6 +45,7 @@ CONFIGS=( "-skiptests -noclean -ubsan -so -ninja" # USE_CDP_HIVE=true build: "-skiptests -noclean -use_cdp_hive" + "-skiptests -noclean -use_cdp_hive -package" ) FAILED="" diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt index 29851b628..07998a1a9 100644 --- a/bin/rat_exclude_files.txt +++ b/bin/rat_exclude_files.txt @@ -212,3 +212,6 @@ docs/images/howto_show_histogram.png docs/images/howto_static_server_pools_config.png docs/images/impala_arch.jpeg docs/images/support_send_diagnostic_data.png + +# Files of deployment configuration +package/conf/* diff --git a/buildall.sh b/buildall.sh index 498f8632a..0f5d03101 100755 --- a/buildall.sh +++ b/buildall.sh @@ -69,6 +69,7 @@ BUILD_ASAN=0 BUILD_FE_ONLY=0 BUILD_TESTS=1 GEN_CMAKE_ONLY=0 +GEN_PACKAGE=0 BUILD_RELEASE_AND_DEBUG=0 BUILD_TIDY=0 BUILD_UBSAN=0 @@ -198,6 +199,9 @@ do -cmake_only) GEN_CMAKE_ONLY=1 ;; + -package) + GEN_PACKAGE=1 + ;; -help|*) echo "buildall.sh - Builds Impala and runs all tests." echo "[-noclean] : Omits cleaning all packages before building. Will not kill"\ @@ -247,6 +251,7 @@ do echo "[-fe_only] : Build just the frontend" echo "[-ninja] : Use ninja instead of make" echo "[-cmake_only] : Generate makefiles only, instead of doing a full build" + echo "[-package] : Generate a package for deployment." 
echo "----------------------------------------------------------------------------- Examples of common tasks: @@ -462,6 +467,9 @@ generate_cmake_files() { else CMAKE_ARGS+=(-DCMAKE_TOOLCHAIN_FILE=$IMPALA_HOME/cmake_modules/toolchain.cmake) fi + if [[ "$GEN_PACKAGE" -eq 1 ]]; then + CMAKE_ARGS+=(-DBUILD_PACKAGES=ON) + fi cmake . ${CMAKE_ARGS[@]} } @@ -587,6 +595,10 @@ else build_all_components $CMAKE_BUILD_TYPE 1 fi +if [[ "$GEN_PACKAGE" -eq 1 ]]; then + ${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4} package +fi + if [[ $NEED_MINICLUSTER -eq 1 ]]; then reconfigure_test_cluster fi diff --git a/docker/install_os_packages.sh b/docker/install_os_packages.sh new file mode 100755 index 000000000..8fbeef793 --- /dev/null +++ b/docker/install_os_packages.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# This installs the minimal dependencies needed for Impala +# services to run. It currently handles Ubuntu and Redhat +# distributions. This takes an optional argument "--install-debug-tools" +# which installs extra debugging tools like curl, ping, etc. 
+ +set -euo pipefail + +INSTALL_DEBUG_TOOLS=false +JAVA_VERSION=8 +DRY_RUN=false +PKG_LIST="" +NON_PKG_NAMES=(apt-get yum install update) + +function print_usage { + echo "install_os_packages.sh - Helper script to install OS dependencies" + echo "[--install-debug-tools] : Also install debug tools like curl, iproute, etc" + echo "[--java <version>] : Use specified Java version rather than the default Java 8." + echo "[--dry-run] : Print the list of packages to install." +} + +# Wraps the passed in command to either execute it (DRY_RUN=false) or just use it +# to update PKG_LIST. +function wrap { + if $DRY_RUN; then + for arg in $@; do + if [[ "${NON_PKG_NAMES[@]}" =~ "$arg" ]]; then + continue + elif [[ "$arg" == "-"* ]]; then + # Ignores command options + continue + elif [[ "$PKG_LIST" != "" ]]; then + PKG_LIST="$PKG_LIST,$arg" + else + PKG_LIST="$arg" + fi + done + else + "$@" + fi +} + +while [ -n "$*" ] +do + case "$1" in + --install-debug-tools) + INSTALL_DEBUG_TOOLS=true + ;; + --java) + JAVA_VERSION="${2-}" + shift; + ;; + --dry-run) + DRY_RUN=true + ;; + --help|*) + print_usage + exit 1 + ;; + esac + shift +done + +echo "INSTALL_DEBUG_TOOLS=${INSTALL_DEBUG_TOOLS}" +echo "JAVA_VERSION=${JAVA_VERSION}" +echo "DRY_RUN=${DRY_RUN}" + +# This can get more detailed if there are specific steps +# for specific versions, but at the moment the distribution +# is all we need. +DISTRIBUTION=Unknown +if [[ -f /etc/redhat-release ]]; then + echo "Identified Redhat system." + DISTRIBUTION=Redhat +else + source /etc/lsb-release + if [[ $DISTRIB_ID == Ubuntu ]]; then + echo "Identified Ubuntu system." + DISTRIBUTION=Ubuntu + + # Ubuntu 16.04 does not have Java 11 in its package repository, + # so exit with an error. + if [[ $DISTRIB_RELEASE == 16.04 ]] && [[ $JAVA_VERSION == 11 ]] ; then + echo "ERROR: Java 11 is not supported on Ubuntu 16.04" + exit 1 + fi + fi +fi + +if [[ $DISTRIBUTION == Unknown ]]; then + echo "ERROR: Did not detect supported distribution." 
+ echo "Only Ubuntu and Redhat-based distributions are supported." + exit 1 +fi + +# Install minimal set of files. +# Optionally install extra debug tools. +if [[ $DISTRIBUTION == Ubuntu ]]; then + export DEBIAN_FRONTEND=noninteractive + wrap apt-get update + wrap apt-get install -y \ + hostname \ + krb5-user \ + language-pack-en \ + libsasl2-2 \ + libsasl2-modules \ + libsasl2-modules-gssapi-mit \ + openjdk-${JAVA_VERSION}-jre-headless \ + tzdata + if $INSTALL_DEBUG_TOOLS ; then + echo "Installing extra debug tools" + wrap apt-get install -y \ + curl \ + dnsutils \ + iproute2 \ + iputils-ping \ + less \ + netcat-openbsd \ + sudo \ + vim + fi +elif [[ $DISTRIBUTION == Redhat ]]; then + if [[ $JAVA_VERSION == 8 ]]; then + JAVA_VERSION=1.8.0 + fi + wrap yum install -y --disableplugin=subscription-manager \ + cyrus-sasl-gssapi \ + cyrus-sasl-plain \ + hostname \ + java-${JAVA_VERSION}-openjdk-headless \ + krb5-workstation \ + openldap-devel \ + procps-ng \ + tzdata + + # UTF-8 masking functions require the presence of en_US.utf8. + # Install the appropriate language packs. Redhat/Centos 7 come + # with en_US.utf8, so there is no need to install anything. + if ! grep 'release 7\.' /etc/redhat-release; then + wrap yum install -y --disableplugin=subscription-manager \ + glibc-langpack-en \ + langpacks-en + fi + + if $INSTALL_DEBUG_TOOLS ; then + echo "Installing extra debug tools" + wrap yum install -y --disableplugin=subscription-manager \ + bind-utils \ + curl \ + iproute \ + iputils \ + less \ + nmap-ncat \ + sudo \ + vim \ + which + fi +fi + +if $DRY_RUN; then + echo "The following packages would be installed:" + echo "$PKG_LIST" + exit 0 +fi + +# Verify en_US.utf8 is present +if ! locale -a | grep en_US.utf8 ; then + echo "ERROR: en_US.utf8 locale is not present." + exit 1 +fi + +if ! hostname ; then + echo "ERROR: 'hostname' command failed." + exit 1 +fi + +# graceful_shutdown_backends.sh requires the pgrep utility. Verify it is present. +if ! 
command -v pgrep ; then + echo "ERROR: 'pgrep' is not present." + exit 1 +fi + +# Impala will fail to start if the permissions on /var/tmp are not set to include +# the sticky bit (i.e. +t). Some versions of Redhat UBI images do not have +# this set by default, so specifically set the sticky bit for both /tmp and /var/tmp. +chmod a=rwx,o+t /var/tmp /tmp + +# To minimize the size for the Docker image, clean up any unnecessary files. +if [[ $DISTRIBUTION == Ubuntu ]]; then + apt-get clean + rm -rf /var/lib/apt/lists/* +elif [[ $DISTRIBUTION == Redhat ]]; then + yum clean all + rm -rf /var/cache/yum/* +fi diff --git a/package/bin/impala-env.sh b/package/bin/impala-env.sh new file mode 100644 index 000000000..3655ac6cf --- /dev/null +++ b/package/bin/impala-env.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if [[ -z "$JAVA_HOME" ]]; then + echo "JAVA_HOME not set!" 
+ exit 1 +fi + +echo "Using JAVA_HOME: $JAVA_HOME" +LIB_JVM_DIR=$(dirname $(find $JAVA_HOME -type f -name libjvm.so)) +LIB_JSIG_DIR=$(dirname $(find $JAVA_HOME -type f -name libjsig.so)) + +export LC_ALL=en_US.utf8 +export LD_LIBRARY_PATH="/opt/impala/lib/:$LIB_JVM_DIR:$LIB_JSIG_DIR" +export CLASSPATH="/opt/impala/conf:/opt/impala/jar/*" + +#TODO: Add graceful shutdown for impalads +function stop_process { + name=$1 + pid_file="/tmp/${name}.pid" + if [[ -f $pid_file ]]; then + PID=$(cat $pid_file) + if ps $PID | grep $name; then + echo "Killing $name with PID=$PID" + kill $PID + rm $pid_file + echo "Killed $name" + else + rm $pid_file + echo "Already stopped: $name is not running with PID=$PID. Removed stale $pid_file" + fi + else + echo "PID file $pid_file not found!" + fi +} + +function wait_for_ready { + name=$1 + port=$2 + pid=$3 + + NUM_WAIT_ITERATIONS=20 + i=0 + while [[ $i -lt $NUM_WAIT_ITERATIONS ]]; do + if ! ps $pid | grep $name > /dev/null 2>&1; then + echo "$name with PID $pid doesn't exist" + break + fi + STATUS=$(curl -s http://localhost:$port/healthz) + if [[ $? != 0 ]]; then + echo "Waiting for $name. Port $port not ready." + elif [[ "$STATUS" != "OK" ]]; then + echo "Waiting for $name to be ready" + else + echo "$name is ready" + break + fi + sleep 2 + i=$((i+1)) + done + if [[ "$STATUS" == "OK" ]]; then + echo "Launched $name with PID $pid" + elif [[ $i -eq $NUM_WAIT_ITERATIONS ]]; then + echo "Timed out waiting for $name to be ready. Check logs for more details." + else + echo "Failed to launch $name" + exit 1 + fi +} diff --git a/package/bin/start-catalogd.sh b/package/bin/start-catalogd.sh new file mode 100755 index 000000000..9b472fe60 --- /dev/null +++ b/package/bin/start-catalogd.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Script to launch catalogd. Required JAVA_HOME being set. +# Edit conf/catalogd_flags to set the correct hostname and state_store_host. +# Edit core-site.xml, hdfs-site.xml, hive-site.xml, etc. in conf based on the cluster. +# Example usage: +# export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera +# bin/start-catalogd.sh +# To launch catalogd using another username (e.g. "impala"): +# sudo -E -u impala bin/start-catalogd.sh + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh +$IMPALA_HOME/bin/catalogd --flagfile=$IMPALA_HOME/conf/catalogd_flags & +PID=$! +echo $PID > /tmp/catalogd.pid + +# Sleep 1s so the glog output won't be messed up with waiting messages +sleep 1 + +wait_for_ready catalogd 25020 $PID diff --git a/package/bin/start-impalad.sh b/package/bin/start-impalad.sh new file mode 100755 index 000000000..89ccbbd21 --- /dev/null +++ b/package/bin/start-impalad.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Script to launch impalad. Required JAVA_HOME being set. +# Edit conf/impalad_flags to set the correct hostnames. +# Edit core-site.xml, hdfs-site.xml, hive-site.xml, etc. in conf based on the cluster. +# Example usage: +# export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera +# bin/start-impalad.sh +# To launch impalad using another username (e.g. "impala"): +# sudo -E -u impala bin/start-impalad.sh + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh + +if [[ -n "$HADOOP_HOME" ]]; then + echo "Using HADOOP_HOME: $HADOOP_HOME" + export HADOOP_LIB_DIR="${HADOOP_HOME}/lib" + export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${HADOOP_LIB_DIR}/native/" + echo "Using hadoop native libs in ${HADOOP_LIB_DIR}/native/" +else + export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${IMPALA_HOME}/lib" + echo "HADOOP_HOME not set. Using hadoop native libs in ${IMPALA_HOME}/lib" +fi + +$IMPALA_HOME/bin/impalad --flagfile=$IMPALA_HOME/conf/impalad_flags & +PID=$! +echo $PID > /tmp/impalad.pid + +# Sleep 1s so the glog output won't be messed up with waiting messages +sleep 1 + +wait_for_ready impalad 25000 $PID diff --git a/package/bin/start-statestored.sh b/package/bin/start-statestored.sh new file mode 100755 index 000000000..e3c78cd34 --- /dev/null +++ b/package/bin/start-statestored.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Script to launch statestore. Required JAVA_HOME being set. +# Edit conf/statestore_flags to set a correct hostname. +# Example usage: +# export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera +# bin/start-statestored.sh +# To launch statestore using another username (e.g. "impala"): +# sudo -E -u impala bin/start-statestored.sh + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh +$IMPALA_HOME/bin/statestored --flagfile=$IMPALA_HOME/conf/statestore_flags & +PID=$! +echo $PID > /tmp/statestored.pid + +# Sleep 1s so the glog output won't be messed up with waiting messages +sleep 1 + +wait_for_ready statestored 25010 $PID diff --git a/package/bin/stop-catalogd.sh b/package/bin/stop-catalogd.sh new file mode 100755 index 000000000..8649ed143 --- /dev/null +++ b/package/bin/stop-catalogd.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh + +stop_process catalogd diff --git a/package/bin/stop-impalad.sh b/package/bin/stop-impalad.sh new file mode 100755 index 000000000..4e323e889 --- /dev/null +++ b/package/bin/stop-impalad.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh + +stop_process impalad diff --git a/package/bin/stop-statestored.sh b/package/bin/stop-statestored.sh new file mode 100755 index 000000000..e5aa9578d --- /dev/null +++ b/package/bin/stop-statestored.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}" +source $IMPALA_HOME/bin/impala-env.sh + +stop_process statestored diff --git a/package/conf/catalogd_flags b/package/conf/catalogd_flags new file mode 100644 index 000000000..7b94816a2 --- /dev/null +++ b/package/conf/catalogd_flags @@ -0,0 +1,13 @@ +-hostname=localhost +-state_store_host=localhost +#-kudu_master_hosts=localhost + +-log_dir=/var/log/catalogd +-log_filename=catalogd +-state_store_port=24000 +-minidump_path=/var/log/impala-minidumps +-webserver_doc_root=/opt/impala +-catalog_topic_mode=minimal +-hms_event_polling_interval_s=0 +-v=1 +-max_log_size=200 diff --git a/package/conf/core-site.xml b/package/conf/core-site.xml new file mode 100644 index 000000000..2303cdf5f --- /dev/null +++ b/package/conf/core-site.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<configuration> + <property> + <name>fs.defaultFS</name> + <value>hdfs://localhost:8020</value> + </property> + <property> + <name>ipc.client.connection.maxidletime</name> + <value>30000</value> + </property> + <property> + <name>ipc.client.connect.max.retries</name> + <value>50</value> + </property> + <property> + <name>fs.trash.interval</name> + <value>1</value> + </property> +</configuration> diff --git a/package/conf/fair-scheduler.xml 
b/package/conf/fair-scheduler.xml new file mode 100644 index 000000000..0edf7f080 --- /dev/null +++ b/package/conf/fair-scheduler.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<allocations> + <queue name="root"> + <queue name="default"/> + </queue> +</allocations> diff --git a/package/conf/hdfs-site.xml b/package/conf/hdfs-site.xml new file mode 100644 index 000000000..02468a478 --- /dev/null +++ b/package/conf/hdfs-site.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<configuration> + <property> + <name>dfs.namenode.servicerpc-address</name> + <value>localhost:8022</value> + </property> + <property> + <name>dfs.namenode.http-address</name> + <value>localhost:20101</value> + </property> + <property> + <name>dfs.replication</name> + <value>3</value> + </property> + <property> + <name>dfs.blocksize</name> + <value>134217728</value> + </property> + <property> + <name>dfs.domain.socket.path</name> + <value>/var/run/hdfs-sockets/dn</value> + </property> + <property> + <name>dfs.client.read.shortcircuit</name> + <value>true</value> + </property> + <property> + <name>dfs.client.read.shortcircuit.streams.cache.size</name> + <value>4096</value> + </property> + <property> + <name>dfs.client.read.shortcircuit.skip.checksum</name> + <value>false</value> + </property> + <property> + <name>dfs.client.file-block-storage-locations.timeout.millis</name> + <value>10000</value> + </property> +</configuration> diff --git a/package/conf/hive-site.xml b/package/conf/hive-site.xml new file mode 100644 index 000000000..ec614f2b5 --- /dev/null +++ b/package/conf/hive-site.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<configuration> + <property> + <name>hive.metastore.uris</name> + <value>thrift://localhost:9083</value> + </property> + <property> + <name>hive.support.concurrency</name> + <value>true</value> + </property> + <property> + <name>hive.metastore.client.socket.timeout</name> + <value>3600</value> + </property> + 
<property> + <name>hive.metastore.connect.retries</name> + <value>5</value> + </property> +</configuration> diff --git a/package/conf/impalad_flags b/package/conf/impalad_flags new file mode 100644 index 000000000..4c2dc3a92 --- /dev/null +++ b/package/conf/impalad_flags @@ -0,0 +1,16 @@ +-hostname=localhost +-state_store_host=localhost +-catalog_service_host=localhost +#-kudu_master_hosts=localhost + +-mem_limit=80% +-use_local_catalog=true +-log_dir=/var/log/impalad +-log_filename=impalad +-minidump_path=/var/log/impala-minidumps +-local_library_dir=/var/lib/impala/udfs +-fair_scheduler_allocation_path=/opt/impala/conf/fair-scheduler.xml +-llama_site_path=/opt/impala/conf/llama-site.xml +-webserver_doc_root=/opt/impala +-v=1 +-max_log_size=200 diff --git a/package/conf/llama-site.xml b/package/conf/llama-site.xml new file mode 100644 index 000000000..fefdb93ca --- /dev/null +++ b/package/conf/llama-site.xml @@ -0,0 +1,3 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<configuration/> diff --git a/package/conf/statestore_flags b/package/conf/statestore_flags new file mode 100644 index 000000000..9e53d3a70 --- /dev/null +++ b/package/conf/statestore_flags @@ -0,0 +1,7 @@ +-hostname=localhost +-log_dir=/var/log/statestore +-log_filename=statestored +-minidump_path=/var/log/impala-minidumps +-webserver_doc_root=/opt/impala +-v=1 +-max_log_size=200
