This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-3.4.2
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8e9c5a5d17dbcb275653d6e10b9dc719b965ed5d
Author: stiga-huang <[email protected]>
AuthorDate: Mon Sep 5 13:13:11 2022 +0800

    IMPALA-10262: RPM/DEB Packaging Support
    
    This patch is based on a previous patch contributed by Shant Hovsepian:
    https://gerrit.cloudera.org/c/16612/
    
    It adds a new option, -package, to buildall.sh for building a package
    for the current OS type (e.g. CentOS/Ubuntu). You can also use
    "make/ninja package" to build the package. Scripts for launching the
    services and the required configuration files are also added.
    
    Tests:
     - Built on Ubuntu 18.04/20.04 and CentOS 7 using
       ./buildall.sh -noclean -skiptests -release -package
     - Deployed the RPM package on a CDP cluster. Verified the scripts.
     - Deployed the DEB package on a docker container. Verified the scripts.
    
    Resolved trivial backport conflicts in:
     - CMakeLists.txt
     - bin/bootstrap_system.sh
     - bin/jenkins/build-all-flag-combinations.sh
     - buildall.sh
     - docker/install_os_packages.sh
    
    Non-trivial backport notes:
    The CMake function cmake_host_system_information does not recognize the
    keys DISTRIB_ID and DISTRIB_VERSION_ID before CMake 3.22, and branch-3.4
    currently uses CMake 3.14.3. Their usages are removed as follows:
     - One usage of DISTRIB_ID is to skip packaging impala-shell on redhat8.
       Removes it since redhat8 is not supported on branch-3.4.
     - Another usage of DISTRIB_ID is to determine the package file type
       (DEB vs RPM) based on the OS. Replaces it with content of the
       os-release files.
     - Removes the usage of OS_DISTRIB_VERSION_ID in the package file name
    
    Tests:
     - Built on Ubuntu 18.04
    
    Change-Id: I64419fd400fe8d233dac016b6306157fe9461d82
    Reviewed-on: http://gerrit.cloudera.org:8080/18939
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-on: http://gerrit.cloudera.org:8080/21130
    Tested-by: Quanlong Huang <[email protected]>
    Reviewed-by: Zihao Ye <[email protected]>
---
 CMakeLists.txt                             | 105 ++++++++++++++
 be/src/service/CMakeLists.txt              |   4 +
 bin/bootstrap_system.sh                    |   2 +-
 bin/impala-config.sh                       |   3 +
 bin/jenkins/build-all-flag-combinations.sh |   2 +
 bin/rat_exclude_files.txt                  |   3 +
 buildall.sh                                |  12 ++
 docker/install_os_packages.sh              | 212 +++++++++++++++++++++++++++++
 package/bin/impala-env.sh                  |  85 ++++++++++++
 package/bin/start-catalogd.sh              |  38 ++++++
 package/bin/start-impalad.sh               |  49 +++++++
 package/bin/start-statestored.sh           |  37 +++++
 package/bin/stop-catalogd.sh               |  23 ++++
 package/bin/stop-impalad.sh                |  23 ++++
 package/bin/stop-statestored.sh            |  23 ++++
 package/conf/catalogd_flags                |  13 ++
 package/conf/core-site.xml                 |  20 +++
 package/conf/fair-scheduler.xml            |   6 +
 package/conf/hdfs-site.xml                 |  40 ++++++
 package/conf/hive-site.xml                 |  20 +++
 package/conf/impalad_flags                 |  16 +++
 package/conf/llama-site.xml                |   3 +
 package/conf/statestore_flags              |   7 +
 23 files changed, 745 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52a60eef6..6828e5f04 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,8 @@ set(NO_TESTS 1)
 # initialized.
 project(Impala)
 
+option(BUILD_PACKAGES "Build deployment packages")
+
 include(cmake_modules/kudu_cmake_fns.txt)
 
 if (NOT DEFINED BUILD_SHARED_LIBS)
@@ -399,6 +401,10 @@ endif()
 find_package(kuduClient REQUIRED NO_DEFAULT_PATH)
 include_directories(SYSTEM ${KUDU_CLIENT_INCLUDE_DIR})
 
+## installation path
+set(CMAKE_INSTALL_PREFIX "/opt")
+set(IMPALA_INSTALLDIR "impala" CACHE INTERNAL "")
+
 # compile these subdirs using their own CMakeLists.txt
 add_subdirectory(common/function-registry)
 add_subdirectory(common/thrift)
@@ -413,6 +419,36 @@ add_subdirectory(impala-parent)
 add_subdirectory(ext-data-source)
 add_subdirectory(query-event-hook-api)
 
+install(DIRECTORY "www/" DESTINATION ${IMPALA_INSTALLDIR}/www)
+install(FILES fe/target/impala-frontend-$ENV{IMPALA_VERSION}.jar
+  DESTINATION ${IMPALA_INSTALLDIR}/jar)
+
+set(IMPALA_GCC_HOME 
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/gcc-$ENV{IMPALA_GCC_VERSION})
+FILE(GLOB gcc_lib ${IMPALA_GCC_HOME}/lib64/libgcc_s.so.1*)
+install(FILES ${gcc_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib)
+
+FILE(GLOB cpp_lib ${IMPALA_GCC_HOME}/lib64/libstdc++.so.6*)
+install(FILES ${cpp_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib)
+
+set(KUDU_HOME 
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-$ENV{IMPALA_KUDU_VERSION}/release)
+# The parent folder is lib64 on centos/redhat, while on ubuntu it's lib.
+FILE(GLOB kudu_lib ${KUDU_HOME}/lib*/libkudu_client.so*)
+install(FILES ${kudu_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib)
+
+FILE(GLOB hadoop_lib $ENV{HADOOP_LIB_DIR}/native/libhadoop.so*)
+install(FILES ${hadoop_lib} DESTINATION ${IMPALA_INSTALLDIR}/lib)
+
+install(DIRECTORY fe/target/dependency/ DESTINATION ${IMPALA_INSTALLDIR}/jar
+  FILES_MATCHING PATTERN "*.jar")
+
+install(DIRECTORY shell/build/impala-shell-$ENV{IMPALA_VERSION}/
+  DESTINATION ${IMPALA_INSTALLDIR}/shell
+  USE_SOURCE_PERMISSIONS PATTERN "*.pyc" EXCLUDE)
+
+install(DIRECTORY package/bin/ DESTINATION ${IMPALA_INSTALLDIR}/bin
+  USE_SOURCE_PERMISSIONS FILES_MATCHING PATTERN "*.sh")
+install(DIRECTORY package/conf/ DESTINATION ${IMPALA_INSTALLDIR}/conf)
+
 # Build target for all generated files which most backend code depends on
 add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps)
 
@@ -447,3 +483,72 @@ if (DUMP_INCLUDE_PATHS)
 endif(DUMP_INCLUDE_PATHS)
 
 SET(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libstdc++")
+
+# packaging: when BUILD_PACKAGES is ON, pick a CPack generator from the OS id file
+if (BUILD_PACKAGES)
+  if (EXISTS "/etc/redhat-release")
+    set(IMPALA_LINUX_ID_FILE "/etc/redhat-release")
+  elseif (EXISTS "/etc/os-release")
+    set(IMPALA_LINUX_ID_FILE "/etc/os-release")
+  elseif (EXISTS "/etc/issue")
+    set(IMPALA_LINUX_ID_FILE "/etc/issue")
+  else()
+    message(FATAL_ERROR "Can not determine OS version!")
+  endif()
+  file(READ "${IMPALA_LINUX_ID_FILE}" IMPALA_OS_INFO)
+
+  string(TOLOWER "${IMPALA_OS_INFO}" IMPALA_OS_INFO)  # quoted: content may hold ';'
+  if ("${IMPALA_OS_INFO}" MATCHES "(red ?hat|centos)")  # RHEL spells it "Red Hat"
+    set(CPACK_GENERATOR "RPM")
+  elseif ("${IMPALA_OS_INFO}" MATCHES "ubuntu")
+    set(CPACK_GENERATOR "DEB")
+  else()
+    message(WARNING "OS is ${IMPALA_OS_INFO}. No packages will be generated.")
+  endif()
+
+  if (CPACK_GENERATOR)
+    message(STATUS "Packaging enabled: ${CPACK_GENERATOR}")
+    set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME})
+    set(CPACK_PACKAGE_VENDOR "Apache")
+    set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "An open source massively parallel processing SQL query engine.")
+    set(CPACK_PACKAGE_CONTACT "[email protected]")
+    set(CPACK_PACKAGE_HOMEPAGE_URL "https://impala.apache.org")
+    set(CPACK_PACKAGE_VERSION "$ENV{IMPALA_VERSION}")
+    set(CPACK_RPM_PACKAGE_LICENSE "ASL-2.0")
+    # Set a meaningful package name, e.g. Impala-4.3.0-SNAPSHOT_hive-3.1.3000.7.2.18.0-41-x86_64
+    set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}")
+    set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}_hive-$ENV{IMPALA_HIVE_VERSION}")
+    set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
+    message(STATUS "Package name: ${CPACK_PACKAGE_FILE_NAME}")
+
+    if ($ENV{STRIP_DEPLOYMENT_IMPALAD})
+      set(CPACK_STRIP_FILES ${IMPALA_INSTALLDIR}/bin/impalad)
+      message("Binaries in the package will be stripped")
+    endif()
+
+    set(CPACK_PACKAGING_INSTALL_PREFIX "/opt")
+    set(CPACK_PACKAGE_INSTALL_DIRECTORY "impala")
+
+    execute_process(
+      COMMAND bash -c "${CMAKE_SOURCE_DIR}/docker/install_os_packages.sh --dry-run | tail -n1"
+      OUTPUT_VARIABLE PKG_LIST
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if ("${PKG_LIST}" STREQUAL "")
+      message(FATAL_ERROR "Package list is empty: '${PKG_LIST}'")
+    else()
+      message(STATUS "Get required package list: '${PKG_LIST}'")
+    endif()
+
+    if ("${CPACK_GENERATOR}" MATCHES "RPM")
+      set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")  # NOTE(review): leading space looks deliberate; confirm
+      set(CPACK_RPM_PACKAGE_RELOCATABLE TRUE)
+      set(CPACK_RPM_PACKAGE_REQUIRES "${PKG_LIST}")
+    else()
+      set(CPACK_DEBIAN_PACKAGE_DEPENDS "${PKG_LIST}")
+    endif()
+  endif()
+
+  include(CPack)
+
+endif()
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index 939df2226..9371cbc80 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -110,6 +110,10 @@ target_link_libraries(unifiedbetests
   ${JAVA_JSIG_LIBRARY} ${UNIFIED_TEST_LINK_LIBS})
 ADD_DEPENDENCIES(unified-be-test-executable unifiedbetests)
 
+install(FILES ${STATESTORED_SYMLINK} DESTINATION ${IMPALA_INSTALLDIR}/bin)
+install(FILES ${CATALOGD_SYMLINK} DESTINATION ${IMPALA_INSTALLDIR}/bin)
+install(TARGETS impalad DESTINATION ${IMPALA_INSTALLDIR}/bin)
+
 # Exception to unified be tests: Custom main() due to leak
 ADD_BE_TEST(session-expiry-test session-expiry-test.cc) # TODO: this leaks 
thrift server
 ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test 
"StitchNullsTest.*:PrintTColumnValueTest.*")
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index 9f8d8f481..22d9bd44b 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -228,7 +228,7 @@ fi
 redhat sudo yum install -y curl gcc gcc-c++ git krb5-devel krb5-server 
krb5-workstation \
         libevent-devel libffi-devel make ntp ntpdate ntp-perl openssl-devel 
cyrus-sasl \
         cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \
-        python-devel python-setuptools postgresql postgresql-server \
+        python-devel python-setuptools postgresql postgresql-server rpm-build \
         wget vim-common nscd cmake lzo-devel fuse-devel snappy-devel 
zlib-devel \
         psmisc lsof openssh-server redhat-lsb java-1.8.0-openjdk-devel \
         java-1.8.0-openjdk-src python-argparse
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 4218fa6e7..a29f698d1 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -802,6 +802,9 @@ CLASSPATH="$LZO_JAR_PATH:$CLASSPATH"
 # A marker in the environment to prove that we really did source this file
 export IMPALA_CONFIG_SOURCED=1
 
+# Whether to strip the impalad binary when generating deployment package
+export STRIP_DEPLOYMENT_IMPALAD=true
+
 echo "IMPALA_VERSION          = $IMPALA_VERSION"
 echo "IMPALA_HOME             = $IMPALA_HOME"
 echo "HADOOP_HOME             = $HADOOP_HOME"
diff --git a/bin/jenkins/build-all-flag-combinations.sh 
b/bin/jenkins/build-all-flag-combinations.sh
index cbbc7c493..b1412d867 100755
--- a/bin/jenkins/build-all-flag-combinations.sh
+++ b/bin/jenkins/build-all-flag-combinations.sh
@@ -37,6 +37,7 @@ CONFIGS=(
   # Test gcc builds with and without -so:
   "-skiptests -noclean"
   "-skiptests -noclean -release"
+  "-skiptests -noclean -release -package"
   "-skiptests -noclean -release -so -ninja"
   # clang sanitizer builds:
   "-skiptests -noclean -asan"
@@ -44,6 +45,7 @@ CONFIGS=(
   "-skiptests -noclean -ubsan -so -ninja"
   # USE_CDP_HIVE=true build:
   "-skiptests -noclean -use_cdp_hive"
+  "-skiptests -noclean -use_cdp_hive -package"
 )
 
 FAILED=""
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 29851b628..07998a1a9 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -212,3 +212,6 @@ docs/images/howto_show_histogram.png
 docs/images/howto_static_server_pools_config.png
 docs/images/impala_arch.jpeg
 docs/images/support_send_diagnostic_data.png
+
+# Files of deployment configuration
+package/conf/*
diff --git a/buildall.sh b/buildall.sh
index 498f8632a..0f5d03101 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -69,6 +69,7 @@ BUILD_ASAN=0
 BUILD_FE_ONLY=0
 BUILD_TESTS=1
 GEN_CMAKE_ONLY=0
+GEN_PACKAGE=0
 BUILD_RELEASE_AND_DEBUG=0
 BUILD_TIDY=0
 BUILD_UBSAN=0
@@ -198,6 +199,9 @@ do
     -cmake_only)
       GEN_CMAKE_ONLY=1
       ;;
+    -package)
+      GEN_PACKAGE=1
+      ;;
     -help|*)
       echo "buildall.sh - Builds Impala and runs all tests."
       echo "[-noclean] : Omits cleaning all packages before building. Will not 
kill"\
@@ -247,6 +251,7 @@ do
       echo "[-fe_only] : Build just the frontend"
       echo "[-ninja] : Use ninja instead of make"
       echo "[-cmake_only] : Generate makefiles only, instead of doing a full 
build"
+      echo "[-package] : Generate a package for deployment."
       echo 
"-----------------------------------------------------------------------------
 Examples of common tasks:
 
@@ -462,6 +467,9 @@ generate_cmake_files() {
   else
     
CMAKE_ARGS+=(-DCMAKE_TOOLCHAIN_FILE=$IMPALA_HOME/cmake_modules/toolchain.cmake)
   fi
+  if [[ "$GEN_PACKAGE" -eq 1 ]]; then
+    CMAKE_ARGS+=(-DBUILD_PACKAGES=ON)
+  fi
   cmake . ${CMAKE_ARGS[@]}
 }
 
@@ -587,6 +595,10 @@ else
   build_all_components $CMAKE_BUILD_TYPE 1
 fi
 
+if [[ "$GEN_PACKAGE" -eq 1 ]]; then
+  ${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4} package
+fi
+
 if [[ $NEED_MINICLUSTER -eq 1 ]]; then
   reconfigure_test_cluster
 fi
diff --git a/docker/install_os_packages.sh b/docker/install_os_packages.sh
new file mode 100755
index 000000000..8fbeef793
--- /dev/null
+++ b/docker/install_os_packages.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# This installs the minimal dependencies needed for Impala
+# services to run. It currently handles Ubuntu and Redhat
+# distributions. This takes an optional argument "--install-debug-tools"
+# which installs extra debugging tools like curl, ping, etc.
+
+set -euo pipefail
+
+INSTALL_DEBUG_TOOLS=false
+JAVA_VERSION=8
+DRY_RUN=false
+PKG_LIST=""
+NON_PKG_NAMES=(apt-get yum install update)
+
+function print_usage {
+    echo "install_os_packages.sh - Helper script to install OS dependencies"
+    echo "[--install-debug-tools] : Also install debug tools like curl, 
iproute, etc"
+    echo "[--java <version>] : Use specified Java version rather than the 
default Java 8."
+    echo "[--dry-run] : Print the list of packages to install."
+}
+
+# Wraps the passed in command: runs it as-is when DRY_RUN=false; otherwise appends its
+# package arguments (skipping exact NON_PKG_NAMES words and -options) to PKG_LIST.
+function wrap {
+  if $DRY_RUN; then
+    for arg in "$@"; do
+      if [[ " ${NON_PKG_NAMES[*]} " == *" $arg "* ]]; then
+        continue
+      elif [[ "$arg" == "-"* ]]; then
+        # Ignores command options
+        continue
+      elif [[ "$PKG_LIST" != "" ]]; then
+        PKG_LIST="$PKG_LIST,$arg"
+      else
+        PKG_LIST="$arg"
+      fi
+    done
+  else
+    "$@"
+  fi
+}
+
+while [ -n "$*" ]
+do
+  case "$1" in
+    --install-debug-tools)
+      INSTALL_DEBUG_TOOLS=true
+      ;;
+    --java)
+      JAVA_VERSION="${2-}"
+      shift;
+      ;;
+    --dry-run)
+      DRY_RUN=true
+      ;;
+    --help|*)
+      print_usage
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+echo "INSTALL_DEBUG_TOOLS=${INSTALL_DEBUG_TOOLS}"
+echo "JAVA_VERSION=${JAVA_VERSION}"
+echo "DRY_RUN=${DRY_RUN}"
+
+# This can get more detailed if there are specific steps
+# for specific versions, but at the moment the distribution
+# is all we need.
+DISTRIBUTION=Unknown
+if [[ -f /etc/redhat-release ]]; then
+  echo "Identified Redhat system."
+  DISTRIBUTION=Redhat
+else
+  source /etc/lsb-release
+  if [[ $DISTRIB_ID == Ubuntu ]]; then
+    echo "Identified Ubuntu system."
+    DISTRIBUTION=Ubuntu
+
+    # Ubuntu 16.04 does not have Java 11 in its package repository,
+    # so exit with an error.
+    if [[ $DISTRIB_RELEASE == 16.04 ]] && [[ $JAVA_VERSION == 11 ]] ; then
+      echo "ERROR: Java 11 is not supported on Ubuntu 16.04"
+      exit 1
+    fi
+  fi
+fi
+
+if [[ $DISTRIBUTION == Unknown ]]; then
+  echo "ERROR: Did not detect supported distribution."
+  echo "Only Ubuntu and Redhat-based distributions are supported."
+  exit 1
+fi
+
+# Install minimal set of files.
+# Optionally install extra debug tools.
+if [[ $DISTRIBUTION == Ubuntu ]]; then
+  export DEBIAN_FRONTEND=noninteractive
+  wrap apt-get update
+  wrap apt-get install -y \
+      hostname \
+      krb5-user \
+      language-pack-en \
+      libsasl2-2 \
+      libsasl2-modules \
+      libsasl2-modules-gssapi-mit \
+      openjdk-${JAVA_VERSION}-jre-headless \
+      tzdata
+  if $INSTALL_DEBUG_TOOLS ; then
+    echo "Installing extra debug tools"
+    wrap apt-get install -y \
+        curl \
+        dnsutils \
+        iproute2 \
+        iputils-ping \
+        less \
+        netcat-openbsd \
+        sudo \
+        vim
+  fi
+elif [[ $DISTRIBUTION == Redhat ]]; then
+  if [[ $JAVA_VERSION == 8 ]]; then
+    JAVA_VERSION=1.8.0
+  fi
+  wrap yum install -y --disableplugin=subscription-manager \
+      cyrus-sasl-gssapi \
+      cyrus-sasl-plain \
+      hostname \
+      java-${JAVA_VERSION}-openjdk-headless \
+      krb5-workstation \
+      openldap-devel \
+      procps-ng \
+      tzdata
+
+  # UTF-8 masking functions require the presence of en_US.utf8.
+  # Install the appropriate language packs. Redhat/Centos 7 come
+  # with en_US.utf8, so there is no need to install anything.
+  if ! grep 'release 7\.' /etc/redhat-release; then
+      wrap yum install -y --disableplugin=subscription-manager \
+          glibc-langpack-en \
+          langpacks-en
+  fi
+
+  if $INSTALL_DEBUG_TOOLS ; then
+    echo "Installing extra debug tools"
+    wrap yum install -y --disableplugin=subscription-manager \
+        bind-utils \
+        curl \
+        iproute \
+        iputils \
+        less \
+        nmap-ncat \
+        sudo \
+        vim \
+        which
+  fi
+fi
+
+if $DRY_RUN; then
+  echo "The following packages would be installed:"
+  echo "$PKG_LIST"
+  exit 0
+fi
+
+# Verify en_US.utf8 is present
+if ! locale -a | grep en_US.utf8 ; then
+  echo "ERROR: en_US.utf8 locale is not present."
+  exit 1
+fi
+
+if ! hostname ; then
+  echo "ERROR: 'hostname' command failed."
+  exit 1
+fi
+
+# graceful_shutdown_backends.sh requires the pgrep utility. Verify it is 
present.
+if ! command -v pgrep ; then
+  echo "ERROR: 'pgrep' is not present."
+  exit 1
+fi
+
+# Impala will fail to start if the permissions on /var/tmp are not set to 
include
+# the sticky bit (i.e. +t). Some versions of Redhat UBI images do not have
+# this set by default, so specifically set the sticky bit for both /tmp and 
/var/tmp.
+chmod a=rwx,o+t /var/tmp /tmp
+
+# To minimize the size for the Docker image, clean up any unnecessary files.
+if [[ $DISTRIBUTION == Ubuntu ]]; then
+  apt-get clean
+  rm -rf /var/lib/apt/lists/*
+elif [[ $DISTRIBUTION == Redhat ]]; then
+  yum clean all
+  rm -rf /var/cache/yum/*
+fi
diff --git a/package/bin/impala-env.sh b/package/bin/impala-env.sh
new file mode 100644
index 000000000..3655ac6cf
--- /dev/null
+++ b/package/bin/impala-env.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if [[ -z "$JAVA_HOME" ]]; then
+  echo "JAVA_HOME not set!"
+  exit 1
+fi
+
+echo "Using JAVA_HOME: $JAVA_HOME"
+LIB_JVM_DIR=$(dirname $(find $JAVA_HOME -type f -name libjvm.so))
+LIB_JSIG_DIR=$(dirname $(find $JAVA_HOME -type f -name libjsig.so))
+
+export LC_ALL=en_US.utf8
+export LD_LIBRARY_PATH="/opt/impala/lib/:$LIB_JVM_DIR:$LIB_JSIG_DIR"
+export CLASSPATH="/opt/impala/conf:/opt/impala/jar/*"
+
+#TODO: Add graceful shutdown for impalads
+function stop_process {  # $1: process name; kills the PID recorded in /tmp/<name>.pid
+  name=$1
+  pid_file="/tmp/${name}.pid"
+  if [[ -f "$pid_file" ]]; then
+    PID=$(cat "$pid_file")
+    if ps "$PID" | grep "$name"; then  # quoted so an empty PID file cannot match other processes
+      echo "Killing $name with PID=$PID"
+      kill "$PID"
+      rm "$pid_file"
+      echo "Killed $name"
+    else
+      rm "$pid_file"
+      echo "Already stopped: $name is not running with PID=$PID. Removed stale $pid_file"
+    fi
+  else
+    echo "PID file $pid_file not found!"
+  fi
+}
+
+function wait_for_ready {  # args: name port pid; polls http://localhost:<port>/healthz
+  name=$1
+  port=$2
+  pid=$3
+
+  NUM_WAIT_ITERATIONS=20
+  i=0; STATUS=""  # reset STATUS so a value left in the sourcing shell is never reused
+  while [[ $i -lt $NUM_WAIT_ITERATIONS ]]; do
+    if ! ps "$pid" | grep "$name" > /dev/null 2>&1; then
+      echo "$name with PID $pid doesn't exist"
+      break
+    fi
+    STATUS=$(curl -s "http://localhost:$port/healthz")
+    if [[ $? != 0 ]]; then
+      echo "Waiting for $name. Port $port not ready."
+    elif [[ "$STATUS" != "OK" ]]; then
+      echo "Waiting for $name to be ready"
+    else
+      echo "$name is ready"
+      break
+    fi
+    sleep 2
+    i=$((i+1))
+  done
+  if [[ "$STATUS" == "OK" ]]; then
+    echo "Launched $name with PID $pid"
+  elif [[ $i -eq $NUM_WAIT_ITERATIONS ]]; then
+    echo "Timed out waiting for $name to be ready. Check logs for more details."
+  else
+    echo "Failed to launch $name"
+    exit 1
+  fi
+}
diff --git a/package/bin/start-catalogd.sh b/package/bin/start-catalogd.sh
new file mode 100755
index 000000000..9b472fe60
--- /dev/null
+++ b/package/bin/start-catalogd.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Script to launch catalogd. Required JAVA_HOME being set.
+# Edit conf/catalogd_flags to set the correct hostname and state_store_host.
+# Edit core-site.xml, hdfs-site.xml, hive-site.xml, etc. in conf based on the 
cluster.
+# Example usage:
+#   export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera
+#   bin/start-catalogd.sh
+# To launch catalogd using another username (e.g. "impala"):
+#   sudo -E -u impala bin/start-catalogd.sh
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+$IMPALA_HOME/bin/catalogd --flagfile=$IMPALA_HOME/conf/catalogd_flags &
+PID=$!
+echo $PID > /tmp/catalogd.pid
+
+# Sleep 1s so the glog output won't be messed up with waiting messages
+sleep 1
+
+wait_for_ready catalogd 25020 $PID
diff --git a/package/bin/start-impalad.sh b/package/bin/start-impalad.sh
new file mode 100755
index 000000000..89ccbbd21
--- /dev/null
+++ b/package/bin/start-impalad.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Script to launch impalad. Required JAVA_HOME being set.
+# Edit conf/impalad_flags to set the correct hostnames.
+# Edit core-site.xml, hdfs-site.xml, hive-site.xml, etc. in conf based on the 
cluster.
+# Example usage:
+#   export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera
+#   bin/start-impalad.sh
+# To launch impalad using another username (e.g. "impala"):
+#   sudo -E -u impala bin/start-impalad.sh
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+
+if [[ -n "$HADOOP_HOME" ]]; then
+  echo "Using HADOOP_HOME: $HADOOP_HOME"
+  export HADOOP_LIB_DIR="${HADOOP_HOME}/lib"
+  export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${HADOOP_LIB_DIR}/native/"
+  echo "Using hadoop native libs in ${HADOOP_LIB_DIR}/native/"
+else
+  export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${IMPALA_HOME}/lib"
+  echo "HADOOP_HOME not set. Using hadoop native libs in ${IMPALA_HOME}/lib"
+fi
+
+$IMPALA_HOME/bin/impalad --flagfile=$IMPALA_HOME/conf/impalad_flags &
+PID=$!
+echo $PID > /tmp/impalad.pid
+
+# Sleep 1s so the glog output won't be messed up with waiting messages
+sleep 1
+
+wait_for_ready impalad 25000 $PID
diff --git a/package/bin/start-statestored.sh b/package/bin/start-statestored.sh
new file mode 100755
index 000000000..e3c78cd34
--- /dev/null
+++ b/package/bin/start-statestored.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Script to launch statestore. Required JAVA_HOME being set.
+# Edit conf/statestore_flags to set a correct hostname.
+# Example usage:
+#   export JAVA_HOME=/usr/java/jdk1.8.0_232-cloudera
+#   bin/start-statestored.sh
+# To launch statestore using another username (e.g. "impala"):
+#   sudo -E -u impala bin/start-statestored.sh
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+$IMPALA_HOME/bin/statestored --flagfile=$IMPALA_HOME/conf/statestore_flags &
+PID=$!
+echo $PID > /tmp/statestored.pid
+
+# Sleep 1s so the glog output won't be messed up with waiting messages
+sleep 1
+
+wait_for_ready statestored 25010 $PID
diff --git a/package/bin/stop-catalogd.sh b/package/bin/stop-catalogd.sh
new file mode 100755
index 000000000..8649ed143
--- /dev/null
+++ b/package/bin/stop-catalogd.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+
+stop_process catalogd
diff --git a/package/bin/stop-impalad.sh b/package/bin/stop-impalad.sh
new file mode 100755
index 000000000..4e323e889
--- /dev/null
+++ b/package/bin/stop-impalad.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+
+stop_process impalad
diff --git a/package/bin/stop-statestored.sh b/package/bin/stop-statestored.sh
new file mode 100755
index 000000000..e5aa9578d
--- /dev/null
+++ b/package/bin/stop-statestored.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo "Using IMPALA_HOME: ${IMPALA_HOME:=/opt/impala}"
+source $IMPALA_HOME/bin/impala-env.sh
+
+stop_process statestored
diff --git a/package/conf/catalogd_flags b/package/conf/catalogd_flags
new file mode 100644
index 000000000..7b94816a2
--- /dev/null
+++ b/package/conf/catalogd_flags
@@ -0,0 +1,13 @@
+-hostname=localhost
+-state_store_host=localhost
+#-kudu_master_hosts=localhost
+
+-log_dir=/var/log/catalogd
+-log_filename=catalogd
+-state_store_port=24000
+-minidump_path=/var/log/impala-minidumps
+-webserver_doc_root=/opt/impala
+-catalog_topic_mode=minimal
+-hms_event_polling_interval_s=0
+-v=1
+-max_log_size=200
diff --git a/package/conf/core-site.xml b/package/conf/core-site.xml
new file mode 100644
index 000000000..2303cdf5f
--- /dev/null
+++ b/package/conf/core-site.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://localhost:8020</value>
+  </property>
+  <property>
+    <name>ipc.client.connection.maxidletime</name>
+    <value>30000</value>
+  </property>
+  <property>
+    <name>ipc.client.connect.max.retries</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>fs.trash.interval</name>
+    <value>1</value>
+  </property>
+</configuration>
diff --git a/package/conf/fair-scheduler.xml b/package/conf/fair-scheduler.xml
new file mode 100644
index 000000000..0edf7f080
--- /dev/null
+++ b/package/conf/fair-scheduler.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<allocations>
+    <queue name="root">
+        <queue name="default"/>
+    </queue>
+</allocations>
diff --git a/package/conf/hdfs-site.xml b/package/conf/hdfs-site.xml
new file mode 100644
index 000000000..02468a478
--- /dev/null
+++ b/package/conf/hdfs-site.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration>
+  <property>
+    <name>dfs.namenode.servicerpc-address</name>
+    <value>localhost:8022</value>
+  </property>
+  <property>
+    <name>dfs.namenode.http-address</name>
+    <value>localhost:20101</value>
+  </property>
+  <property>
+    <name>dfs.replication</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>dfs.blocksize</name>
+    <value>134217728</value>
+  </property>
+  <property>
+    <name>dfs.domain.socket.path</name>
+    <value>/var/run/hdfs-sockets/dn</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit.streams.cache.size</name>
+    <value>4096</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit.skip.checksum</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.client.file-block-storage-locations.timeout.millis</name>
+    <value>10000</value>
+  </property>
+</configuration>
diff --git a/package/conf/hive-site.xml b/package/conf/hive-site.xml
new file mode 100644
index 000000000..ec614f2b5
--- /dev/null
+++ b/package/conf/hive-site.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration>
+  <property>
+    <name>hive.metastore.uris</name>
+    <value>thrift://localhost:9083</value>
+  </property>
+  <property>
+    <name>hive.support.concurrency</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.metastore.client.socket.timeout</name>
+    <value>3600</value>
+  </property>
+  <property>
+    <name>hive.metastore.connect.retries</name>
+    <value>5</value>
+  </property>
+</configuration>
diff --git a/package/conf/impalad_flags b/package/conf/impalad_flags
new file mode 100644
index 000000000..4c2dc3a92
--- /dev/null
+++ b/package/conf/impalad_flags
@@ -0,0 +1,16 @@
+-hostname=localhost
+-state_store_host=localhost
+-catalog_service_host=localhost
+#-kudu_master_hosts=localhost
+
+-mem_limit=80%
+-use_local_catalog=true
+-log_dir=/var/log/impalad
+-log_filename=impalad
+-minidump_path=/var/log/impala-minidumps
+-local_library_dir=/var/lib/impala/udfs
+-fair_scheduler_allocation_path=/opt/impala/conf/fair-scheduler.xml
+-llama_site_path=/opt/impala/conf/llama-site.xml
+-webserver_doc_root=/opt/impala
+-v=1
+-max_log_size=200
diff --git a/package/conf/llama-site.xml b/package/conf/llama-site.xml
new file mode 100644
index 000000000..fefdb93ca
--- /dev/null
+++ b/package/conf/llama-site.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration/>
diff --git a/package/conf/statestore_flags b/package/conf/statestore_flags
new file mode 100644
index 000000000..9e53d3a70
--- /dev/null
+++ b/package/conf/statestore_flags
@@ -0,0 +1,7 @@
+-hostname=localhost
+-log_dir=/var/log/statestore
+-log_filename=statestored
+-minidump_path=/var/log/impala-minidumps
+-webserver_doc_root=/opt/impala
+-v=1
+-max_log_size=200


Reply via email to