This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6a53cf8efa [dependency](be) Add vectorscan for support hypserscan on ARM (#11102) 6a53cf8efa is described below commit 6a53cf8efaf9e408af59e6fa0d634df6c3732a83 Author: Kang <kxiao.ti...@gmail.com> AuthorDate: Tue Jul 26 11:28:25 2022 +0800 [dependency](be) Add vectorscan for support hypserscan on ARM (#11102) hyperscan is a high-performance regular expression matching library, but it cannot be used on ARM. vectorscan is an ARM port of hyperscan that can be used as a drop-in replacement. Since hyperscan was originally created by Intel and is popular and mature on x86, we use vectorscan only on aarch64 when building the third-party libraries. --- dist/LICENSE-dist.txt | 1 + dist/licenses/LICENSE-vectorscan.txt | 123 ++++++++++++++++++++++++++++++ thirdparty/CHANGELOG.md | 3 + thirdparty/build-thirdparty.sh | 27 +++---- thirdparty/download-thirdparty.sh | 11 ++- thirdparty/patches/vectorscan-5.4.7.patch | 31 ++++++++ thirdparty/vars.sh | 10 +++ 7 files changed, 188 insertions(+), 18 deletions(-) diff --git a/dist/LICENSE-dist.txt b/dist/LICENSE-dist.txt index 127eb03fd8..d496979a6a 100644 --- a/dist/LICENSE-dist.txt +++ b/dist/LICENSE-dist.txt @@ -1554,6 +1554,7 @@ Other dependencies: * curl: 7.79.0 -- license/LICENSE-curl.txt * re2: 2021-02-02 -- license/LICENSE-re2.txt * hyperscan: 5.4.0 -- license/LICENSE-hyperscan.txt + * vectorscan: 5.4.7 -- license/LICENSE-vectorscan.txt * boost: 1.73.0 -- license/LICENSE-boost.txt * unixodbc: 2.3.7 -- license/LICENSE-unixodbc.txt * leveldb: 1.20 -- license/LICENSE-leveldb.txt diff --git a/dist/licenses/LICENSE-vectorscan.txt b/dist/licenses/LICENSE-vectorscan.txt new file mode 100644 index 0000000000..8324617bf5 --- /dev/null +++ b/dist/licenses/LICENSE-vectorscan.txt @@ -0,0 +1,123 @@ +Hyperscan is licensed under the BSD License. + +Copyright (c) 2015, Intel Corporation + +Vectorscan is licensed under the BSD License. 
+ +Copyright (c) 2020, VectorCamp PC +Copyright (c) 2021, Arm Limited + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +-------------------------------------------------------------------------------- + +This product also contains code from third parties, under the following +licenses: + +Intel's Slicing-by-8 CRC32 implementation +----------------------------------------- + +Copyright (c) 2004-2006, Intel Corporation +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Boost C++ Headers Library +------------------------- + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + +The Google C++ Testing Framework (Google Test) +---------------------------------------------- + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + diff --git a/thirdparty/CHANGELOG.md b/thirdparty/CHANGELOG.md index 4a936be5fc..de91ea7843 100644 --- a/thirdparty/CHANGELOG.md +++ b/thirdparty/CHANGELOG.md @@ -2,6 +2,9 @@ This file contains version of the third-party dependency libraries in the build-env image. 
The docker build-env image is apache/doris, and the tag is `build-env-${version}` +## v20220606 +- Added: vectorscan 5.4.7, and a patch for compilation + ## v20220613 - Modified: update libhdfs3 from 2.3.0 to 2.3.1 fix client uuid set error diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index ee20ac5fcf..af9fc38abe 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -551,21 +551,16 @@ build_re2() { # hyperscan build_hyperscan() { - MACHINE_TYPE=$(uname -m) - if [[ "${MACHINE_TYPE}" == "aarch64" ]]; then - echo "hyperscan is not supporting aarch64 now." - else - check_if_source_exist $RAGEL_SOURCE - cd $TP_SOURCE_DIR/$RAGEL_SOURCE - ./configure --prefix=$TP_INSTALL_DIR && make install - - check_if_source_exist $HYPERSCAN_SOURCE - cd $TP_SOURCE_DIR/$HYPERSCAN_SOURCE - mkdir -p $BUILD_DIR && cd $BUILD_DIR - PATH=$TP_INSTALL_DIR/bin:$PATH ${CMAKE_CMD} -G "${GENERATOR}" -DBUILD_SHARED_LIBS=0 \ - -DBOOST_ROOT=$BOOST_SOURCE -DCMAKE_INSTALL_PREFIX=$TP_INSTALL_DIR .. - ${BUILD_SYSTEM} -j $PARALLEL install - fi + check_if_source_exist $RAGEL_SOURCE + cd $TP_SOURCE_DIR/$RAGEL_SOURCE + ./configure --prefix=$TP_INSTALL_DIR && make install + + check_if_source_exist $HYPERSCAN_SOURCE + cd $TP_SOURCE_DIR/$HYPERSCAN_SOURCE + mkdir -p $BUILD_DIR && cd $BUILD_DIR + PATH=$TP_INSTALL_DIR/bin:$PATH ${CMAKE_CMD} -G "${GENERATOR}" -DBUILD_SHARED_LIBS=0 \ + -DBOOST_ROOT=$BOOST_SOURCE -DCMAKE_INSTALL_PREFIX=$TP_INSTALL_DIR .. 
+ ${BUILD_SYSTEM} -j $PARALLEL install } # boost @@ -1193,7 +1188,7 @@ build_snappy build_gperftools build_curl build_re2 -# build_hyperscan +build_hyperscan build_thrift build_leveldb build_brpc diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index 1b911a3f35..a621ae68c6 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -320,7 +320,7 @@ fi echo "Finished patching $ARROW_SOURCE" # patch librdkafka to avoid crash -if [ $LIBRDKAFKA_SOURCE = "librdkafka-1.8.2" ]; then +if [ $LIBRDKAFKA_SOURCE == "librdkafka-1.8.2" ]; then cd $TP_SOURCE_DIR/$LIBRDKAFKA_SOURCE if [ ! -f $PATCHED_MARK ]; then patch -p0 < $TP_PATCH_DIR/librdkafka-1.8.2.patch @@ -332,13 +332,20 @@ echo "Finished patching $LIBRDKAFKA_SOURCE" # patch hyperscan # https://github.com/intel/hyperscan/issues/292 -if [ $HYPERSCAN_SOURCE = "hyperscan-5.4.0" ]; then +if [ $HYPERSCAN_SOURCE == "hyperscan-5.4.0" ]; then cd $TP_SOURCE_DIR/$HYPERSCAN_SOURCE if [ ! -f $PATCHED_MARK ]; then patch -p0 < $TP_PATCH_DIR/hyperscan-5.4.0.patch touch $PATCHED_MARK fi cd - +elif [ $HYPERSCAN_SOURCE == "vectorscan-vectorscan-5.4.7" ]; then + cd $TP_SOURCE_DIR/$HYPERSCAN_SOURCE + if [ ! 
-f $PATCHED_MARK ]; then + patch -p0 < $TP_PATCH_DIR/vectorscan-5.4.7.patch + touch $PATCHED_MARK + fi + cd - fi echo "Finished patching $HYPERSCAN_SOURCE" diff --git a/thirdparty/patches/vectorscan-5.4.7.patch b/thirdparty/patches/vectorscan-5.4.7.patch new file mode 100644 index 0000000000..712c11e86a --- /dev/null +++ b/thirdparty/patches/vectorscan-5.4.7.patch @@ -0,0 +1,31 @@ +diff --git CMakeLists.txt CMakeLists.txt +index cb4ba80..9a106e5 100644 +--- CMakeLists.txt ++++ CMakeLists.txt +@@ -170,7 +170,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) + + # arg1 might exist if using ccache + string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) +- set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) ++ set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -mtune=native) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_VARIABLE _GCC_OUTPUT) + string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) +diff --git cmake/build_wrapper.sh cmake/build_wrapper.sh +index 895610c..becfbf4 100755 +--- cmake/build_wrapper.sh ++++ cmake/build_wrapper.sh +@@ -17,11 +17,11 @@ KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) + LIBC_SO=$("$@" --print-file-name=libc.so.6) + cp ${KEEPSYMS_IN} ${KEEPSYMS} + # get all symbols from libc and turn them into patterns +-nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} ++nm -f posix -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} + # build the object + "$@" + # rename the symbols in the object +-nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} ++nm -f posix -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} + if test -s ${SYMSFILE} + then + objcopy --redefine-syms=${SYMSFILE} ${OUT} diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 14ba7ff033..675d35d3cd 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -155,6 +155,16 @@ 
HYPERSCAN_NAME=hyperscan-5.4.0.tar.gz HYPERSCAN_SOURCE=hyperscan-5.4.0 HYPERSCAN_MD5SUM="65e08385038c24470a248f6ff2fa379b" +# vectorscan (support arm for hyperscan) +MACHINE_TYPE=$(uname -m) +if [[ "${MACHINE_TYPE}" == "aarch64" ]]; then + echo "use vectorscan instead of hyperscan on aarch64" + HYPERSCAN_DOWNLOAD="https://github.com/VectorCamp/vectorscan/archive/refs/tags/vectorscan/5.4.7.tar.gz" + HYPERSCAN_NAME=vectorscan-5.4.7.tar.gz + HYPERSCAN_SOURCE=vectorscan-vectorscan-5.4.7 + HYPERSCAN_MD5SUM="ae924ccce79ef9bf6bf118693ae14fe5" +fi + # ragel (dependency for hyperscan) RAGEL_DOWNLOAD="http://www.colm.net/files/ragel/ragel-6.10.tar.gz" RAGEL_NAME=ragel-6.10.tar.gz --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org