This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch dev-1.1.2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push: new 68cdc20fc8 [tools](ssb and tpch) optimize tools (#11974) 68cdc20fc8 is described below commit 68cdc20fc8499b485b1f6c1676fcb29dfc16afbd Author: Dongyang Li <hello_step...@qq.com> AuthorDate: Tue Aug 23 09:10:49 2022 +0800 [tools](ssb and tpch) optimize tools (#11974) --- tools/ssb-tools/README.md | 40 +++ tools/ssb-tools/{ => bin}/build-ssb-dbgen.sh | 21 +- .../bin/create-ssb-tables.sh} | 24 +- tools/ssb-tools/{ => bin}/gen-ssb-data.sh | 72 ++++-- tools/ssb-tools/bin/load-ssb-data.sh | 276 +++++++++++++++++++++ .../bin/run-ssb-flat-queries.sh} | 49 ++-- .../bin/run-ssb-queries.sh} | 44 ++-- tools/ssb-tools/{ => conf}/doris-cluster.conf | 6 +- tools/ssb-tools/ddl/create-ssb-flat-table.sql | 74 ++++++ .../create-ssb-tables.sql} | 115 +++++---- tools/ssb-tools/load-dimension-data.sh | 68 ----- tools/ssb-tools/load-fact-data.sh | 126 ---------- .../q1.3.sql => ssb-flat-queries/q1.1.sql} | 17 +- .../{queries => ssb-flat-queries}/q1.2.sql | 16 +- .../{queries => ssb-flat-queries}/q1.3.sql | 18 +- .../q1.2.sql => ssb-flat-queries/q2.1.sql} | 16 +- .../q1.1.sql => ssb-flat-queries/q2.2.sql} | 19 +- .../q1.1.sql => ssb-flat-queries/q2.3.sql} | 18 +- .../q1.1.sql => ssb-flat-queries/q3.1.sql} | 21 +- .../q1.1.sql => ssb-flat-queries/q3.2.sql} | 21 +- .../q1.1.sql => ssb-flat-queries/q3.3.sql} | 21 +- .../q1.1.sql => ssb-flat-queries/q3.4.sql} | 21 +- .../q1.1.sql => ssb-flat-queries/q4.1.sql} | 19 +- .../q1.1.sql => ssb-flat-queries/q4.2.sql} | 25 +- .../q2.1.sql => ssb-flat-queries/q4.3.sql} | 23 +- tools/ssb-tools/{queries => ssb-queries}/q1.1.sql | 15 +- tools/ssb-tools/{queries => ssb-queries}/q1.2.sql | 15 +- tools/ssb-tools/{queries => ssb-queries}/q1.3.sql | 18 +- tools/ssb-tools/{queries => ssb-queries}/q2.1.sql | 16 +- tools/ssb-tools/{queries => ssb-queries}/q2.2.sql | 17 +- tools/ssb-tools/{queries => ssb-queries}/q2.3.sql | 16 +- tools/ssb-tools/{queries => ssb-queries}/q3.1.sql | 26 +- tools/ssb-tools/{queries => ssb-queries}/q3.2.sql | 26 +- tools/ssb-tools/{queries => ssb-queries}/q3.3.sql | 34 ++- tools/ssb-tools/{queries => ssb-queries}/q3.4.sql | 33 ++- tools/ssb-tools/{queries => ssb-queries}/q4.1.sql | 30 ++- tools/ssb-tools/{queries => ssb-queries}/q4.2.sql | 36 +-- tools/ssb-tools/{queries => ssb-queries}/q4.3.sql | 30 ++- tools/tpch-tools/README.md | 25 +- tools/tpch-tools/{ => bin}/build-tpch-dbgen.sh | 17 +- tools/tpch-tools/{ => bin}/create-tpch-tables.sh | 12 +- tools/tpch-tools/{ => bin}/gen-tpch-data.sh | 32 +-- tools/tpch-tools/{ => bin}/load-tpch-data.sh | 97 ++++---- tools/tpch-tools/{ => bin}/run-tpch-queries.sh | 36 +-- tools/tpch-tools/{ => conf}/doris-cluster.conf | 4 +- tools/tpch-tools/create-tpch-tables.sql | 100 -------- tools/tpch-tools/ddl/create-tpch-tables.sql | 174 +++++++++++++ tools/tpch-tools/queries/q1.sql | 2 +- tools/tpch-tools/queries/q10.sql | 22 +- tools/tpch-tools/queries/q11.sql | 27 +- tools/tpch-tools/queries/q12.sql | 2 +- tools/tpch-tools/queries/q13.sql | 6 +- tools/tpch-tools/queries/q14.sql | 8 +- tools/tpch-tools/queries/q15.sql | 17 +- tools/tpch-tools/queries/q16.sql | 2 +- tools/tpch-tools/queries/q17.sql | 20 +- tools/tpch-tools/queries/q18.sql | 58 +++-- tools/tpch-tools/queries/q19.sql | 2 +- tools/tpch-tools/queries/q2.sql | 44 ++-- tools/tpch-tools/queries/q20.sql | 62 ++--- tools/tpch-tools/queries/q21.sql | 66 +++-- tools/tpch-tools/queries/q22.sql | 2 +- tools/tpch-tools/queries/q3.sql | 22 +- tools/tpch-tools/queries/q4.sql | 21 +- tools/tpch-tools/queries/q5.sql | 2 +- tools/tpch-tools/queries/q6.sql | 2 +- tools/tpch-tools/queries/q7.sql | 2 +- tools/tpch-tools/queries/q8.sql | 8 +- tools/tpch-tools/queries/q9.sql | 26 +- 69 files changed, 1426 insertions(+), 976 deletions(-) diff --git a/tools/ssb-tools/README.md b/tools/ssb-tools/README.md new file mode 100644 index 0000000000..6d8e85e97d --- /dev/null +++ b/tools/ssb-tools/README.md @@ -0,0 +1,40 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Usage + + These scripts are used to make ssb and ssb flat test. + The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...'. + +## ssb test, follow the steps below: +### 1. build ssb dbgen tool. + ./bin/build-ssb-dbgen.sh +### 2. generate ssb data. use -h for more infomations. + ./bin/gen-ssb-data.sh -s 1 +### 3. create ssb tables. modify `conf/doris-cluster.conf` to specify Doris cluster info, then run script below. + ./bin/create-ssb-tables.sh +### 4. load ssb data. use -h for help. + ./bin/load-ssb-data.sh +### 5. run ssb queries. + ./bin/run-ssb-queries.sh + +## ssb flat test, follow the steps below: +### 1. prepare ssb data, which means ssb test steps 1 to 4 have been done. +### 2. run ssb flat queries. + ./bin/run-ssb-flat-queries.sh diff --git a/tools/ssb-tools/build-ssb-dbgen.sh b/tools/ssb-tools/bin/build-ssb-dbgen.sh similarity index 80% rename from tools/ssb-tools/build-ssb-dbgen.sh rename to tools/ssb-tools/bin/build-ssb-dbgen.sh index 59af467441..56fd2056e4 100755 --- a/tools/ssb-tools/build-ssb-dbgen.sh +++ b/tools/ssb-tools/bin/build-ssb-dbgen.sh @@ -19,14 +19,17 @@ ############################################################## # This script is used to build ssb-dbgen # sssb-dbgen's source code is from https://github.com/electrum/ssb-dbgen.git -# Usage: +# Usage: # sh build-ssb-dbgen.sh ############################################################## set -eo pipefail -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) CURDIR=${ROOT} SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/ @@ -35,17 +38,23 @@ SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/ if [[ -d $SSB_DBGEN_DIR ]]; then echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download." echo "If you want to download ssb-dbgen again, please delete this dir first." + exit 1 else - curl https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz | tar xz -C $CURDIR/ + cd "$CURDIR" + wget https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz && tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/ fi # compile ssb-dbgen -cd $SSB_DBGEN_DIR/ && make +cd "$SSB_DBGEN_DIR/" && make cd - # check if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then - echo "Build succeed! Run $CURDIR/ssb-dbgen/dbgen -h" + echo -e " +################ +Build succeed! +################ +Run $CURDIR/ssb-dbgen/dbgen -h" exit 0 else echo "Build failed!" diff --git a/tools/tpch-tools/create-tpch-tables.sh b/tools/ssb-tools/bin/create-ssb-tables.sh similarity index 73% copy from tools/tpch-tools/create-tpch-tables.sh copy to tools/ssb-tools/bin/create-ssb-tables.sh index 2263f47e1b..41177843e2 100755 --- a/tools/tpch-tools/create-tpch-tables.sh +++ b/tools/ssb-tools/bin/create-ssb-tables.sh @@ -17,7 +17,7 @@ # under the License. ############################################################## -# This script is used to create TPC-H tables +# This script is used to create SSB tables ############################################################## set -eo pipefail @@ -29,19 +29,22 @@ ROOT=$( ) CURDIR=${ROOT} +SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql" +SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql" usage() { echo " -This script is used to create TPC-H tables, -will use mysql client to connect Doris server which is specified in doris-cluster.conf file. +This script is used to create SSB tables, +will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file. Usage: $0 " exit 1 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ + -o 'h' \ -- "$@") eval set -- "$OPTS" @@ -84,7 +87,9 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" @@ -92,7 +97,10 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB" -echo $CURDIR/create-tpch-tables.sql -mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql +echo "Run DDL from $SSB_DDL" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL" + +echo "Run DDL from $SSB_FLAT_DDL" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL" diff --git a/tools/ssb-tools/gen-ssb-data.sh b/tools/ssb-tools/bin/gen-ssb-data.sh similarity index 71% rename from tools/ssb-tools/gen-ssb-data.sh rename to tools/ssb-tools/bin/gen-ssb-data.sh index 594f1296a2..afc9e8bf68 100755 --- a/tools/ssb-tools/gen-ssb-data.sh +++ b/tools/ssb-tools/bin/gen-ssb-data.sh @@ -22,8 +22,11 @@ set -eo pipefail -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) CURDIR=${ROOT} SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/ @@ -45,7 +48,7 @@ Usage: $0 <options> } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'hs:c:' \ -- "$@") @@ -56,23 +59,38 @@ SCALE_FACTOR=100 PARALLEL=10 HELP=0 -if [ $# == 0 ] ; then - usage +if [ $# == 0 ]; then + usage fi while true; do - case "$1" in - -h) HELP=1 ; shift ;; - -s) SCALE_FACTOR=$2 ; shift 2 ;; - -c) PARALLEL=$2 ; shift 2 ;; - --) shift ; break ;; - *) echo "Internal error" ; exit 1 ;; - esac + case "$1" in + -h) + HELP=1 + shift + ;; + -s) + SCALE_FACTOR=$2 + shift 2 + ;; + -c) + PARALLEL=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac done if [[ ${HELP} -eq 1 ]]; then - usage - exit + usage + exit fi echo "Scale Factor: $SCALE_FACTOR" @@ -80,33 +98,33 @@ echo "Parallelism: $PARALLEL" # check if dbgen exists if [[ ! -f $SSB_DBGEN_DIR/dbgen ]]; then - echo "$SSB_DBGEN_DIR/dbgen does not exist. Run build-ssb-dbgen.sh first to build it first." - exit 1 + echo "$SSB_DBGEN_DIR/dbgen does not exist. Run build-ssb-dbgen.sh first to build it first." + exit 1 fi if [[ -d $SSB_DATA_DIR/ ]]; then - echo "$SSB_DATA_DIR exists. Remove it before generating data" - exit 1 + echo "$SSB_DATA_DIR exists. Remove it before generating data" + exit 1 fi -mkdir $SSB_DATA_DIR/ +mkdir "$SSB_DATA_DIR/" # gen data -cd $SSB_DBGEN_DIR +cd "$SSB_DBGEN_DIR" echo "Begin to generate data for table: customer" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c echo "Begin to generate data for table: part" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T p +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p echo "Begin to generate data for table: supplier" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s echo "Begin to generate data for table: date" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T d +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d echo "Begin to generate data for table: lineorder" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T l -C $PARALLEL +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL" cd - # move data to $SSB_DATA_DIR -mv $SSB_DBGEN_DIR/*.tbl* $SSB_DATA_DIR/ +mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/" # check data -du -sh $SSB_DATA_DIR/*.tbl* +du -sh "$SSB_DATA_DIR"/*.tbl* diff --git a/tools/ssb-tools/bin/load-ssb-data.sh b/tools/ssb-tools/bin/load-ssb-data.sh new file mode 100755 index 0000000000..f6be6bbe72 --- /dev/null +++ b/tools/ssb-tools/bin/load-ssb-data.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to load generated ssb data set to Doris +# Only for 1 fact table: lineorder +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} +SSB_DATA_DIR="$CURDIR/ssb-data/" + +usage() { + echo " +Usage: $0 <options> + Optional options: + -c parallelism to load data of lineorder table, default is 5. + + Eg. + $0 load data using default value. + $0 -c 10 load lineorder table data using parallelism 10. + " + exit 1 +} + +OPTS=$(getopt \ + -n "$0" \ + -o '' \ + -o 'hc:' \ + -- "$@") + +eval set -- "$OPTS" + +PARALLEL=5 +HELP=0 + +if [ $# == 0 ]; then + usage +fi + +while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + -c) + PARALLEL=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac +done + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +echo "Parallelism: $PARALLEL" + +# check if ssb-data exists +if [[ ! -d $SSB_DATA_DIR/ ]]; then + echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." + exit 1 +fi + +check_prerequest() { + local CMD=$1 + local NAME=$2 + if ! $CMD; then + echo "$NAME is missing. This script depends on cURL to load data to Doris." + exit 1 + fi +} + +run_sql() { + sql="$*" + echo "$sql" + mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@" +} + +load_lineitem_flat() { + # Loading data in batches by year. + for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 'lo_orderdate>=19980101'; do + echo -e "\n$con" + run_sql " +INSERT INTO lineorder_flat +SELECT + LO_ORDERDATE, + LO_ORDERKEY, + LO_LINENUMBER, + LO_CUSTKEY, + LO_PARTKEY, + LO_SUPPKEY, + LO_ORDERPRIORITY, + LO_SHIPPRIORITY, + LO_QUANTITY, + LO_EXTENDEDPRICE, + LO_ORDTOTALPRICE, + LO_DISCOUNT, + LO_REVENUE, + LO_SUPPLYCOST, + LO_TAX, + LO_COMMITDATE, + LO_SHIPMODE, + C_NAME, + C_ADDRESS, + C_CITY, + C_NATION, + C_REGION, + C_PHONE, + C_MKTSEGMENT, + S_NAME, + S_ADDRESS, + S_CITY, + S_NATION, + S_REGION, + S_PHONE, + P_NAME, + P_MFGR, + P_CATEGORY, + P_BRAND, + P_COLOR, + P_TYPE, + P_SIZE, + P_CONTAINER +FROM ( + SELECT + lo_orderkey, + lo_linenumber, + lo_custkey, + lo_partkey, + lo_suppkey, + lo_orderdate, + lo_orderpriority, + lo_shippriority, + lo_quantity, + lo_extendedprice, + lo_ordtotalprice, + lo_discount, + lo_revenue, + lo_supplycost, + lo_tax, + lo_commitdate, + lo_shipmode + FROM lineorder + WHERE ${con} +) l +INNER JOIN customer c +ON (c.c_custkey = l.lo_custkey) +INNER JOIN supplier s +ON (s.s_suppkey = l.lo_suppkey) +INNER JOIN part p +ON (p.p_partkey = l.lo_partkey); +" + done +} + +check_prerequest "curl --version" "curl" + +# load lineorder +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD + +echo "FE_HOST: $FE_HOST" +echo "FE_HTTP_PORT: $FE_HTTP_PORT" +echo "USER: $USER" +echo "PASSWORD: $PASSWORD" +echo "DB: $DB" + +date +echo "==========Start to load data into ssb tables==========" +echo 'Loading data for table: part' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ + -T "$SSB_DATA_DIR"/part.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load + +echo 'Loading data for table: date' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ + -T "$SSB_DATA_DIR"/date.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load + +echo 'Loading data for table: supplier' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ + -T "$SSB_DATA_DIR"/supplier.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load + +echo 'Loading data for table: customer' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ + -T "$SSB_DATA_DIR"/customer.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load + +echo "Loading data for table: lineorder, with $PARALLEL parallel" +function load() { + echo "$@" + curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ + -T "$@" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load +} + +# set parallelism +[ -e /tmp/fd1 ] || mkfifo /tmp/fd1 +exec 3<>/tmp/fd1 +rm -rf /tmp/fd1 + +for ((i = 1; i <= PARALLEL; i++)); do + echo >&3 +done + +date +for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do + read -r -u3 + { + load "$file" + echo >&3 + } & +done + +# wait for child thread finished +wait +date + +echo "==========Start to insert data into ssb flat table==========" +echo "change some session variables before load, and then restore after load." +origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p') +origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' | sed -n '3p') +# set parallel_fragment_exec_instance_num=1, loading maybe slow but stable. +run_sql "set global query_timeout=7200;" +run_sql "set global parallel_fragment_exec_instance_num=1;" +echo '============================================' +date +load_lineitem_flat +date +echo '============================================' +echo "restore session variables" +run_sql "set global query_timeout=${origin_query_timeout};" +run_sql "set global parallel_fragment_exec_instance_num=${origin_parallel};" +echo '============================================' + +echo "DONE." diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/ssb-tools/bin/run-ssb-flat-queries.sh similarity index 62% copy from tools/tpch-tools/run-tpch-queries.sh copy to tools/ssb-tools/bin/run-ssb-flat-queries.sh index 5a0d06d512..6db5ba3303 100755 --- a/tools/tpch-tools/run-tpch-queries.sh +++ b/tools/ssb-tools/bin/run-ssb-flat-queries.sh @@ -17,7 +17,7 @@ # under the License. ############################################################## -# This script is used to create TPC-H tables +# This script is used to create ssb flat queries ############################################################## set -eo pipefail @@ -28,21 +28,22 @@ ROOT=$( pwd ) -CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/queries +CURDIR="${ROOT}" +QUERIES_DIR="$CURDIR/../ssb-flat-queries" usage() { echo " -This script is used to run TPC-H 22queries, -will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file. +This script is used to run SSB flat 13queries, +will use mysql client to connect Doris server which parameter is specified in 'doris-cluster.conf' file. Usage: $0 " exit 1 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ + -o 'h' \ -- "$@") eval set -- "$OPTS" @@ -83,9 +84,11 @@ check_prerequest() { fi } -check_prerequest "mysql --version" "mysql" +check_prerequest "mysqlslap --version" "mysqlslap" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" @@ -94,24 +97,24 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$@" + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@" } pre_set "set global enable_vectorized_engine=1;" pre_set "set global parallel_fragment_exec_instance_num=8;" -pre_set "set global exec_mem_limit=48G;" +pre_set "set global exec_mem_limit=8G;" pre_set "set global batch_size=4096;" -# pre_set "show variables like 'batch_size';" - -for i in $(seq 1 22); do - total=0 - # Each query is executed three times and takes the average time - for j in $(seq 1 3); do - start=$(date +%s%3N) - mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null - end=$(date +%s%3N) - total=$((total + end - start)) - done - echo "q$i: $((total / 3))ms" +echo '============================================' +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" +echo '============================================' + +for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do + # First run to prevent the affect of cold start + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB" <"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1 + # Then run 3 times and takes the average time + res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) + echo "q$i: $res" done diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/ssb-tools/bin/run-ssb-queries.sh similarity index 65% copy from tools/tpch-tools/run-tpch-queries.sh copy to tools/ssb-tools/bin/run-ssb-queries.sh index 5a0d06d512..5a1db9a57e 100755 --- a/tools/tpch-tools/run-tpch-queries.sh +++ b/tools/ssb-tools/bin/run-ssb-queries.sh @@ -17,7 +17,7 @@ # under the License. ############################################################## -# This script is used to create TPC-H tables +# This script is used to create ssb queries ############################################################## set -eo pipefail @@ -29,11 +29,11 @@ ROOT=$( ) CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/queries +QUERIES_DIR=$CURDIR/../ssb-queries usage() { echo " -This script is used to run TPC-H 22queries, +This script is used to run SSB 13queries, will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file. Usage: $0 " @@ -41,8 +41,9 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ + -o 'h' \ -- "$@") eval set -- "$OPTS" @@ -83,9 +84,11 @@ check_prerequest() { fi } -check_prerequest "mysql --version" "mysql" +check_prerequest "mysqlslap --version" "mysql slap" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" @@ -94,24 +97,25 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$@" + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@" } pre_set "set global enable_vectorized_engine=1;" pre_set "set global parallel_fragment_exec_instance_num=8;" pre_set "set global exec_mem_limit=48G;" pre_set "set global batch_size=4096;" -# pre_set "show variables like 'batch_size';" - -for i in $(seq 1 22); do - total=0 - # Each query is executed three times and takes the average time - for j in $(seq 1 3); do - start=$(date +%s%3N) - mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null - end=$(date +%s%3N) - total=$((total + end - start)) - done - echo "q$i: $((total / 3))ms" +pre_set "set global enable_projection=true;" +pre_set "set global runtime_filter_mode=global;" +# pre_set "set global enable_cost_based_join_reorder=1" +echo '============================================' +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" +echo '============================================' + +for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do + # Each query is executed 3 times and takes the average time + res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) + echo "q$i: $res" done diff --git a/tools/ssb-tools/doris-cluster.conf b/tools/ssb-tools/conf/doris-cluster.conf similarity index 91% rename from tools/ssb-tools/doris-cluster.conf rename to tools/ssb-tools/conf/doris-cluster.conf index bef6c7b54c..5567a2f588 100644 --- a/tools/ssb-tools/doris-cluster.conf +++ b/tools/ssb-tools/conf/doris-cluster.conf @@ -19,9 +19,11 @@ export FE_HOST='127.0.0.1' # http_port in fe.conf export FE_HTTP_PORT=8030 +# query_port in fe.conf +export FE_QUERY_PORT=9030 # Doris username -export USER='admin' +export USER='root' # Doris password export PASSWORD='' # The database where SSB tables located -export DB='db1' +export DB='ssb' diff --git a/tools/ssb-tools/ddl/create-ssb-flat-table.sql b/tools/ssb-tools/ddl/create-ssb-flat-table.sql new file mode 100644 index 0000000000..b1e1681a94 --- /dev/null +++ b/tools/ssb-tools/ddl/create-ssb-flat-table.sql @@ -0,0 +1,74 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE TABLE IF NOT EXISTS `lineorder_flat` ( + `LO_ORDERDATE` int(11) NOT NULL COMMENT "", + `LO_ORDERKEY` int(11) NOT NULL COMMENT "", + `LO_LINENUMBER` tinyint(4) NOT NULL COMMENT "", + `LO_CUSTKEY` int(11) NOT NULL COMMENT "", + `LO_PARTKEY` int(11) NOT NULL COMMENT "", + `LO_SUPPKEY` int(11) NOT NULL COMMENT "", + `LO_ORDERPRIORITY` varchar(100) NOT NULL COMMENT "", + `LO_SHIPPRIORITY` tinyint(4) NOT NULL COMMENT "", + `LO_QUANTITY` tinyint(4) NOT NULL COMMENT "", + `LO_EXTENDEDPRICE` int(11) NOT NULL COMMENT "", + `LO_ORDTOTALPRICE` int(11) NOT NULL COMMENT "", + `LO_DISCOUNT` tinyint(4) NOT NULL COMMENT "", + `LO_REVENUE` int(11) NOT NULL COMMENT "", + `LO_SUPPLYCOST` int(11) NOT NULL COMMENT "", + `LO_TAX` tinyint(4) NOT NULL COMMENT "", + `LO_COMMITDATE` date NOT NULL COMMENT "", + `LO_SHIPMODE` varchar(100) NOT NULL COMMENT "", + `C_NAME` varchar(100) NOT NULL COMMENT "", + `C_ADDRESS` varchar(100) NOT NULL COMMENT "", + `C_CITY` varchar(100) NOT NULL COMMENT "", + `C_NATION` varchar(100) NOT NULL COMMENT "", + `C_REGION` varchar(100) NOT NULL COMMENT "", + `C_PHONE` varchar(100) NOT NULL COMMENT "", + `C_MKTSEGMENT` varchar(100) NOT NULL COMMENT "", + `S_NAME` varchar(100) NOT NULL COMMENT "", + `S_ADDRESS` varchar(100) NOT NULL COMMENT "", + `S_CITY` varchar(100) NOT NULL COMMENT "", + `S_NATION` varchar(100) NOT NULL COMMENT "", + `S_REGION` varchar(100) NOT NULL COMMENT "", + `S_PHONE` varchar(100) NOT NULL COMMENT "", + `P_NAME` varchar(100) NOT NULL COMMENT "", + `P_MFGR` varchar(100) NOT NULL COMMENT "", + `P_CATEGORY` varchar(100) NOT NULL COMMENT "", + `P_BRAND` varchar(100) NOT NULL COMMENT "", + `P_COLOR` varchar(100) NOT NULL COMMENT "", + `P_TYPE` varchar(100) NOT NULL COMMENT "", + `P_SIZE` tinyint(4) NOT NULL COMMENT "", + `P_CONTAINER` varchar(100) NOT NULL COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) +COMMENT "OLAP" +PARTITION BY RANGE(`LO_ORDERDATE`) +(PARTITION p1992 VALUES [("-2147483648"), ("19930101")), +PARTITION p1993 VALUES [("19930101"), ("19940101")), +PARTITION p1994 VALUES [("19940101"), ("19950101")), +PARTITION p1995 VALUES [("19950101"), ("19960101")), +PARTITION p1996 VALUES [("19960101"), ("19970101")), +PARTITION p1997 VALUES [("19970101"), ("19980101")), +PARTITION p1998 VALUES [("19980101"), ("19990101"))) +DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 +PROPERTIES ( +"replication_num" = "1", +"colocate_with" = "groupxx1", +"in_memory" = "false", +"storage_format" = "DEFAULT" +); \ No newline at end of file diff --git a/tools/ssb-tools/create-tables.sql b/tools/ssb-tools/ddl/create-ssb-tables.sql similarity index 62% rename from tools/ssb-tools/create-tables.sql rename to tools/ssb-tools/ddl/create-ssb-tables.sql index 5e99f89146..273d2d90b9 100644 --- a/tools/ssb-tools/create-tables.sql +++ b/tools/ssb-tools/ddl/create-ssb-tables.sql @@ -15,39 +15,44 @@ -- specific language governing permissions and limitations -- under the License. -CREATE TABLE `lineorder` ( - `lo_orderkey` bigint(20) NOT NULL COMMENT "", - `lo_linenumber` bigint(20) NOT NULL COMMENT "", +CREATE TABLE IF NOT EXISTS `lineorder` ( + `lo_orderkey` int(11) NOT NULL COMMENT "", + `lo_linenumber` int(11) NOT NULL COMMENT "", `lo_custkey` int(11) NOT NULL COMMENT "", `lo_partkey` int(11) NOT NULL COMMENT "", `lo_suppkey` int(11) NOT NULL COMMENT "", `lo_orderdate` int(11) NOT NULL COMMENT "", `lo_orderpriority` varchar(16) NOT NULL COMMENT "", `lo_shippriority` int(11) NOT NULL COMMENT "", - `lo_quantity` bigint(20) NOT NULL COMMENT "", - `lo_extendedprice` bigint(20) NOT NULL COMMENT "", - `lo_ordtotalprice` bigint(20) NOT NULL COMMENT "", - `lo_discount` bigint(20) NOT NULL COMMENT "", - `lo_revenue` bigint(20) NOT NULL COMMENT "", - `lo_supplycost` bigint(20) NOT NULL COMMENT "", - `lo_tax` bigint(20) NOT NULL COMMENT "", - `lo_commitdate` bigint(20) NOT NULL COMMENT "", + `lo_quantity` int(11) NOT NULL COMMENT "", + `lo_extendedprice` int(11) NOT NULL COMMENT "", + `lo_ordtotalprice` int(11) NOT NULL COMMENT "", + `lo_discount` int(11) NOT NULL COMMENT "", + `lo_revenue` int(11) NOT NULL COMMENT "", + `lo_supplycost` int(11) NOT NULL COMMENT "", + `lo_tax` int(11) NOT NULL COMMENT "", + `lo_commitdate` int(11) NOT NULL COMMENT "", `lo_shipmode` varchar(11) NOT NULL COMMENT "" -) +) ENGINE=OLAP +DUPLICATE KEY(`lo_orderkey`) +COMMENT "OLAP" PARTITION BY RANGE(`lo_orderdate`) -(PARTITION p1992 VALUES [("-2147483648"), ("19930101")), -PARTITION p1993 VALUES [("19930101"), ("19940101")), -PARTITION p1994 VALUES [("19940101"), ("19950101")), -PARTITION p1995 VALUES [("19950101"), ("19960101")), -PARTITION p1996 VALUES [("19960101"), ("19970101")), -PARTITION p1997 VALUES [("19970101"), ("19980101")), -PARTITION p1998 VALUES [("19980101"), ("19990101"))) +(PARTITION p1 VALUES [("-2147483648"), ("19930101")), +PARTITION p2 VALUES [("19930101"), ("19940101")), +PARTITION p3 VALUES [("19940101"), ("19950101")), +PARTITION p4 VALUES [("19950101"), ("19960101")), +PARTITION p5 VALUES [("19960101"), ("19970101")), +PARTITION p6 VALUES [("19970101"), ("19980101")), +PARTITION p7 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 48 PROPERTIES ( -"replication_num" = "1" +"replication_num" = "1", +"colocate_with" = "groupa1", +"in_memory" = "false", +"storage_format" = "DEFAULT" ); -CREATE TABLE `customer` ( +CREATE TABLE IF NOT EXISTS `customer` ( `c_custkey` int(11) NOT NULL COMMENT "", `c_name` varchar(26) NOT NULL COMMENT "", `c_address` varchar(41) NOT NULL COMMENT "", @@ -56,13 +61,18 @@ CREATE TABLE `customer` ( `c_region` varchar(13) NOT NULL COMMENT "", `c_phone` varchar(16) NOT NULL COMMENT "", `c_mktsegment` varchar(11) NOT NULL COMMENT "" -) -DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10 +) ENGINE=OLAP +DUPLICATE KEY(`c_custkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 12 PROPERTIES ( -"replication_num" = "1" +"replication_num" = "1", +"colocate_with" = "groupa2", +"in_memory" = "false", +"storage_format" = "DEFAULT" ); -CREATE TABLE `date` ( +CREATE TABLE IF NOT EXISTS `dates` ( `d_datekey` int(11) NOT NULL COMMENT "", `d_date` varchar(20) NOT NULL COMMENT "", `d_dayofweek` varchar(10) NOT NULL COMMENT "", @@ -80,13 +90,37 @@ CREATE TABLE `date` ( `d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", `d_holidayfl` int(11) NOT NULL COMMENT "", `d_weekdayfl` int(11) NOT NULL COMMENT "" -) +) ENGINE=OLAP +DUPLICATE KEY(`d_datekey`) +COMMENT "OLAP" DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1 PROPERTIES ( -"replication_num" = "1" +"replication_num" = "1", +"in_memory" = "false", +"colocate_with" = "groupa3", +"storage_format" = "DEFAULT" ); -CREATE TABLE `part` ( + CREATE TABLE IF NOT EXISTS `supplier` ( + `s_suppkey` int(11) NOT NULL COMMENT "", + `s_name` varchar(26) NOT NULL COMMENT "", + `s_address` varchar(26) NOT NULL COMMENT "", + `s_city` varchar(11) NOT NULL COMMENT "", + `s_nation` varchar(16) NOT NULL COMMENT "", + `s_region` varchar(13) NOT NULL COMMENT "", + `s_phone` varchar(16) NOT NULL COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(`s_suppkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12 +PROPERTIES ( +"replication_num" = "1", +"colocate_with" = "groupa4", +"in_memory" = "false", +"storage_format" = "DEFAULT" +); + +CREATE TABLE IF NOT EXISTS `part` ( `p_partkey` int(11) NOT NULL COMMENT "", `p_name` varchar(23) NOT NULL COMMENT "", `p_mfgr` varchar(7) NOT NULL COMMENT "", @@ -96,22 +130,13 @@ CREATE TABLE `part` ( `p_type` varchar(26) NOT NULL COMMENT "", `p_size` int(11) NOT NULL COMMENT "", `p_container` varchar(11) NOT NULL COMMENT "" -) -DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 10 -PROPERTIES ( -"replication_num" = "1" -); - -CREATE TABLE `supplier` ( - `s_suppkey` int(11) NOT NULL COMMENT "", - `s_name` varchar(26) NOT NULL COMMENT "", - `s_address` varchar(26) NOT NULL COMMENT "", - `s_city` varchar(11) NOT NULL COMMENT "", - `s_nation` varchar(16) NOT NULL COMMENT "", - `s_region` varchar(13) NOT NULL COMMENT "", - `s_phone` varchar(16) NOT NULL COMMENT "" -) -DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 10 +) ENGINE=OLAP +DUPLICATE KEY(`p_partkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 12 PROPERTIES ( -"replication_num" = "1" +"replication_num" = "1", +"colocate_with" = "groupa5", +"in_memory" = "false", +"storage_format" = "DEFAULT" ); diff --git a/tools/ssb-tools/load-dimension-data.sh b/tools/ssb-tools/load-dimension-data.sh deleted file mode 100755 index f24ad3af59..0000000000 --- a/tools/ssb-tools/load-dimension-data.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to load generated ssb data set to Doris -# Only for 4 dimension tables: customer, part, supplier and date. -# Usage: -# sh load-dimension-data.sh -############################################################## - -set -eo pipefail - -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` - -CURDIR=${ROOT} -SSB_DATA_DIR=$CURDIR/ssb-data/ - -# check if ssb-data exists -if [[ ! -d $SSB_DATA_DIR/ ]]; then - echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." - exit 1 -fi - -check_prerequest() { - local CMD=$1 - local NAME=$2 - if ! $CMD; then - echo "$NAME is missing. This script depends on cURL to load data to Doris." - exit 1 - fi -} - -check_prerequest "curl --version" "curl" - -# load 4 small dimension tables - -source $CURDIR/doris-cluster.conf - -echo "FE_HOST: $FE_HOST" -echo "FE_HTTP_PORT: $FE_HTTP_PORT" -echo "USER: $USER" -echo "PASSWORD: $PASSWORD" -echo "DB: $DB" - -echo 'Loading data for table: part' -curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" -T $SSB_DATA_DIR/part.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load -echo 'Loading data for table: date' -curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" -T $SSB_DATA_DIR/date.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/date/_stream_load -echo 'Loading data for table: supplier' -curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" -T $SSB_DATA_DIR/supplier.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load -echo 'Loading data for table: customer' -curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" -T $SSB_DATA_DIR/customer.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load diff --git a/tools/ssb-tools/load-fact-data.sh b/tools/ssb-tools/load-fact-data.sh deleted file mode 100755 index 24bc7f3bf4..0000000000 --- a/tools/ssb-tools/load-fact-data.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to load generated ssb data set to Doris -# Only for 1 fact table: lineorder -############################################################## - -set -eo pipefail - -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` - -CURDIR=${ROOT} -SSB_DATA_DIR=$CURDIR/ssb-data/ - -usage() { - echo " -Usage: $0 <options> - Optional options: - -c parallelism to load data of lineorder table, default is 5. - - Eg. - $0 load data using default value. - $0 -c 10 load lineorder table data using parallelism 10. - " - exit 1 -} - -OPTS=$(getopt \ - -n $0 \ - -o '' \ - -o 'c:' \ - -- "$@") - -eval set -- "$OPTS" - -PARALLEL=3 -HELP=0 - -if [ $# == 0 ] ; then - usage -fi - -while true; do - case "$1" in - -h) HELP=1 ; shift ;; - -c) PARALLEL=$2 ; shift 2 ;; - --) shift ; break ;; - *) echo "Internal error" ; exit 1 ;; - esac -done - -if [[ ${HELP} -eq 1 ]]; then - usage - exit -fi - -echo "Parallelism: $PARALLEL" - -# check if ssb-data exists -if [[ ! -d $SSB_DATA_DIR/ ]]; then - echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." - exit 1 -fi - -check_prerequest() { - local CMD=$1 - local NAME=$2 - if ! $CMD; then - echo "$NAME is missing. This script depends on cURL to load data to Doris." - exit 1 - fi -} - -check_prerequest "curl --version" "curl" - -# load lineorder -source $CURDIR/doris-cluster.conf - -echo "FE_HOST: $FE_HOST" -echo "FE_HTTP_PORT: $FE_HTTP_PORT" -echo "USER: $USER" -echo "PASSWORD: $PASSWORD" -echo "DB: $DB" - -function load() -{ - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineorder/_stream_load -} - - -# set parallelism -[ -e /tmp/fd1 ] || mkfifo /tmp/fd1 -exec 3<>/tmp/fd1 -rm -rf /tmp/fd1 - -for ((i=1;i<=$PARALLEL;i++)) -do - echo >&3 -done - -for file in `ls $SSB_DATA_DIR/lineorder.tbl.*` -do - read -u3 - { - load $file - echo >&3 - }& -done diff --git a/tools/ssb-tools/queries/q1.3.sql b/tools/ssb-tools/ssb-flat-queries/q1.1.sql similarity index 78% copy from tools/ssb-tools/queries/q1.3.sql copy to tools/ssb-tools/ssb-flat-queries/q1.1.sql index ed6e51b1cf..fbc2c6e4a1 100644 --- a/tools/ssb-tools/queries/q1.3.sql +++ b/tools/ssb-tools/ssb-flat-queries/q1.1.sql @@ -14,12 +14,11 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_weeknuminyear= 6 -AND d_year = 1994 -AND lo_discount BETWEEN 5 AND 7 -AND lo_quantity BETWEEN 26 AND 35; +--Q1.1 +SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE + LO_ORDERDATE >= 19930101 + AND LO_ORDERDATE <= 19931231 + AND LO_DISCOUNT BETWEEN 1 AND 3 + AND LO_QUANTITY < 25; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.2.sql b/tools/ssb-tools/ssb-flat-queries/q1.2.sql similarity index 78% copy from tools/ssb-tools/queries/q1.2.sql copy to tools/ssb-tools/ssb-flat-queries/q1.2.sql index 1b8442bd93..3a899c9344 100644 --- a/tools/ssb-tools/queries/q1.2.sql +++ b/tools/ssb-tools/ssb-flat-queries/q1.2.sql @@ -14,11 +14,11 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_yearmonth = 'Jan1994' -AND lo_discount BETWEEN 4 AND 6 -AND lo_quantity BETWEEN 26 AND 35; +--Q1.2 +SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE + LO_ORDERDATE >= 19940101 + AND LO_ORDERDATE <= 19940131 + AND LO_DISCOUNT BETWEEN 4 AND 6 + AND LO_QUANTITY BETWEEN 26 AND 35; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.3.sql b/tools/ssb-tools/ssb-flat-queries/q1.3.sql similarity index 75% copy from tools/ssb-tools/queries/q1.3.sql copy to tools/ssb-tools/ssb-flat-queries/q1.3.sql index ed6e51b1cf..5aaeff83a7 100644 --- a/tools/ssb-tools/queries/q1.3.sql +++ b/tools/ssb-tools/ssb-flat-queries/q1.3.sql @@ -14,12 +14,12 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_weeknuminyear= 6 -AND d_year = 1994 -AND lo_discount BETWEEN 5 AND 7 -AND lo_quantity BETWEEN 26 AND 35; +--Q1.3 +SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE + weekofyear(LO_ORDERDATE) = 6 + AND LO_ORDERDATE >= 19940101 + AND LO_ORDERDATE <= 19941231 + AND LO_DISCOUNT BETWEEN 5 AND 7 + AND LO_QUANTITY BETWEEN 26 AND 35; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.2.sql b/tools/ssb-tools/ssb-flat-queries/q2.1.sql similarity index 79% copy from tools/ssb-tools/queries/q1.2.sql copy to tools/ssb-tools/ssb-flat-queries/q2.1.sql index 1b8442bd93..254ea6481a 100644 --- a/tools/ssb-tools/queries/q1.2.sql +++ b/tools/ssb-tools/ssb-flat-queries/q2.1.sql @@ -14,11 +14,11 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_yearmonth = 'Jan1994' -AND lo_discount BETWEEN 4 AND 6 -AND lo_quantity BETWEEN 26 AND 35; +--Q2.1 +SELECT + SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, + P_BRAND +FROM lineorder_flat +WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' +GROUP BY YEAR, P_BRAND +ORDER BY YEAR, P_BRAND; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q2.2.sql similarity index 77% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q2.2.sql index 4ef15e93ea..6a636f3a9e 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q2.2.sql @@ -14,11 +14,14 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q2.2 +SELECT + SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, + P_BRAND +FROM lineorder_flat +WHERE + P_BRAND >= 'MFGR#2221' + AND P_BRAND <= 'MFGR#2228' + AND S_REGION = 'ASIA' +GROUP BY YEAR, P_BRAND +ORDER BY YEAR, P_BRAND; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q2.3.sql similarity index 79% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q2.3.sql index 4ef15e93ea..a2ef0c6df3 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q2.3.sql @@ -14,11 +14,13 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q2.3 +SELECT + SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, + P_BRAND +FROM lineorder_flat +WHERE + P_BRAND = 'MFGR#2239' + AND S_REGION = 'EUROPE' +GROUP BY YEAR, P_BRAND +ORDER BY YEAR, P_BRAND; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q3.1.sql similarity index 71% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q3.1.sql index 4ef15e93ea..8df98222c4 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q3.1.sql @@ -14,11 +14,16 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q3.1 +SELECT + C_NATION, + S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR, + SUM(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE + C_REGION = 'ASIA' + AND S_REGION = 'ASIA' + AND LO_ORDERDATE >= 19920101 + AND LO_ORDERDATE <= 19971231 +GROUP BY C_NATION, S_NATION, YEAR +ORDER BY YEAR ASC, revenue DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q3.2.sql similarity index 71% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q3.2.sql index 4ef15e93ea..c588b5bbce 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q3.2.sql @@ -14,11 +14,16 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q3.2 +SELECT + C_CITY, + S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR, + SUM(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE + C_NATION = 'UNITED STATES' + AND S_NATION = 'UNITED STATES' + AND LO_ORDERDATE >= 19920101 + AND LO_ORDERDATE <= 19971231 +GROUP BY C_CITY, S_CITY, YEAR +ORDER BY YEAR ASC, revenue DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q3.3.sql similarity index 69% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q3.3.sql index 4ef15e93ea..9a099d1732 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q3.3.sql @@ -14,11 +14,16 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q3.3 +SELECT + C_CITY, + S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR, + SUM(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE + C_CITY IN ('UNITED KI1', 'UNITED KI5') + AND S_CITY IN ('UNITED KI1', 'UNITED KI5') + AND LO_ORDERDATE >= 19920101 + AND LO_ORDERDATE <= 19971231 +GROUP BY C_CITY, S_CITY, YEAR +ORDER BY YEAR ASC, revenue DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q3.4.sql similarity index 69% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q3.4.sql index 4ef15e93ea..6bd71b5891 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q3.4.sql @@ -14,11 +14,16 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q3.4 +SELECT + C_CITY, + S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR, + SUM(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE + C_CITY IN ('UNITED KI1', 'UNITED KI5') + AND S_CITY IN ('UNITED KI1', 'UNITED KI5') + AND LO_ORDERDATE >= 19971201 + AND LO_ORDERDATE <= 19971231 +GROUP BY C_CITY, S_CITY, YEAR +ORDER BY YEAR ASC, revenue DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q4.1.sql similarity index 73% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q4.1.sql index 4ef15e93ea..aedd0e047e 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q4.1.sql @@ -14,11 +14,14 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q4.1 +SELECT (LO_ORDERDATE DIV 10000) AS YEAR, + C_NATION, + SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit +FROM lineorder_flat +WHERE + C_REGION = 'AMERICA' + AND S_REGION = 'AMERICA' + AND P_MFGR IN ('MFGR#1', 'MFGR#2') +GROUP BY YEAR, C_NATION +ORDER BY YEAR ASC, C_NATION ASC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-flat-queries/q4.2.sql similarity index 66% copy from tools/ssb-tools/queries/q1.1.sql copy to tools/ssb-tools/ssb-flat-queries/q4.2.sql index 4ef15e93ea..b9891ee408 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q4.2.sql @@ -14,11 +14,20 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +--Q4.2 +SELECT (LO_ORDERDATE DIV 10000) AS YEAR, + S_NATION, + P_CATEGORY, + SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit +FROM lineorder_flat +WHERE + C_REGION = 'AMERICA' + AND S_REGION = 'AMERICA' + AND LO_ORDERDATE >= 19970101 + AND LO_ORDERDATE <= 19981231 + AND P_MFGR IN ('MFGR#1', 'MFGR#2') +GROUP BY YEAR, S_NATION, P_CATEGORY +ORDER BY + YEAR ASC, + S_NATION ASC, + P_CATEGORY ASC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q2.1.sql b/tools/ssb-tools/ssb-flat-queries/q4.3.sql similarity index 69% copy from tools/ssb-tools/queries/q2.1.sql copy to tools/ssb-tools/ssb-flat-queries/q4.3.sql index e1a1f52d18..6871023137 100644 --- a/tools/ssb-tools/queries/q2.1.sql +++ b/tools/ssb-tools/ssb-flat-queries/q4.3.sql @@ -14,13 +14,16 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_revenue), d_year, p_brand -FROM lineorder, date, part, supplier -WHERE lo_orderdate = d_datekey -AND lo_partkey = p_partkey -AND lo_suppkey = s_suppkey -AND p_category = 'MFGR#12' -AND s_region = 'AMERICA' -GROUP BY d_year, p_brand -ORDER BY d_year, p_brand; +--Q4.3 +SELECT (LO_ORDERDATE DIV 10000) AS YEAR, + S_CITY, + P_BRAND, + SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit +FROM lineorder_flat +WHERE + S_NATION = 'UNITED STATES' + AND LO_ORDERDATE >= 19970101 + AND LO_ORDERDATE <= 19981231 + AND P_CATEGORY = 'MFGR#14' +GROUP BY YEAR, S_CITY, P_BRAND +ORDER BY YEAR ASC, S_CITY ASC, P_BRAND ASC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.1.sql b/tools/ssb-tools/ssb-queries/q1.1.sql similarity index 80% rename from tools/ssb-tools/queries/q1.1.sql rename to tools/ssb-tools/ssb-queries/q1.1.sql index 4ef15e93ea..d8a2840ca7 100644 --- a/tools/ssb-tools/queries/q1.1.sql +++ b/tools/ssb-tools/ssb-queries/q1.1.sql @@ -14,11 +14,10 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_year = 1993 -AND lo_discount BETWEEN 1 AND 3 -AND lo_quantity < 25; +SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE +FROM lineorder, dates +WHERE + lo_orderdate = d_datekey + AND d_year = 1993 + AND lo_discount BETWEEN 1 AND 3 + AND lo_quantity < 25; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.2.sql b/tools/ssb-tools/ssb-queries/q1.2.sql similarity index 78% rename from tools/ssb-tools/queries/q1.2.sql rename to tools/ssb-tools/ssb-queries/q1.2.sql index 1b8442bd93..db6eb0c613 100644 --- a/tools/ssb-tools/queries/q1.2.sql +++ b/tools/ssb-tools/ssb-queries/q1.2.sql @@ -14,11 +14,10 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_yearmonth = 'Jan1994' -AND lo_discount BETWEEN 4 AND 6 -AND lo_quantity BETWEEN 26 AND 35; +SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE +FROM lineorder, dates +WHERE + lo_orderdate = d_datekey + AND d_yearmonth = 'Jan1994' + AND lo_discount BETWEEN 4 AND 6 + AND lo_quantity BETWEEN 26 AND 35; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q1.3.sql b/tools/ssb-tools/ssb-queries/q1.3.sql similarity index 77% rename from tools/ssb-tools/queries/q1.3.sql rename to tools/ssb-tools/ssb-queries/q1.3.sql index ed6e51b1cf..dbb91b0c46 100644 --- a/tools/ssb-tools/queries/q1.3.sql +++ b/tools/ssb-tools/ssb-queries/q1.3.sql @@ -14,12 +14,12 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT SUM(lo_extendedprice*lo_discount) AS -REVENUE -FROM lineorder, date -WHERE lo_orderdate = d_datekey -AND d_weeknuminyear= 6 -AND d_year = 1994 -AND lo_discount BETWEEN 5 AND 7 -AND lo_quantity BETWEEN 26 AND 35; +SELECT + SUM(lo_extendedprice * lo_discount) AS REVENUE +FROM lineorder, dates +WHERE + lo_orderdate = d_datekey + AND d_weeknuminyear = 6 + AND d_year = 1994 + AND lo_discount BETWEEN 5 AND 7 + AND lo_quantity BETWEEN 26 AND 35; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q2.1.sql b/tools/ssb-tools/ssb-queries/q2.1.sql similarity index 80% rename from tools/ssb-tools/queries/q2.1.sql rename to tools/ssb-tools/ssb-queries/q2.1.sql index e1a1f52d18..70a8de9d42 100644 --- a/tools/ssb-tools/queries/q2.1.sql +++ b/tools/ssb-tools/ssb-queries/q2.1.sql @@ -14,13 +14,13 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - SELECT SUM(lo_revenue), d_year, p_brand -FROM lineorder, date, part, supplier -WHERE lo_orderdate = d_datekey -AND lo_partkey = p_partkey -AND lo_suppkey = s_suppkey -AND p_category = 'MFGR#12' -AND s_region = 'AMERICA' +FROM lineorder, dates, part, supplier +WHERE + lo_orderdate = d_datekey + AND lo_partkey = p_partkey + AND lo_suppkey = s_suppkey + AND p_category = 'MFGR#12' + AND s_region = 'AMERICA' GROUP BY d_year, p_brand -ORDER BY d_year, p_brand; +ORDER BY p_brand; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q2.2.sql b/tools/ssb-tools/ssb-queries/q2.2.sql similarity index 78% rename from tools/ssb-tools/queries/q2.2.sql rename to tools/ssb-tools/ssb-queries/q2.2.sql index 3db6170119..e283dbdb05 100644 --- a/tools/ssb-tools/queries/q2.2.sql +++ b/tools/ssb-tools/ssb-queries/q2.2.sql @@ -14,14 +14,13 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - SELECT SUM(lo_revenue), d_year, p_brand -FROM lineorder, date, part, supplier -WHERE lo_orderdate = d_datekey -AND lo_partkey = p_partkey -AND lo_suppkey = s_suppkey -AND p_brand BETWEEN 'MFGR#2221' -AND 'MFGR#2228' -AND s_region = 'ASIA' +FROM lineorder, dates, part, supplier +WHERE + lo_orderdate = d_datekey + AND lo_partkey = p_partkey + AND lo_suppkey = s_suppkey + AND p_brand BETWEEN 'MFGR#2221' AND 'MFGR#2228' + AND s_region = 'ASIA' GROUP BY d_year, p_brand -ORDER BY d_year, p_brand; +ORDER BY d_year, p_brand; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q2.3.sql b/tools/ssb-tools/ssb-queries/q2.3.sql similarity index 79% rename from tools/ssb-tools/queries/q2.3.sql rename to tools/ssb-tools/ssb-queries/q2.3.sql index b70ca90666..22d2419621 100644 --- a/tools/ssb-tools/queries/q2.3.sql +++ b/tools/ssb-tools/ssb-queries/q2.3.sql @@ -14,13 +14,13 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - SELECT SUM(lo_revenue), d_year, p_brand -FROM lineorder, date, part, supplier -WHERE lo_orderdate = d_datekey -AND lo_partkey = p_partkey -AND lo_suppkey = s_suppkey -AND p_brand = 'MFGR#2239' -AND s_region = 'EUROPE' +FROM lineorder, dates, part, supplier +WHERE + lo_orderdate = d_datekey + AND lo_partkey = p_partkey + AND lo_suppkey = s_suppkey + AND p_brand = 'MFGR#2239' + AND s_region = 'EUROPE' GROUP BY d_year, p_brand -ORDER BY d_year, p_brand; +ORDER BY d_year, p_brand; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q3.1.sql b/tools/ssb-tools/ssb-queries/q3.1.sql similarity index 70% rename from tools/ssb-tools/queries/q3.1.sql rename to tools/ssb-tools/ssb-queries/q3.1.sql index 70f17d789b..d674337995 100644 --- a/tools/ssb-tools/queries/q3.1.sql +++ b/tools/ssb-tools/ssb-queries/q3.1.sql @@ -14,15 +14,19 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT c_nation, s_nation, d_year, -SUM(lo_revenue) AS REVENUE -FROM customer, lineorder, supplier, date -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_orderdate = d_datekey -AND c_region = 'ASIA' -AND s_region = 'ASIA' -AND d_year >= 1992 AND d_year <= 1997 +SELECT + c_nation, + s_nation, + d_year, + SUM(lo_revenue) AS REVENUE +FROM customer, lineorder, supplier, dates +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_orderdate = d_datekey + AND c_region = 'ASIA' + AND s_region = 'ASIA' + AND d_year >= 1992 + AND d_year <= 1997 GROUP BY c_nation, s_nation, d_year -ORDER BY d_year ASC, REVENUE DESC; +ORDER BY d_year ASC, REVENUE DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q3.2.sql b/tools/ssb-tools/ssb-queries/q3.2.sql similarity index 69% rename from tools/ssb-tools/queries/q3.2.sql rename to tools/ssb-tools/ssb-queries/q3.2.sql index a416fbea8b..2969efb1a2 100644 --- a/tools/ssb-tools/queries/q3.2.sql +++ b/tools/ssb-tools/ssb-queries/q3.2.sql @@ -14,15 +14,19 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT c_city, s_city, d_year, sum(lo_revenue) -AS REVENUE -FROM customer, lineorder, supplier, date -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_orderdate = d_datekey -AND c_nation = 'UNITED STATES' -AND s_nation = 'UNITED STATES' -AND d_year >= 1992 AND d_year <= 1997 +SELECT + c_city, + s_city, + d_year, + SUM(lo_revenue) AS REVENUE +FROM customer, lineorder, supplier, dates +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_orderdate = d_datekey + AND c_nation = 'UNITED STATES' + AND s_nation = 'UNITED STATES' + AND d_year >= 1992 + AND d_year <= 1997 GROUP BY c_city, s_city, d_year -ORDER BY d_year ASC, REVENUE DESC; +ORDER BY d_year ASC, REVENUE DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q3.3.sql b/tools/ssb-tools/ssb-queries/q3.3.sql similarity index 64% rename from tools/ssb-tools/queries/q3.3.sql rename to tools/ssb-tools/ssb-queries/q3.3.sql index 98e29b72e7..ac1cb324d0 100644 --- a/tools/ssb-tools/queries/q3.3.sql +++ b/tools/ssb-tools/ssb-queries/q3.3.sql @@ -14,17 +14,25 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT c_city, s_city, d_year, SUM(lo_revenue) -AS REVENUE -FROM customer, lineorder, supplier, date -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_orderdate = d_datekey -AND (c_city='UNITED KI1' -OR c_city='UNITED KI5') -AND (s_city='UNITED KI1' -OR s_city='UNITED KI5') -AND d_year >= 1992 AND d_year <= 1997 +SELECT + c_city, + s_city, + d_year, + SUM(lo_revenue) AS REVENUE +FROM customer, lineorder, supplier, dates +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_orderdate = d_datekey + AND ( + c_city = 'UNITED KI1' + OR c_city = 'UNITED KI5' + ) + AND ( + s_city = 'UNITED KI1' + OR s_city = 'UNITED KI5' + ) + AND d_year >= 1992 + AND d_year <= 1997 GROUP BY c_city, s_city, d_year -ORDER BY d_year ASC, REVENUE DESC; +ORDER BY d_year ASC, REVENUE DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q3.4.sql b/tools/ssb-tools/ssb-queries/q3.4.sql similarity index 65% rename from tools/ssb-tools/queries/q3.4.sql rename to tools/ssb-tools/ssb-queries/q3.4.sql index 65fe992ca4..2be6a5cd70 100644 --- a/tools/ssb-tools/queries/q3.4.sql +++ b/tools/ssb-tools/ssb-queries/q3.4.sql @@ -14,17 +14,24 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT c_city, s_city, d_year, SUM(lo_revenue) -AS REVENUE -FROM customer, lineorder, supplier, date -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_orderdate = d_datekey -AND (c_city='UNITED KI1' -OR c_city='UNITED KI5') -AND (s_city='UNITED KI1' -OR s_city='UNITED KI5') -AND d_yearmonth = 'Dec1997' +SELECT + c_city, + s_city, + d_year, + SUM(lo_revenue) AS REVENUE +FROM customer, lineorder, supplier, dates +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_orderdate = d_datekey + AND ( + c_city = 'UNITED KI1' + OR c_city = 'UNITED KI5' + ) + AND ( + s_city = 'UNITED KI1' + OR s_city = 'UNITED KI5' + ) + AND d_yearmonth = 'Dec1997' GROUP BY c_city, s_city, d_year -ORDER BY d_year ASC, REVENUE DESC; +ORDER BY d_year ASC, REVENUE DESC; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q4.1.sql b/tools/ssb-tools/ssb-queries/q4.1.sql similarity index 66% rename from tools/ssb-tools/queries/q4.1.sql rename to tools/ssb-tools/ssb-queries/q4.1.sql index bdcd730bf9..f0cfcdd403 100644 --- a/tools/ssb-tools/queries/q4.1.sql +++ b/tools/ssb-tools/ssb-queries/q4.1.sql @@ -14,17 +14,21 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT d_year, c_nation, -SUM(lo_revenue - lo_supplycost) AS PROFIT -FROM date, customer, supplier, part, lineorder -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_partkey = p_partkey -AND lo_orderdate = d_datekey -AND c_region = 'AMERICA' -AND s_region = 'AMERICA' -AND (p_mfgr = 'MFGR#1' -OR p_mfgr = 'MFGR#2') +SELECT + d_year, + c_nation, + SUM(lo_revenue - lo_supplycost) AS PROFIT +FROM dates, customer, supplier, part, lineorder +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_partkey = p_partkey + AND lo_orderdate = d_datekey + AND c_region = 'AMERICA' + AND s_region = 'AMERICA' + AND ( + p_mfgr = 'MFGR#1' + OR p_mfgr = 'MFGR#2' + ) GROUP BY d_year, c_nation -ORDER BY d_year, c_nation; +ORDER BY d_year, c_nation; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q4.2.sql b/tools/ssb-tools/ssb-queries/q4.2.sql similarity index 62% rename from tools/ssb-tools/queries/q4.2.sql rename to tools/ssb-tools/ssb-queries/q4.2.sql index 24c82cf682..fbbaef00e8 100644 --- a/tools/ssb-tools/queries/q4.2.sql +++ b/tools/ssb-tools/ssb-queries/q4.2.sql @@ -14,18 +14,26 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT d_year, s_nation, p_category, -SUM(lo_revenue - lo_supplycost) AS PROFIT -FROM date, customer, supplier, part, lineorder -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_partkey = p_partkey -AND lo_orderdate = d_datekey -AND c_region = 'AMERICA' -AND s_region = 'AMERICA' -AND (d_year = 1997 OR d_year = 1998) -AND (p_mfgr = 'MFGR#1' -OR p_mfgr = 'MFGR#2') +SELECT + d_year, + s_nation, + p_category, + SUM(lo_revenue - lo_supplycost) AS PROFIT +FROM dates, customer, supplier, part, lineorder +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_partkey = p_partkey + AND lo_orderdate = d_datekey + AND c_region = 'AMERICA' + AND s_region = 'AMERICA' + AND ( + d_year = 1997 + OR d_year = 1998 + ) + AND ( + p_mfgr = 'MFGR#1' + OR p_mfgr = 'MFGR#2' + ) GROUP BY d_year, s_nation, p_category -ORDER BY d_year, s_nation, p_category; +ORDER BY d_year, s_nation, p_category; \ No newline at end of file diff --git a/tools/ssb-tools/queries/q4.3.sql b/tools/ssb-tools/ssb-queries/q4.3.sql similarity index 66% rename from tools/ssb-tools/queries/q4.3.sql rename to tools/ssb-tools/ssb-queries/q4.3.sql index 0dcc08bd26..64582cc6ac 100644 --- a/tools/ssb-tools/queries/q4.3.sql +++ b/tools/ssb-tools/ssb-queries/q4.3.sql @@ -14,16 +14,22 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. - -SELECT d_year, s_city, p_brand, -SUM(lo_revenue - lo_supplycost) AS PROFIT -FROM date, customer, supplier, part, lineorder -WHERE lo_custkey = c_custkey -AND lo_suppkey = s_suppkey -AND lo_partkey = p_partkey -AND lo_orderdate = d_datekey -AND s_nation = 'UNITED STATES' -AND (d_year = 1997 OR d_year = 1998) -AND p_category = 'MFGR#14' +SELECT + d_year, + s_city, + p_brand, + SUM(lo_revenue - lo_supplycost) AS PROFIT +FROM dates, customer, supplier, part, lineorder +WHERE + lo_custkey = c_custkey + AND lo_suppkey = s_suppkey + AND lo_partkey = p_partkey + AND lo_orderdate = d_datekey + AND s_nation = 'UNITED STATES' + AND ( + d_year = 1997 + OR d_year = 1998 + ) + AND p_category = 'MFGR#14' GROUP BY d_year, s_city, p_brand -ORDER BY d_year, s_city, p_brand; +ORDER BY d_year, s_city, p_brand; \ No newline at end of file diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md index f06e96289b..bd9e483eec 100644 --- a/tools/tpch-tools/README.md +++ b/tools/tpch-tools/README.md @@ -23,12 +23,25 @@ These scripts are used to make tpc-h test. follow the steps below: ### 1. build tpc-h dbgen tool. - ./build-tpch-dbgen.sh + + ./bin/build-tpch-dbgen.sh + ### 2. generate tpc-h data. use -h for more infomations. - ./gen-tpch-data.sh -s 1 -### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info, then run script below. - ./create-tpch-tables.sh + + ./bin/gen-tpch-data.sh -s 1 + +### 3. create tpc-h tables. modify `conf/doris-cluster.conf` to specify doris info, then run script below. + + ./bin/create-tpch-tables.sh + ### 4. load tpc-h data. use -h for help. - ./load-tpch-data.sh + + ./bin/load-tpch-data.sh + ### 5. run tpc-h queries. - ./run-tpch-queries.sh + + ./bin/run-tpch-queries.sh + + NOTICE: At present, Doris's query optimizer and statistical information functions are not complete, so we rewrite some queries in TPC-H to adapt to Doris' execution framework, but it does not affect the correctness of the results. The rewritten SQL is marked with "Modified" in the corresponding .sql file. + + A new query optimizer will be released in subsequent releases. diff --git a/tools/tpch-tools/build-tpch-dbgen.sh b/tools/tpch-tools/bin/build-tpch-dbgen.sh similarity index 92% rename from tools/tpch-tools/build-tpch-dbgen.sh rename to tools/tpch-tools/bin/build-tpch-dbgen.sh index 9c2c63702c..b0cce4b1da 100755 --- a/tools/tpch-tools/build-tpch-dbgen.sh +++ b/tools/tpch-tools/bin/build-tpch-dbgen.sh @@ -51,13 +51,12 @@ if [[ -d $TPCH_DBGEN_DIR ]]; then echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this dir first." else wget "https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip" - - unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/ + unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/ fi # modify tpcd.h -cd $TPCH_DBGEN_DIR/ -echo ' +cd "$TPCH_DBGEN_DIR"/ +printf '%s' ' #ifdef MYSQL #define GEN_QUERY_PLAN "" #define START_TRAN "START TRANSACTION" @@ -66,7 +65,7 @@ echo ' #define SET_ROWCOUNT "limit %d;\n" #define SET_DBASE "use %s;\n" #endif -' >> tpcd.h +' >>tpcd.h # modify makefile cp makefile.suite makefile @@ -76,12 +75,16 @@ sed -i 's/^MACHINE =/MACHINE = LINUX/g' makefile sed -i 's/^WORKLOAD =/WORKLOAD = TPCH/g' makefile # compile tpch-dbgen -make > /dev/null +make >/dev/null cd - # check if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then - echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h" + echo " +################ +Build succeed! +################ +Run $TPCH_DBGEN_DIR/dbgen -h" exit 0 else echo "Build failed!" diff --git a/tools/tpch-tools/create-tpch-tables.sh b/tools/tpch-tools/bin/create-tpch-tables.sh similarity index 84% rename from tools/tpch-tools/create-tpch-tables.sh rename to tools/tpch-tools/bin/create-tpch-tables.sh index 2263f47e1b..01ca7a1345 100755 --- a/tools/tpch-tools/create-tpch-tables.sh +++ b/tools/tpch-tools/bin/create-tpch-tables.sh @@ -40,7 +40,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -- "$@") @@ -84,7 +84,9 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" @@ -92,7 +94,7 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB" -echo $CURDIR/create-tpch-tables.sql -mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql +echo "Run SQLs from $CURDIR/create-tpch-tables.sql" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$CURDIR"/../ddl/create-tpch-tables.sql diff --git a/tools/tpch-tools/gen-tpch-data.sh b/tools/tpch-tools/bin/gen-tpch-data.sh similarity index 80% rename from tools/tpch-tools/gen-tpch-data.sh rename to tools/tpch-tools/bin/gen-tpch-data.sh index 0e7359d601..4202e3c58d 100755 --- a/tools/tpch-tools/gen-tpch-data.sh +++ b/tools/tpch-tools/bin/gen-tpch-data.sh @@ -48,7 +48,7 @@ Usage: $0 <options> } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'hs:c:' \ -- "$@") @@ -107,40 +107,40 @@ if [[ -d $TPCH_DATA_DIR/ ]]; then exit 1 fi -mkdir $TPCH_DATA_DIR/ +mkdir "$TPCH_DATA_DIR"/ # gen data -cd $TPCH_DBGEN_DIR +cd "$TPCH_DBGEN_DIR" echo "Begin to generate data for table: region" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r echo "Begin to generate data for table: nation" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n echo "Begin to generate data for table: supplier" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s echo "Begin to generate data for table: part" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P echo "Begin to generate data for table: customer" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c echo "Begin to generate data for table: partsupp" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i" } & done wait echo "Begin to generate data for table: orders" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T O -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i" } & done wait echo "Begin to generate data for table: lineitem" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i" } & done wait @@ -148,7 +148,7 @@ wait cd - # move data to $TPCH_DATA_DIR -mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/ +mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/ # check data -du -sh $TPCH_DATA_DIR/*.tbl* +du -sh "$TPCH_DATA_DIR"/*.tbl* diff --git a/tools/tpch-tools/load-tpch-data.sh b/tools/tpch-tools/bin/load-tpch-data.sh similarity index 69% rename from tools/tpch-tools/load-tpch-data.sh rename to tools/tpch-tools/bin/load-tpch-data.sh index f04e21bb9c..7a250ef2f4 100755 --- a/tools/tpch-tools/load-tpch-data.sh +++ b/tools/tpch-tools/bin/load-tpch-data.sh @@ -46,9 +46,9 @@ Usage: $0 <options> } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ - -o 'c:' \ + -o 'hc:' \ -- "$@") eval set -- "$OPTS" @@ -89,7 +89,7 @@ fi echo "Parallelism: $PARALLEL" # check if tpch-data exists -if [[ ! -d $TPCH_DATA_DIR/ ]]; then +if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first." exit 1 fi @@ -106,7 +106,9 @@ check_prerequest() { check_prerequest "curl --version" "curl" # load tables -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_HTTP_PORT: $FE_HTTP_PORT" @@ -115,61 +117,62 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" function load_region() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: r_regionkey, r_name, r_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load } function load_nation() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load } function load_supplier() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load } function load_customer() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load } function load_part() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load } function load_partsupp() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load } function load_orders() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load } function load_lineitem() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load } # start load -load_region $TPCH_DATA_DIR/region.tbl -load_nation $TPCH_DATA_DIR/nation.tbl -load_supplier $TPCH_DATA_DIR/supplier.tbl -load_customer $TPCH_DATA_DIR/customer.tbl -load_part $TPCH_DATA_DIR/part.tbl - +date +load_region "$TPCH_DATA_DIR"/region.tbl +load_nation "$TPCH_DATA_DIR"/nation.tbl +load_supplier "$TPCH_DATA_DIR"/supplier.tbl +load_customer "$TPCH_DATA_DIR"/customer.tbl +load_part "$TPCH_DATA_DIR"/part.tbl +date # set parallelism # 以PID为名, 防止创建命名管道时与已有文件重名,从而失败 @@ -182,20 +185,21 @@ exec 3<>${fifo} rm -rf ${fifo} # 在fd3中放置$PARALLEL个空行作为令牌 -for ((i = 1; i <= $PARALLEL; i++)); do +for ((i = 1; i <= PARALLEL; i++)); do echo >&3 done -for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do +date +for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do # 领取令牌, 即从fd3中读取行, 每次一行 # 对管道,读一行便少一行,每次只能读取一行 # 所有行读取完毕, 执行挂起, 直到管道再次有可读行 # 因此实现了进程数量控制 - read -u3 + read -r -u3 # 要批量执行的命令放在大括号内, 后台运行 { - load_lineitem $file + load_lineitem "$file" echo "----loaded $file" sleep 2 # 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行 @@ -203,20 +207,22 @@ for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do } & done -for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do - read -u3 +date +for file in "$TPCH_DATA_DIR"/orders.tbl*; do + read -r -u3 { - load_orders $file + load_orders "$file" echo "----loaded $file" sleep 2 echo >&3 } & done -for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do - read -u3 +date +for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do + read -r -u3 { - load_partsupp $file + load_partsupp "$file" echo "----loaded $file" sleep 2 echo >&3 @@ -227,3 +233,6 @@ done wait # 删除文件标识符 exec 3>&- +date + +echo "DONE." \ No newline at end of file diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/bin/run-tpch-queries.sh similarity index 73% rename from tools/tpch-tools/run-tpch-queries.sh rename to tools/tpch-tools/bin/run-tpch-queries.sh index 5a0d06d512..14d00e17bb 100755 --- a/tools/tpch-tools/run-tpch-queries.sh +++ b/tools/tpch-tools/bin/run-tpch-queries.sh @@ -29,7 +29,7 @@ ROOT=$( ) CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/queries +QUERIES_DIR=$CURDIR/../queries usage() { echo " @@ -41,7 +41,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -- "$@") @@ -85,33 +85,41 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" +echo "Time Unit: ms" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$*" + mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*" } -pre_set "set global enable_vectorized_engine=1;" -pre_set "set global parallel_fragment_exec_instance_num=8;" -pre_set "set global exec_mem_limit=48G;" -pre_set "set global batch_size=4096;" -# pre_set "show variables like 'batch_size';" +echo '============================================' +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" +echo '============================================' +sum=0 for i in $(seq 1 22); do total=0 - # Each query is executed three times and takes the average time - for j in $(seq 1 3); do + run=3 + # Each query is executed ${run} times and takes the average time + for j in $(seq 1 ${run}); do start=$(date +%s%3N) - mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null + mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments <"$QUERIES_DIR"/q"$i".sql >/dev/null end=$(date +%s%3N) total=$((total + end - start)) done - echo "q$i: $((total / 3))ms" + cost=$((total / run)) + echo "q$i: ${cost}" + sum=$((sum + cost)) done +echo "Total cost: $sum" diff --git a/tools/tpch-tools/doris-cluster.conf b/tools/tpch-tools/conf/doris-cluster.conf similarity index 95% rename from tools/tpch-tools/doris-cluster.conf rename to tools/tpch-tools/conf/doris-cluster.conf index 5d76264104..9417bcb9e0 100644 --- a/tools/tpch-tools/doris-cluster.conf +++ b/tools/tpch-tools/conf/doris-cluster.conf @@ -16,7 +16,7 @@ # under the License. # Any of FE host -export FE_HOST='172.20.194.235' +export FE_HOST='127.0.0.1' # http_port in fe.conf export FE_HTTP_PORT=8030 # query_port in fe.conf @@ -26,4 +26,4 @@ export USER='root' # Doris password export PASSWORD='' # The database where TPC-H tables located -export DB='tpch1' +export DB='tpch' diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/create-tpch-tables.sql deleted file mode 100644 index f5acc7e842..0000000000 --- a/tools/tpch-tools/create-tpch-tables.sql +++ /dev/null @@ -1,100 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. -CREATE TABLE `region` ( - `r_regionkey` integer NOT NULL, - `r_name` char(25) NOT NULL, - `r_comment` varchar(152) -) DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `nation` ( - `n_nationkey` integer NOT NULL, - `n_name` char(25) NOT NULL, - `n_regionkey` integer NOT NULL, - `n_comment` varchar(152) -) DISTRIBUTED BY HASH(`n_nationkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `part` ( - `p_partkey` integer NOT NULL, - `p_name` varchar(55) NOT NULL, - `p_mfgr` char(25) NOT NULL, - `p_brand` char(10) NOT NULL, - `p_type` varchar(25) NOT NULL, - `p_size` integer NOT NULL, - `p_container` char(10) NOT NULL, - `p_retailprice` decimal(12, 2) NOT NULL, - `p_comment` varchar(23) NOT NULL -) DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `supplier` ( - `s_suppkey` integer NOT NULL, - `s_name` char(25) NOT NULL, - `s_address` varchar(40) NOT NULL, - `s_nationkey` integer NOT NULL, - `s_phone` char(15) NOT NULL, - `s_acctbal` decimal(12, 2) NOT NULL, - `s_comment` varchar(101) NOT NULL -) DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `customer` ( - `c_custkey` integer NOT NULL, - `c_name` varchar(25) NOT NULL, - `c_address` varchar(40) NOT NULL, - `c_nationkey` integer NOT NULL, - `c_phone` char(15) NOT NULL, - `c_acctbal` decimal(12, 2) NOT NULL, - `c_mktsegment` char(10) NOT NULL, - `c_comment` varchar(117) NOT NULL -) DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `partsupp` ( - `ps_partkey` integer NOT NULL, - `ps_suppkey` integer NOT NULL, - `ps_availqty` integer NOT NULL, - `ps_supplycost` decimal(12, 2) NOT NULL, - `ps_comment` varchar(199) NOT NULL -) DISTRIBUTED BY HASH(`ps_partkey`, `ps_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `orders` ( - `o_orderkey` integer NOT NULL, - `o_custkey` integer NOT NULL, - `o_orderstatus` char(1) NOT NULL, - `o_totalprice` decimal(12, 2) NOT NULL, - `o_orderdate` date NOT NULL, - `o_orderpriority` char(15) NOT NULL, - `o_clerk` char(15) NOT NULL, - `o_shippriority` integer NOT NULL, - `o_comment` varchar(79) NOT NULL -) DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); - -CREATE TABLE `lineitem` ( - `l_orderkey` integer NOT NULL, - `l_linenumber` integer NOT NULL, - `l_partkey` integer NOT NULL, - `l_suppkey` integer NOT NULL, - `l_quantity` decimal(12, 2) NOT NULL, - `l_extendedprice` decimal(12, 2) NOT NULL, - `l_discount` decimal(12, 2) NOT NULL, - `l_tax` decimal(12, 2) NOT NULL, - `l_returnflag` char(1) NOT NULL, - `l_linestatus` char(1) NOT NULL, - `l_shipdate` date NOT NULL, - `l_commitdate` date NOT NULL, - `l_receiptdate` date NOT NULL, - `l_shipinstruct` char(25) NOT NULL, - `l_shipmode` char(10) NOT NULL, - `l_comment` varchar(44) NOT NULL -) DISTRIBUTED BY HASH(`l_orderkey`, `l_linenumber`) BUCKETS 48 PROPERTIES ("replication_num" = "1"); diff --git a/tools/tpch-tools/ddl/create-tpch-tables.sql b/tools/tpch-tools/ddl/create-tpch-tables.sql new file mode 100644 index 0000000000..1627720899 --- /dev/null +++ b/tools/tpch-tools/ddl/create-tpch-tables.sql @@ -0,0 +1,174 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +drop table if exists lineitem; +CREATE TABLE lineitem ( + l_shipdate DATE NOT NULL, + l_orderkey bigint NOT NULL, + l_linenumber int not null, + l_partkey int NOT NULL, + l_suppkey int not null, + l_quantity decimal(15, 2) NOT NULL, + l_extendedprice decimal(15, 2) NOT NULL, + l_discount decimal(15, 2) NOT NULL, + l_tax decimal(15, 2) NOT NULL, + l_returnflag VARCHAR(1) NOT NULL, + l_linestatus VARCHAR(1) NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR(25) NOT NULL, + l_shipmode VARCHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`l_shipdate`, `l_orderkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); + +drop table if exists orders; +CREATE TABLE orders ( + o_orderkey bigint NOT NULL, + o_orderdate DATE NOT NULL, + o_custkey int NOT NULL, + o_orderstatus VARCHAR(1) NOT NULL, + o_totalprice decimal(15, 2) NOT NULL, + o_orderpriority VARCHAR(15) NOT NULL, + o_clerk VARCHAR(15) NOT NULL, + o_shippriority int NOT NULL, + o_comment VARCHAR(79) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`o_orderkey`, `o_orderdate`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); + +drop table if exists partsupp; +CREATE TABLE partsupp ( + ps_partkey int NOT NULL, + ps_suppkey int NOT NULL, + ps_availqty int NOT NULL, + ps_supplycost decimal(15, 2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`ps_partkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "part_partsupp" +); + +drop table if exists part; +CREATE TABLE part ( + p_partkey int NOT NULL, + p_name VARCHAR(55) NOT NULL, + p_mfgr VARCHAR(25) NOT NULL, + p_brand VARCHAR(10) NOT NULL, + p_type VARCHAR(25) NOT NULL, + p_size int NOT NULL, + p_container VARCHAR(10) NOT NULL, + p_retailprice decimal(15, 2) NOT NULL, + p_comment VARCHAR(23) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`p_partkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "part_partsupp" +); + +drop table if exists customer; +CREATE TABLE customer ( + c_custkey int NOT NULL, + c_name VARCHAR(25) NOT NULL, + c_address VARCHAR(40) NOT NULL, + c_nationkey int NOT NULL, + c_phone VARCHAR(15) NOT NULL, + c_acctbal decimal(15, 2) NOT NULL, + c_mktsegment VARCHAR(10) NOT NULL, + c_comment VARCHAR(117) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`c_custkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1" +); + +drop table if exists supplier; +CREATE TABLE supplier ( + s_suppkey int NOT NULL, + s_name VARCHAR(25) NOT NULL, + s_address VARCHAR(40) NOT NULL, + s_nationkey int NOT NULL, + s_phone VARCHAR(15) NOT NULL, + s_acctbal decimal(15, 2) NOT NULL, + s_comment VARCHAR(101) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`s_suppkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12 +PROPERTIES ( + "replication_num" = "1" +); + +drop table if exists nation; +CREATE TABLE `nation` ( + `n_nationkey` int(11) NOT NULL, + `n_name` varchar(25) NOT NULL, + `n_regionkey` int(11) NOT NULL, + `n_comment` varchar(152) NULL +) ENGINE=OLAP +DUPLICATE KEY(`N_NATIONKEY`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1" +); + +drop table if exists region; +CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) +)ENGINE=OLAP +DUPLICATE KEY(`r_regionkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1" +); + +drop view if exists revenue0; +create view revenue0 (supplier_no, total_revenue) as +select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) +from + lineitem +where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month +group by + l_suppkey; diff --git a/tools/tpch-tools/queries/q1.sql b/tools/tpch-tools/queries/q1.sql index a888c2bdb1..6075f47ccb 100644 --- a/tools/tpch-tools/queries/q1.sql +++ b/tools/tpch-tools/queries/q1.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=false) */ l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, diff --git a/tools/tpch-tools/queries/q10.sql b/tools/tpch-tools/queries/q10.sql index 6a12c1f5b4..52a0d313d7 100644 --- a/tools/tpch-tools/queries/q10.sql +++ b/tools/tpch-tools/queries/q10.sql @@ -15,10 +15,12 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ c_custkey, c_name, - sum(l_extendedprice * (1 - l_discount)) as revenue, + sum(t1.l_extendedprice * (1 - t1.l_discount)) as revenue, c_acctbal, n_name, c_address, @@ -26,15 +28,16 @@ select c_comment from customer, - orders, - lineitem, + ( + select o_custkey,l_extendedprice,l_discount from lineitem, orders + where l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + ) t1, nation where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate >= date '1993-10-01' - and o_orderdate < date '1993-10-01' + interval '3' month - and l_returnflag = 'R' + c_custkey = t1.o_custkey and c_nationkey = n_nationkey group by c_custkey, @@ -47,3 +50,4 @@ group by order by revenue desc limit 20; + diff --git a/tools/tpch-tools/queries/q11.sql b/tools/tpch-tools/queries/q11.sql index f70fab2be9..99fbf49149 100644 --- a/tools/tpch-tools/queries/q11.sql +++ b/tools/tpch-tools/queries/q11.sql @@ -15,30 +15,31 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, - supplier, - nation + ( + select s_suppkey + from supplier, nation + where s_nationkey = n_nationkey and n_name = 'GERMANY' + ) B where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' + ps_suppkey = B.s_suppkey group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select - sum(ps_supplycost * ps_availqty) * 0.0001000000 + sum(ps_supplycost * ps_availqty) * 0.000002 from partsupp, - supplier, - nation + (select s_suppkey + from supplier, nation + where s_nationkey = n_nationkey and n_name = 'GERMANY' + ) A where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' + ps_suppkey = A.s_suppkey ) order by - value desc; + value desc; \ No newline at end of file diff --git a/tools/tpch-tools/queries/q12.sql b/tools/tpch-tools/queries/q12.sql index d53dd9c809..5fcd65c635 100644 --- a/tools/tpch-tools/queries/q12.sql +++ b/tools/tpch-tools/queries/q12.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ l_shipmode, sum(case when o_orderpriority = '1-URGENT' diff --git a/tools/tpch-tools/queries/q13.sql b/tools/tpch-tools/queries/q13.sql index a35497cd57..cf8ef42817 100644 --- a/tools/tpch-tools/queries/q13.sql +++ b/tools/tpch-tools/queries/q13.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ c_count, count(*) as custdist from @@ -24,7 +26,7 @@ from c_custkey, count(o_orderkey) as c_count from - customer left outer join orders on + orders right outer join customer on c_custkey = o_custkey and o_comment not like '%special%requests%' group by diff --git a/tools/tpch-tools/queries/q14.sql b/tools/tpch-tools/queries/q14.sql index 960cd15fb3..637808b261 100644 --- a/tools/tpch-tools/queries/q14.sql +++ b/tools/tpch-tools/queries/q14.sql @@ -15,15 +15,17 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from - lineitem, - part + part, + lineitem where l_partkey = p_partkey and l_shipdate >= date '1995-09-01' diff --git a/tools/tpch-tools/queries/q15.sql b/tools/tpch-tools/queries/q15.sql index 1f7f19b34b..db621695cf 100644 --- a/tools/tpch-tools/queries/q15.sql +++ b/tools/tpch-tools/queries/q15.sql @@ -15,20 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -create view revenue0 (supplier_no, total_revenue) as - select - l_suppkey, - sum(l_extendedprice * (1 - l_discount)) - from - lineitem - where - l_shipdate >= date '1996-01-01' - and l_shipdate < date '1996-01-01' + interval '3' month - group by - l_suppkey; - - -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ s_suppkey, s_name, s_address, @@ -47,5 +34,3 @@ where ) order by s_suppkey; - -drop view revenue0; diff --git a/tools/tpch-tools/queries/q16.sql b/tools/tpch-tools/queries/q16.sql index 6027b5c2d0..a7e2d4ca74 100644 --- a/tools/tpch-tools/queries/q16.sql +++ b/tools/tpch-tools/queries/q16.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ p_brand, p_type, p_size, diff --git a/tools/tpch-tools/queries/q17.sql b/tools/tpch-tools/queries/q17.sql index 7724caac8f..a13fbd89b6 100644 --- a/tools/tpch-tools/queries/q17.sql +++ b/tools/tpch-tools/queries/q17.sql @@ -15,20 +15,24 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ sum(l_extendedprice) / 7.0 as avg_yearly from - lineitem, - part + lineitem join [broadcast] + part p1 on p1.p_partkey = l_partkey where - p_partkey = l_partkey - and p_brand = 'Brand#23' - and p_container = 'MED BOX' + p1.p_brand = 'Brand#23' + and p1.p_container = 'MED BOX' and l_quantity < ( select 0.2 * avg(l_quantity) from - lineitem + lineitem join [broadcast] + part p2 on p2.p_partkey = l_partkey where - l_partkey = p_partkey + l_partkey = p1.p_partkey + and p2.p_brand = 'Brand#23' + and p2.p_container = 'MED BOX' ); diff --git a/tools/tpch-tools/queries/q18.sql b/tools/tpch-tools/queries/q18.sql index 352d8be84a..23d1513076 100644 --- a/tools/tpch-tools/queries/q18.sql +++ b/tools/tpch-tools/queries/q18.sql @@ -15,36 +15,44 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ c_name, c_custkey, - o_orderkey, - o_orderdate, - o_totalprice, - sum(l_quantity) + t3.o_orderkey, + t3.o_orderdate, + t3.o_totalprice, + sum(t3.l_quantity) from - customer, - orders, - lineitem -where - o_orderkey in ( - select - l_orderkey - from - lineitem - group by - l_orderkey having - sum(l_quantity) > 300 - ) - and c_custkey = o_custkey - and o_orderkey = l_orderkey +customer join +( + select * from + lineitem join + ( + select * from + orders left semi join + ( + select + l_orderkey + from + lineitem + group by + l_orderkey having sum(l_quantity) > 300 + ) t1 + on o_orderkey = t1.l_orderkey + ) t2 + on t2.o_orderkey = l_orderkey +) t3 +on c_custkey = t3.o_custkey group by c_name, c_custkey, - o_orderkey, - o_orderdate, - o_totalprice + t3.o_orderkey, + t3.o_orderdate, + t3.o_totalprice order by - o_totalprice desc, - o_orderdate + t3.o_totalprice desc, + t3.o_orderdate limit 100; + diff --git a/tools/tpch-tools/queries/q19.sql b/tools/tpch-tools/queries/q19.sql index a20a62533a..857ffc9267 100644 --- a/tools/tpch-tools/queries/q19.sql +++ b/tools/tpch-tools/queries/q19.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, diff --git a/tools/tpch-tools/queries/q2.sql b/tools/tpch-tools/queries/q2.sql index d1ccca0933..99b049d25a 100644 --- a/tools/tpch-tools/queries/q2.sql +++ b/tools/tpch-tools/queries/q2.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ s_acctbal, s_name, n_name, @@ -25,24 +27,14 @@ select s_phone, s_comment from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 15 - and p_type like '%BRASS' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - and ps_supplycost = ( + partsupp join + ( select - min(ps_supplycost) + ps_partkey as a_partkey, + min(ps_supplycost) as a_min from partsupp, + part, supplier, nation, region @@ -52,10 +44,26 @@ where and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' - ) + and p_size = 15 + and p_type like '%BRASS' + group by a_partkey + ) A on ps_partkey = a_partkey and ps_supplycost=a_min , + part, + supplier, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + order by s_acctbal desc, n_name, s_name, p_partkey -limit 100; +limit 100; \ No newline at end of file diff --git a/tools/tpch-tools/queries/q20.sql b/tools/tpch-tools/queries/q20.sql index 6803b4e3c3..77a2b9de0a 100644 --- a/tools/tpch-tools/queries/q20.sql +++ b/tools/tpch-tools/queries/q20.sql @@ -15,40 +15,30 @@ -- specific language governing permissions and limitations -- under the License. -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - and ps_availqty > ( - select - 0.5 * sum(l_quantity) - from - lineitem - where - l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= date '1994-01-01' - and l_shipdate < date '1994-01-01' + interval '1' year - ) - ) - and s_nationkey = n_nationkey +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ +s_name, s_address from +supplier left semi join +( + select * from + ( + select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q + from lineitem + where l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + group by l_partkey,l_suppkey + ) t2 join + ( + select ps_partkey, ps_suppkey, ps_availqty + from partsupp left semi join part + on ps_partkey = p_partkey and p_name like 'forest%' + ) t1 + on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey + and t1.ps_availqty > t2.l_q +) t3 +on s_suppkey = t3.ps_suppkey +join nation +where s_nationkey = n_nationkey and n_name = 'CANADA' -order by - s_name; +order by s_name; diff --git a/tools/tpch-tools/queries/q21.sql b/tools/tpch-tools/queries/q21.sql index f3cdf2228d..6b13b1ca77 100644 --- a/tools/tpch-tools/queries/q21.sql +++ b/tools/tpch-tools/queries/q21.sql @@ -15,43 +15,37 @@ -- specific language governing permissions and limitations -- under the License. -select - s_name, - count(*) as numwait -from - supplier, - lineitem l1, - orders, - nation -where - s_suppkey = l1.l_suppkey - and o_orderkey = l1.l_orderkey - and o_orderstatus = 'F' - and l1.l_receiptdate > l1.l_commitdate - and exists ( - select - * - from - lineitem l2 - where - l2.l_orderkey = l1.l_orderkey - and l2.l_suppkey <> l1.l_suppkey - ) - and not exists ( - select - * - from - lineitem l3 - where - l3.l_orderkey = l1.l_orderkey - and l3.l_suppkey <> l1.l_suppkey - and l3.l_receiptdate > l3.l_commitdate - ) - and s_nationkey = n_nationkey - and n_name = 'SAUDI ARABIA' +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ +s_name, count(*) as numwait +from orders join +( + select * from + lineitem l2 right semi join + ( + select * from + lineitem l3 right anti join + ( + select * from + lineitem l1 join + ( + select * from + supplier join nation + where s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' + ) t1 + where t1.s_suppkey = l1.l_suppkey and l1.l_receiptdate > l1.l_commitdate + ) t2 + on l3.l_orderkey = t2.l_orderkey and l3.l_suppkey <> t2.l_suppkey and l3.l_receiptdate > l3.l_commitdate + ) t3 + on l2.l_orderkey = t3.l_orderkey and l2.l_suppkey <> t3.l_suppkey +) t4 +on o_orderkey = t4.l_orderkey and o_orderstatus = 'F' group by - s_name + t4.s_name order by numwait desc, - s_name + t4.s_name limit 100; + diff --git a/tools/tpch-tools/queries/q22.sql b/tools/tpch-tools/queries/q22.sql index 48d7384bb5..44a663a0b1 100644 --- a/tools/tpch-tools/queries/q22.sql +++ b/tools/tpch-tools/queries/q22.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal diff --git a/tools/tpch-tools/queries/q3.sql b/tools/tpch-tools/queries/q3.sql index 73fbb51ede..01561b56cc 100644 --- a/tools/tpch-tools/queries/q3.sql +++ b/tools/tpch-tools/queries/q3.sql @@ -15,21 +15,23 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, o_shippriority from - customer, - orders, - lineitem -where - c_mktsegment = 'BUILDING' - and c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate < date '1995-03-15' - and l_shipdate > date '1995-03-15' + ( + select l_orderkey, l_extendedprice, l_discount, o_orderdate, o_shippriority, o_custkey from + lineitem join orders + where l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' + ) t1 join customer c + on c.c_custkey = t1.o_custkey + where c_mktsegment = 'BUILDING' group by l_orderkey, o_orderdate, diff --git a/tools/tpch-tools/queries/q4.sql b/tools/tpch-tools/queries/q4.sql index b6bc24d511..0fc4e80885 100644 --- a/tools/tpch-tools/queries/q4.sql +++ b/tools/tpch-tools/queries/q4.sql @@ -15,23 +15,24 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ o_orderpriority, count(*) as order_count from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and exists ( + ( select * from lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) + where l_commitdate < l_receiptdate + ) t1 + right semi join orders + on t1.l_orderkey = o_orderkey +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month group by o_orderpriority order by diff --git a/tools/tpch-tools/queries/q5.sql b/tools/tpch-tools/queries/q5.sql index 5e3bc4116f..135f59801d 100644 --- a/tools/tpch-tools/queries/q5.sql +++ b/tools/tpch-tools/queries/q5.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from diff --git a/tools/tpch-tools/queries/q6.sql b/tools/tpch-tools/queries/q6.sql index 2d62a6a4f1..819f4de56c 100644 --- a/tools/tpch-tools/queries/q6.sql +++ b/tools/tpch-tools/queries/q6.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ sum(l_extendedprice * l_discount) as revenue from lineitem diff --git a/tools/tpch-tools/queries/q7.sql b/tools/tpch-tools/queries/q7.sql index 9a6e4a9b9f..f1044a31c2 100644 --- a/tools/tpch-tools/queries/q7.sql +++ b/tools/tpch-tools/queries/q7.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ supp_nation, cust_nation, l_year, diff --git a/tools/tpch-tools/queries/q8.sql b/tools/tpch-tools/queries/q8.sql index e61c8632d0..3ebdf57272 100644 --- a/tools/tpch-tools/queries/q8.sql +++ b/tools/tpch-tools/queries/q8.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(exec_mem_limit=8589934592, parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ o_year, sum(case when nation = 'BRAZIL' then volume @@ -28,11 +30,11 @@ from l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from - part, - supplier, lineitem, orders, customer, + supplier, + part, nation n1, nation n2, region diff --git a/tools/tpch-tools/queries/q9.sql b/tools/tpch-tools/queries/q9.sql index 8e486d8fa3..2a5eebf54d 100644 --- a/tools/tpch-tools/queries/q9.sql +++ b/tools/tpch-tools/queries/q9.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select/*+SET_VAR(exec_mem_limit=17179869184, parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ nation, o_year, sum(amount) as sum_profit @@ -26,24 +28,18 @@ from extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from - part, - supplier, - lineitem, - partsupp, - orders, - nation + lineitem join orders on o_orderkey = l_orderkey + join[shuffle] part on p_partkey = l_partkey + join[shuffle] partsupp on ps_partkey = l_partkey + join[shuffle] supplier on s_suppkey = l_suppkey + join[broadcast] nation on s_nationkey = n_nationkey where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%green%' + ps_suppkey = l_suppkey and + p_name like '%green%' ) as profit group by nation, o_year order by nation, - o_year desc; + o_year desc; \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org