This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c2b684876b3 [Bench](tools) Add coffee-bench tools for Doris (#55730)
c2b684876b3 is described below
commit c2b684876b3c3d4790fcbb8ef9cf851e3731a7c3
Author: HappenLee <[email protected]>
AuthorDate: Mon Sep 8 23:24:34 2025 +0800
[Bench](tools) Add coffee-bench tools for Doris (#55730)
---
tools/coffeebench-tools/README.md | 42 ++++++
tools/coffeebench-tools/bin/create-tables.sh | 100 +++++++++++++
tools/coffeebench-tools/bin/load-data.sh | 159 +++++++++++++++++++++
tools/coffeebench-tools/bin/run-queries.sh | 180 ++++++++++++++++++++++++
tools/coffeebench-tools/conf/doris-cluster.conf | 31 ++++
tools/coffeebench-tools/ddl/create-tables.sql | 144 +++++++++++++++++++
tools/coffeebench-tools/queries/q1.sql | 18 +++
tools/coffeebench-tools/queries/q10.sql | 25 ++++
tools/coffeebench-tools/queries/q11.sql | 26 ++++
tools/coffeebench-tools/queries/q12.sql | 30 ++++
tools/coffeebench-tools/queries/q13.sql | 30 ++++
tools/coffeebench-tools/queries/q14.sql | 29 ++++
tools/coffeebench-tools/queries/q15.sql | 61 ++++++++
tools/coffeebench-tools/queries/q16.sql | 27 ++++
tools/coffeebench-tools/queries/q17.sql | 66 +++++++++
tools/coffeebench-tools/queries/q2.sql | 17 +++
tools/coffeebench-tools/queries/q3.sql | 31 ++++
tools/coffeebench-tools/queries/q4.sql | 20 +++
tools/coffeebench-tools/queries/q5.sql | 23 +++
tools/coffeebench-tools/queries/q6.sql | 19 +++
tools/coffeebench-tools/queries/q7.sql | 30 ++++
tools/coffeebench-tools/queries/q8.sql | 25 ++++
tools/coffeebench-tools/queries/q9.sql | 23 +++
23 files changed, 1156 insertions(+)
diff --git a/tools/coffeebench-tools/README.md
b/tools/coffeebench-tools/README.md
new file mode 100644
index 00000000000..343dcd99c0a
--- /dev/null
+++ b/tools/coffeebench-tools/README.md
@@ -0,0 +1,42 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## Usage
+
+These scripts are used to run the coffee-bench test.
+Follow the steps below:
+
+### 1. create tables. modify `conf/doris-cluster.conf` to specify doris info,
then run script below.
+
+ ./bin/create-tables.sh
+
+### 2. load coffee-bench data. use -s for scale. "500m/1b/5b"
+
+ ./bin/load-data.sh -s 500m
+
+ **Note: The data loading process uses S3 data located in the US East
(us-east-1) region.**
+ If you cannot access this region due to network restrictions or other
reasons, you can:
+ 1. Use AWS CLI to copy the data to an S3 bucket in your preferred region
+ 2. Download the data locally and use alternative methods to import it into
Doris
+
+### 3. run queries.
+
+ ./bin/run-queries.sh
+
+    NOTICE: At present, some coffee-bench queries have been rewritten to adapt them to Doris' execution framework; this does not affect the correctness of the results.
diff --git a/tools/coffeebench-tools/bin/create-tables.sh
b/tools/coffeebench-tools/bin/create-tables.sh
new file mode 100755
index 00000000000..d0de6e8665c
--- /dev/null
+++ b/tools/coffeebench-tools/bin/create-tables.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to create coffee-bench tables
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "${ROOT}"
+ pwd
+)
+
+CURDIR=${ROOT}
+
+usage() {
+ echo "
+This script is used to create the coffee-bench tables,
+using the mysql client to connect to the Doris server specified in the
+doris-cluster.conf file.
+Usage: $0
+ "
+ exit 1
+}
+
+OPTS=$(getopt \
+ -n "$0" \
+ -o '' \
+ -o 'hs:' \
+ -- "$@")
+
+eval set -- "${OPTS}"
+HELP=0
+
+if [[ $# == 0 ]]; then
+ usage
+fi
+
+while true; do
+ case "$1" in
+ -h)
+ HELP=1
+ shift
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+ usage
+fi
+
+check_prerequest() {
+ local CMD=$1
+ local NAME=$2
+ if ! ${CMD}; then
+ echo "${NAME} is missing. This script depends on mysql to create
tables in Doris."
+ exit 1
+ fi
+}
+
+check_prerequest "mysql --version" "mysql"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD}
+
+echo "FE_HOST: ${FE_HOST}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
+echo "USER: ${USER}"
+echo "DB: ${DB}"
+
+mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF
NOT EXISTS ${DB}"
+
+echo "Run SQLs from ${CURDIR}/../ddl/create-tables.sql"
+mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
<"${CURDIR}"/../ddl/create-tables.sql
+
+echo "coffee benchmark tables have been created"
diff --git a/tools/coffeebench-tools/bin/load-data.sh
b/tools/coffeebench-tools/bin/load-data.sh
new file mode 100755
index 00000000000..b19b83610b5
--- /dev/null
+++ b/tools/coffeebench-tools/bin/load-data.sh
@@ -0,0 +1,159 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to load the coffee-bench data set
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "${ROOT}"
+ pwd
+)
+
+CURDIR="${ROOT}"
+
+usage() {
+ echo "
+Usage: $0 <options>
+ Optional options:
+ -s scale factor, default is 500m
+    -c              parallelism used when loading data, default is 10
+
+ Eg.
+ $0 generate data using default value.
+ $0 -s 1b generate data with scale 1b.
+ "
+ exit 1
+}
+
+OPTS=$(getopt \
+ -n "$0" \
+ -o '' \
+ -o 'hs:c:' \
+ -- "$@")
+
+eval set -- "${OPTS}"
+
+SCALE_FACTOR="500m"
+HELP=0
+
+if [[ $# == 0 ]]; then
+ usage
+fi
+
+while true; do
+ case "$1" in
+ -h)
+ HELP=1
+ shift
+ ;;
+ -s)
+ SCALE_FACTOR=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+ usage
+fi
+
+echo "Scale Factor: ${SCALE_FACTOR}"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD:-}
+
+echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
+echo "USER: ${USER:='root'}"
+echo "DB: ${DB:='coffee_shop'}"
+
+run_sql() {
+ mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
+}
+
+# Load dim_products data from S3
+run_sql "INSERT INTO dim_products
+SELECT
+ COALESCE(record_id, '') AS record_id,
+ COALESCE(product_id, '') AS product_id,
+ COALESCE(name, '') AS name,
+ COALESCE(category, '') AS category,
+ COALESCE(subcategory, '') AS subcategory,
+ COALESCE(standard_cost, 0.0) AS standard_cost,
+ COALESCE(standard_price, 0.0) AS standard_price,
+ COALESCE(from_date, '1970-01-01') AS from_date,
+ COALESCE(to_date, '9999-12-31') AS to_date
+FROM s3(
+ 'uri' = 's3://doris-regression/coffee_bench/dim_products/*',
+ 's3.endpoint' = 's3.us-east-1.amazonaws.com',
+ 's3.region' = 'us-east-1',
+ 'format' = 'parquet'
+);"
+
+# Load dim_locations data from S3
+run_sql "INSERT INTO dim_locations
+SELECT
+ COALESCE(record_id, '') AS record_id,
+ COALESCE(location_id, '') AS location_id,
+ COALESCE(city, '') AS city,
+ COALESCE(state, '') AS state,
+ COALESCE(country, '') AS country,
+ COALESCE(region, '') AS region
+FROM s3(
+ 'uri' = 's3://doris-regression/coffee_bench/dim_locations/*',
+ 's3.endpoint' = 's3.us-east-1.amazonaws.com',
+ 's3.region' = 'us-east-1',
+ 'format' = 'parquet'
+);"
+
+# load fact_sale
+run_sql "INSERT INTO fact_sales
+SELECT
+ COALESCE(order_id, '') AS order_id,
+ COALESCE(order_line_id, '') AS order_line_id,
+ COALESCE(order_date, '1970-01-01') AS order_date,
+ COALESCE(time_of_day, '') AS time_of_day,
+ COALESCE(season, '') AS season,
+ COALESCE(month, 0) AS month,
+ COALESCE(location_id, '') AS location_id,
+ COALESCE(region, '') AS region,
+ COALESCE(product_name, '') AS product_name,
+ COALESCE(quantity, 0) AS quantity,
+ COALESCE(sales_amount, 0.0) AS sales_amount,
+ COALESCE(discount_percentage, 0) AS discount_percentage,
+ COALESCE(product_id, '') AS product_id
+FROM s3(
+ 'uri' = 's3://doris-regression/coffee_bench/fact_sales_${SCALE_FACTOR}/*',
+ 's3.endpoint' = 's3.us-east-1.amazonaws.com',
+ 's3.region' = 'us-east-1',
+ 'format' = 'parquet'
+);"
+
+echo "load finish"
diff --git a/tools/coffeebench-tools/bin/run-queries.sh
b/tools/coffeebench-tools/bin/run-queries.sh
new file mode 100755
index 00000000000..3b2c7164ae3
--- /dev/null
+++ b/tools/coffeebench-tools/bin/run-queries.sh
@@ -0,0 +1,180 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to run coffee-bench 17 queries
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "${ROOT}"
+ pwd
+)
+
+CURDIR="${ROOT}"
+
+usage() {
+ echo "
+This script is used to run the 17 coffee-bench queries,
+using the mysql client to connect to the Doris server specified in the
+doris-cluster.conf file.
+Usage: $0
+ "
+ exit 1
+}
+
+OPTS=$(getopt \
+ -n "$0" \
+ -o '' \
+ -o 'hs:' \
+ -- "$@")
+
+eval set -- "${OPTS}"
+HELP=0
+
+if [[ $# == 0 ]]; then
+ usage
+fi
+
+while true; do
+ case "$1" in
+ -h)
+ HELP=1
+ shift
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+ usage
+fi
+
+
+check_prerequest() {
+ local CMD=$1
+ local NAME=$2
+ if ! ${CMD}; then
+        echo "${NAME} is missing. This script depends on mysql to run queries in Doris."
+ exit 1
+ fi
+}
+
+check_prerequest "mysql --version" "mysql"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD:-}
+
+echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
+echo "USER: ${USER:='root'}"
+echo "DB: ${DB:='coffee_shop'}"
+echo "Time Unit: ms"
+
+run_sql() {
+ echo "$*"
+ mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
+}
+
+run_sql "set global parallel_pipeline_task_num = 30;"
+echo '============================================'
+run_sql "show variables;"
+echo '============================================'
+run_sql "show table status;"
+echo '============================================'
+
+RESULT_DIR="${CURDIR}/result"
+QUERIES_DIR="${CURDIR}/../queries"
+
+if [[ -d "${RESULT_DIR}" ]]; then
+ rm -r "${RESULT_DIR}"
+fi
+mkdir -p "${RESULT_DIR}"
+# start from an empty result.csv so repeated runs do not append to stale results
+: >result.csv
+best_run_sum=0
+# run part of queries, set their index to query_array
+query_array=$(seq 1 17)
+# shellcheck disable=SC2068
+for i in ${query_array[@]}; do
+ cold=0
+ hot1=0
+ hot2=0
+ hot3=0
+ hot4=0
+ echo -ne "q${i}\t" | tee -a result.csv
+
+ # Cold run test (first execution)
+ start=$(date +%s%3N)
+ mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out
2>"${RESULT_DIR}"/result"${i}".log
+ end=$(date +%s%3N)
+ cold=$((end - start))
+ echo -ne "${cold}\t" | tee -a result.csv
+
+ # Hot run test 1 (second execution)
+ start=$(date +%s%3N)
+ mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out
2>"${RESULT_DIR}"/result"${i}".log
+ end=$(date +%s%3N)
+ hot1=$((end - start))
+ echo -ne "${hot1}\t" | tee -a result.csv
+
+ # Hot run test 2 (third execution)
+ start=$(date +%s%3N)
+ mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out
2>"${RESULT_DIR}"/result"${i}".log
+ end=$(date +%s%3N)
+ hot2=$((end - start))
+ echo -ne "${hot2}\t" | tee -a result.csv
+
+ # Hot run test 3 (fourth execution)
+ start=$(date +%s%3N)
+ mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out
2>"${RESULT_DIR}"/result"${i}".log
+ end=$(date +%s%3N)
+ hot3=$((end - start))
+ echo -ne "${hot3}\t" | tee -a result.csv
+
+ # Hot run test 4 (fifth execution)
+ start=$(date +%s%3N)
+ mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}"
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out
2>"${RESULT_DIR}"/result"${i}".log
+ end=$(date +%s%3N)
+ hot4=$((end - start))
+ echo -ne "${hot4}\t" | tee -a result.csv
+
+ # Find the minimum execution time from all runs
+ min_time=${cold}
+ if [[ ${hot1} -lt ${min_time} ]]; then min_time=${hot1}; fi
+ if [[ ${hot2} -lt ${min_time} ]]; then min_time=${hot2}; fi
+ if [[ ${hot3} -lt ${min_time} ]]; then min_time=${hot3}; fi
+ if [[ ${hot4} -lt ${min_time} ]]; then min_time=${hot4}; fi
+
+ # Update cumulative time counters
+ best_run_sum=$((best_run_sum + min_time))
+
+ # Output the best time for this query
+ echo -ne "${min_time}" | tee -a result.csv
+ echo "" | tee -a result.csv
+done
+
+echo "Total hot run time: ${best_run_sum} ms"
+echo 'Finish coffee bench queries.'
diff --git a/tools/coffeebench-tools/conf/doris-cluster.conf
b/tools/coffeebench-tools/conf/doris-cluster.conf
new file mode 100644
index 00000000000..11c5b19c80b
--- /dev/null
+++ b/tools/coffeebench-tools/conf/doris-cluster.conf
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Host of any FE node
+export FE_HOST='127.0.0.1'
+# http_port in fe.conf
+export FE_HTTP_PORT=8030
+# query_port in fe.conf
+export FE_QUERY_PORT=9137
+# Doris username
+export USER='root'
+# Doris password
+export PASSWORD=''
+# The database where coffee_benchmark tables located
+export DB='coffee_shop'
+
+
diff --git a/tools/coffeebench-tools/ddl/create-tables.sql
b/tools/coffeebench-tools/ddl/create-tables.sql
new file mode 100644
index 00000000000..e2c1c11deb9
--- /dev/null
+++ b/tools/coffeebench-tools/ddl/create-tables.sql
@@ -0,0 +1,144 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+drop table if exists dim_locations;
+CREATE TABLE `dim_locations` (
+ `record_id` varchar(4) NOT NULL,
+ `location_id` varchar(8) NOT NULL,
+ `city` varchar(11) NOT NULL,
+ `state` varchar(2) NOT NULL,
+ `country` varchar(3) NOT NULL,
+ `region` varchar(9) NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`record_id`)
+DISTRIBUTED BY HASH(`record_id`) BUCKETS 12
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+drop table if exists dim_products;
+CREATE TABLE `dim_products` (
+ `record_id` varchar(2) NOT NULL,
+ `product_id` varchar(2) NOT NULL,
+ `name` varchar(22) NOT NULL,
+ `category` varchar(5) NOT NULL,
+ `subcategory` varchar(8) NOT NULL,
+ `standard_cost` double NOT NULL,
+ `standard_price` double NOT NULL,
+ `from_date` date NOT NULL,
+ `to_date` date NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`record_id`)
+DISTRIBUTED BY HASH(`record_id`) BUCKETS 12
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+
+drop table if exists fact_sales;
+CREATE TABLE `fact_sales` (
+ `order_id` varchar(255) NOT NULL,
+ `order_line_id` varchar(255) NOT NULL,
+ `order_date` date NOT NULL,
+ `time_of_day` varchar(50) NOT NULL,
+ `season` varchar(50) NOT NULL,
+ `month` int NOT NULL,
+ `location_id` varchar(8) NOT NULL,
+ `region` varchar(100) NOT NULL,
+ `product_name` varchar(255) NOT NULL,
+ `quantity` int NOT NULL,
+ `sales_amount` double NOT NULL,
+ `discount_percentage` int NOT NULL,
+ `product_id` varchar(255) NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`order_id`, `order_line_id`)
+DISTRIBUTED BY HASH(`order_id`) BUCKETS 256
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+drop table if exists temp1;
+drop table if exists temp2;
+drop table if exists temp3;
+
+
+CREATE TABLE `temp1` (
+t1 date not null,
+t2 varchar(5) not null,
+t3 double not null
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `temp2` (
+ `t1` date NOT NULL,
+ `t2` string NOT NULL,
+ `t3` bigint NOT NULL,
+ `t4` bigint NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `temp3` (
+ `t1` varchar(11) NOT NULL,
+ `t2` string NOT NULL,
+ `t3` date NULL,
+ `t4` double NULL,
+ `t5` double NULL,
+ `t6` double NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
diff --git a/tools/coffeebench-tools/queries/q1.sql
b/tools/coffeebench-tools/queries/q1.sql
new file mode 100644
index 00000000000..e3401706202
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q1.sql
@@ -0,0 +1,18 @@
+SELECT
+ f.order_date,
+ l.city,
+ SUM(f.sales_amount) AS total_sales,
+ AVG(SUM(f.sales_amount)) OVER (
+ PARTITION BY l.city
+ ORDER BY f.order_date
+ ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
+ ) AS rolling_7day_avg
+FROM fact_sales f
+JOIN dim_locations l
+ ON f.location_id = l.location_id
+GROUP BY
+ f.order_date,
+ l.city
+ORDER BY
+ l.city,
+ f.order_date;
diff --git a/tools/coffeebench-tools/queries/q10.sql
b/tools/coffeebench-tools/queries/q10.sql
new file mode 100644
index 00000000000..264f6963921
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q10.sql
@@ -0,0 +1,25 @@
+insert into temp2
+WITH daily_orders AS (
+ SELECT
+ f.order_date,
+ l.city,
+ COUNT(DISTINCT f.order_id) AS daily_distinct_orders
+ FROM fact_sales f
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ GROUP BY
+ l.city,
+ f.order_date
+
+)
+SELECT
+ coalesce(order_date, DATE('1970-01-01')) AS order_date,
+ coalesce(city, '') AS city,
+ daily_distinct_orders,
+ SUM(daily_distinct_orders) OVER (
+ PARTITION BY city
+ ORDER BY order_date
+ ROWS BETWEEN 89 PRECEDING AND CURRENT ROW
+ ) AS rolling_90d_distinct_orders
+FROM daily_orders
+ORDER BY city, order_date;
diff --git a/tools/coffeebench-tools/queries/q11.sql
b/tools/coffeebench-tools/queries/q11.sql
new file mode 100644
index 00000000000..a079f6f4c48
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q11.sql
@@ -0,0 +1,26 @@
+WITH city_quarter_subcat AS (
+ SELECT
+ l.city,
+ DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+ p.subcategory,
+ SUM(f.sales_amount) AS total_sales
+ FROM fact_sales f
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ JOIN dim_products p
+ ON f.product_name = p.name
+ AND f.order_date BETWEEN p.from_date AND p.to_date
+ WHERE l.city IN ('Charlotte', 'Houston')
+ GROUP BY
+ l.city,
+ DATE_TRUNC('quarter', f.order_date),
+ p.subcategory
+)
+SELECT
+ city,
+ sales_quarter,
+ subcategory,
+ total_sales,
+ RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC)
AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q12.sql
b/tools/coffeebench-tools/queries/q12.sql
new file mode 100644
index 00000000000..59d48f9a781
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q12.sql
@@ -0,0 +1,30 @@
+WITH city_quarter_subcat AS (
+ SELECT
+ l.city,
+ DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+ p.subcategory,
+ SUM(f.sales_amount) AS total_sales
+ FROM fact_sales f
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ JOIN dim_products p
+ ON f.product_name = p.name
+ AND f.order_date BETWEEN p.from_date AND p.to_date
+ WHERE l.city IN ('Charlotte', 'Houston')
+ AND DATE_TRUNC('quarter', f.order_date) IN (
+ DATE('2023-01-01'), DATE('2023-04-01'),
+ DATE('2024-01-01'), DATE('2024-04-01')
+ )
+ GROUP BY
+ l.city,
+ DATE_TRUNC('quarter', f.order_date),
+ p.subcategory
+)
+SELECT
+ city,
+ sales_quarter,
+ subcategory,
+ total_sales,
+ RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC)
AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q13.sql
b/tools/coffeebench-tools/queries/q13.sql
new file mode 100644
index 00000000000..31ef76d7a14
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q13.sql
@@ -0,0 +1,30 @@
+WITH city_quarter_subcat AS (
+ SELECT
+ l.city,
+ DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+ p.subcategory,
+ SUM(f.sales_amount) AS total_sales
+ FROM fact_sales f
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ JOIN dim_products p
+ ON f.product_name = p.name
+ AND f.order_date BETWEEN p.from_date AND p.to_date
+ WHERE l.city = 'Austin'
+ AND DATE_TRUNC('quarter', f.order_date) IN (
+ DATE('2023-01-01'), DATE('2023-04-01'),
+ DATE('2024-01-01'), DATE('2024-04-01')
+ )
+ GROUP BY
+ l.city,
+ DATE_TRUNC('quarter', f.order_date),
+ p.subcategory
+)
+SELECT
+ city,
+ sales_quarter,
+ subcategory,
+ total_sales,
+ RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC)
AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q14.sql
b/tools/coffeebench-tools/queries/q14.sql
new file mode 100644
index 00000000000..af978e3b260
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q14.sql
@@ -0,0 +1,29 @@
+WITH city_quarter_subcat AS (
+ SELECT
+ l.city,
+ DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+ p.subcategory,
+ SUM(f.sales_amount) AS total_sales
+ FROM fact_sales f
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ JOIN dim_products p
+ ON f.product_name = p.name
+ AND f.order_date BETWEEN p.from_date AND p.to_date
+ WHERE DATE_TRUNC('quarter', f.order_date) IN (
+ DATE('2023-01-01'), DATE('2023-04-01'),
+ DATE('2024-01-01'), DATE('2024-04-01')
+ )
+ GROUP BY
+ l.city,
+ DATE_TRUNC('quarter', f.order_date),
+ p.subcategory
+)
+SELECT
+ city,
+ sales_quarter,
+ subcategory,
+ total_sales,
+ RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC)
AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q15.sql
b/tools/coffeebench-tools/queries/q15.sql
new file mode 100644
index 00000000000..99ffc40c377
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q15.sql
@@ -0,0 +1,61 @@
+insert into temp3
+WITH base_data AS (
+ SELECT
+ f.location_id as location_id,
+ l.city,
+ f.product_name,
+ DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+ SUM(f.sales_amount) AS total_sales,
+ SUM(f.sales_amount * (f.discount_percentage / 100.0)) AS
total_discount,
+ SUM(f.quantity * p.standard_cost) AS total_cogs
+ FROM fact_sales f
+ INNER JOIN dim_products p
+ ON f.product_name = p.name
+ AND f.order_date BETWEEN p.from_date AND p.to_date
+ INNER JOIN dim_locations l
+ ON f.location_id = l.location_id
+ WHERE f.order_date BETWEEN '2022-01-01' AND '2024-12-31'
+ GROUP BY
+ f.location_id,
+ l.city,
+ f.product_name,
+ DATE_TRUNC('quarter', f.order_date)
+),
+with_profit AS (
+ SELECT
+ *,
+ total_sales - total_discount - total_cogs AS profit
+ FROM base_data
+)
+SELECT
+ city,
+ product_name,
+ sales_quarter,
+ profit,
+ LAG(profit, 1) OVER (
+ PARTITION BY location_id, product_name
+ ORDER BY sales_quarter
+ ) AS prev_profit,
+ ROUND(
+ CASE
+ WHEN LAG(profit, 1) OVER (
+ PARTITION BY location_id, product_name
+ ORDER BY sales_quarter
+ ) = 0 OR
+ LAG(profit, 1) OVER (
+ PARTITION BY location_id, product_name
+ ORDER BY sales_quarter
+ ) IS NULL
+ THEN NULL
+ ELSE 100.0 * (profit - LAG(profit, 1) OVER (
+ PARTITION BY location_id, product_name
+ ORDER BY sales_quarter
+ )) / LAG(profit, 1) OVER (
+ PARTITION BY location_id, product_name
+ ORDER BY sales_quarter
+ )
+ END,
+ 2
+ ) AS yoy_profit_pct
+FROM with_profit
+ORDER BY location_id, product_name, sales_quarter;
diff --git a/tools/coffeebench-tools/queries/q16.sql
b/tools/coffeebench-tools/queries/q16.sql
new file mode 100644
index 00000000000..5522a3c2552
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q16.sql
@@ -0,0 +1,27 @@
+WITH seasonal_data AS (
+ SELECT
+ l.state,
+ f.season,
+ p.category,
+ SUM(f.sales_amount) AS total_sales,
+ SUM(f.quantity) AS total_units,
+ COUNT(DISTINCT f.order_id) AS order_count
+ FROM fact_sales f
+ JOIN dim_products p
+ ON f.product_name = p.name
+ AND DATE(f.order_date) BETWEEN DATE(p.from_date) AND DATE(p.to_date)
+ JOIN dim_locations l
+ ON f.location_id = l.location_id
+ WHERE DATE(f.order_date) BETWEEN DATE('2023-01-01') AND DATE('2024-06-30')
+ GROUP BY l.state, f.season, p.category
+),
+ranked AS (
+ SELECT
+ *,
+ DENSE_RANK() OVER (PARTITION BY state, season ORDER BY total_sales
DESC) AS category_rank
+ FROM seasonal_data
+)
+SELECT *
+FROM ranked
+WHERE category_rank <= 3
+ORDER BY state, season, category_rank;
diff --git a/tools/coffeebench-tools/queries/q17.sql
b/tools/coffeebench-tools/queries/q17.sql
new file mode 100644
index 00000000000..5291125242d
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q17.sql
@@ -0,0 +1,66 @@
+WITH raw_agg AS (
+ SELECT
+ l.state,
+ f.season,
+ p.category,
+ SUM(f.sales_amount) AS total_sales,
+ SUM(f.quantity) AS total_units,
+ COUNT(DISTINCT f.order_id) AS order_count
+ FROM fact_sales AS f
+ JOIN dim_products AS p
+ ON f.product_id = p.product_id
+ AND DATE(f.order_date) BETWEEN DATE(p.from_date) AND DATE(p.to_date)
+ JOIN dim_locations AS l
+ ON f.location_id = l.location_id
+ WHERE
+ l.region = 'West'
+ AND f.time_of_day = 'Morning'
+ AND p.name = 'Frappe'
+ AND DATE(f.order_date) BETWEEN DATE('2023-01-01') AND DATE('2024-06-30')
+ GROUP BY
+ l.state,
+ f.season,
+ p.category
+),
+seasonal_data AS (
+ SELECT
+ state,
+ season,
+ category,
+ total_sales,
+ total_units,
+ order_count,
+ SUM(total_sales) OVER (PARTITION BY state, season) AS
season_total_sales
+ FROM raw_agg
+),
+ranked AS (
+ SELECT
+ state,
+ season,
+ category,
+ total_sales,
+ total_units,
+ order_count,
+ season_total_sales,
+ ROUND(100.0 * total_sales / season_total_sales, 2) AS pct_of_season,
+ DENSE_RANK() OVER (
+ PARTITION BY state, season
+ ORDER BY total_sales DESC
+ ) AS category_rank
+ FROM seasonal_data
+)
+SELECT
+ state,
+ season,
+ category,
+ total_sales,
+ total_units,
+ order_count,
+ pct_of_season,
+ category_rank
+FROM ranked
+WHERE category_rank <= 3
+ORDER BY
+ state,
+ season,
+ category_rank;
diff --git a/tools/coffeebench-tools/queries/q2.sql
b/tools/coffeebench-tools/queries/q2.sql
new file mode 100644
index 00000000000..aac53769be8
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q2.sql
@@ -0,0 +1,17 @@
+-- Rank every product's total sales within each calendar month.
+SELECT
+    m.sales_month,
+    m.product_name,
+    m.total_sales,
+    RANK() OVER (PARTITION BY m.sales_month ORDER BY m.total_sales DESC) AS sales_rank
+FROM (
+    -- Per-month, per-product revenue rollup.
+    SELECT
+        DATE_TRUNC('month', s.order_date) AS sales_month,
+        s.product_name,
+        SUM(s.sales_amount) AS total_sales
+    FROM fact_sales AS s
+    GROUP BY
+        DATE_TRUNC('month', s.order_date),
+        s.product_name
+) AS m
+ORDER BY sales_month, sales_rank;
diff --git a/tools/coffeebench-tools/queries/q3.sql
b/tools/coffeebench-tools/queries/q3.sql
new file mode 100644
index 00000000000..fb28025e279
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q3.sql
@@ -0,0 +1,31 @@
+-- Top-3 cities by average discount within each season.
+WITH season_discount AS (
+    -- Average discount per city/state/season.
+    SELECT
+        loc.city,
+        loc.state,
+        s.season,
+        AVG(s.discount_percentage) AS avg_discount
+    FROM fact_sales AS s
+    JOIN dim_locations AS loc
+        ON s.location_id = loc.location_id
+    GROUP BY
+        loc.city,
+        loc.state,
+        s.season
+),
+discount_ranked AS (
+    -- Rank locations inside each season; DENSE_RANK keeps ties adjacent.
+    SELECT
+        city,
+        state,
+        season,
+        avg_discount,
+        DENSE_RANK() OVER (PARTITION BY season ORDER BY avg_discount DESC) AS discount_rank
+    FROM season_discount
+)
+SELECT
+    city,
+    state,
+    season,
+    avg_discount,
+    discount_rank
+FROM discount_ranked
+WHERE discount_rank <= 3
+ORDER BY season, discount_rank;
diff --git a/tools/coffeebench-tools/queries/q4.sql
b/tools/coffeebench-tools/queries/q4.sql
new file mode 100644
index 00000000000..d706ded5225
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q4.sql
@@ -0,0 +1,20 @@
+-- Theoretical margin per product per order date, priced by the product
+-- version (standard price/cost) effective on that date (SCD2 window join).
+SELECT
+    fs.order_date,
+    fs.product_name,
+    prod.standard_price,
+    prod.standard_cost,
+    SUM(fs.quantity) AS total_quantity_sold,
+    SUM(fs.sales_amount) AS total_sales_amount,
+    (prod.standard_price - prod.standard_cost) * SUM(fs.quantity) AS theoretical_margin
+FROM fact_sales AS fs
+INNER JOIN dim_products AS prod
+    ON fs.product_name = prod.name
+    AND fs.order_date BETWEEN prod.from_date AND prod.to_date
+GROUP BY
+    fs.order_date,
+    fs.product_name,
+    prod.standard_price,
+    prod.standard_cost
+ORDER BY
+    fs.order_date,
+    fs.product_name;
diff --git a/tools/coffeebench-tools/queries/q5.sql
b/tools/coffeebench-tools/queries/q5.sql
new file mode 100644
index 00000000000..ea117484b93
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q5.sql
@@ -0,0 +1,23 @@
+-- 30-day rolling quantity per city (29 preceding rows plus the current day).
+WITH city_daily_qty AS (
+    -- Units sold per city per day.
+    SELECT
+        s.order_date,
+        loc.city,
+        SUM(s.quantity) AS daily_qty
+    FROM fact_sales AS s
+    JOIN dim_locations AS loc
+        ON s.location_id = loc.location_id
+    GROUP BY
+        s.order_date,
+        loc.city
+)
+SELECT
+    order_date,
+    city,
+    daily_qty,
+    -- ROWS frame counts rows, not calendar days: gaps in the date series
+    -- shrink the effective window to fewer than 30 days.
+    SUM(daily_qty) OVER (
+        PARTITION BY city
+        ORDER BY order_date
+        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
+    ) AS rolling_30day_qty
+FROM city_daily_qty
+ORDER BY city, order_date;
diff --git a/tools/coffeebench-tools/queries/q6.sql
b/tools/coffeebench-tools/queries/q6.sql
new file mode 100644
index 00000000000..6173883e81e
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q6.sql
@@ -0,0 +1,19 @@
+-- Q6: materialize monthly revenue per product category into temp1.
+-- NOTE(review): the INSERT has no explicit column list, so it relies on
+-- temp1's column order being (sales_month, category, monthly_revenue) —
+-- confirm against temp1's DDL.
+INSERT INTO temp1
+WITH monthly_cat AS (
+    SELECT
+        DATE_TRUNC('month', f.order_date) AS sales_month,
+        p.category,
+        SUM(f.sales_amount) AS monthly_revenue
+    FROM fact_sales f
+    JOIN dim_products p
+        ON f.product_name = p.name
+        -- SCD2 validity window: match the product version effective at order time.
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    GROUP BY
+        DATE_TRUNC('month', f.order_date),
+        p.category
+)
+SELECT
+    -- Sentinel date stands in for a NULL month (possible only when order_date is NULL).
+    COALESCE(sales_month, DATE('1970-01-01')) AS sales_month,
+    category,
+    monthly_revenue
+FROM monthly_cat;
diff --git a/tools/coffeebench-tools/queries/q7.sql
b/tools/coffeebench-tools/queries/q7.sql
new file mode 100644
index 00000000000..d754a4f436e
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q7.sql
@@ -0,0 +1,30 @@
+-- Year-over-year (2024 vs 2023) sales comparison per city/state.
+WITH yearly_sales AS (
+    -- Revenue per location per calendar year.
+    SELECT
+        loc.location_id,
+        loc.city,
+        loc.state,
+        YEAR(s.order_date) AS sales_year,
+        SUM(s.sales_amount) AS total_sales_year
+    FROM fact_sales AS s
+    JOIN dim_locations AS loc
+        ON s.location_id = loc.location_id
+    GROUP BY
+        loc.location_id,
+        loc.city,
+        loc.state,
+        YEAR(s.order_date)
+),
+pivoted AS (
+    -- Pivot the per-year totals onto one row per (city, state).
+    SELECT
+        city,
+        state,
+        SUM(CASE WHEN sales_year = 2023 THEN total_sales_year ELSE 0 END) AS sales_2023,
+        SUM(CASE WHEN sales_year = 2024 THEN total_sales_year ELSE 0 END) AS sales_2024
+    FROM yearly_sales
+    GROUP BY
+        city,
+        state
+)
+SELECT
+    city,
+    state,
+    sales_2023,
+    sales_2024,
+    sales_2024 - sales_2023 AS yoy_diff
+FROM pivoted
+ORDER BY
+    city,
+    state;
diff --git a/tools/coffeebench-tools/queries/q8.sql
b/tools/coffeebench-tools/queries/q8.sql
new file mode 100644
index 00000000000..e1af1c0fc93
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q8.sql
@@ -0,0 +1,25 @@
+-- Rank product subcategories by revenue within each city and quarter.
+WITH subcat_sales AS (
+    -- Revenue per city, quarter and product subcategory.
+    SELECT
+        loc.city,
+        DATE_TRUNC('quarter', s.order_date) AS sales_quarter,
+        prod.subcategory,
+        SUM(s.sales_amount) AS total_sales
+    FROM fact_sales AS s
+    JOIN dim_locations AS loc
+        ON s.location_id = loc.location_id
+    JOIN dim_products AS prod
+        ON s.product_name = prod.name
+        -- SCD2 validity window: match the product version effective at order time.
+        AND s.order_date BETWEEN prod.from_date AND prod.to_date
+    GROUP BY
+        loc.city,
+        DATE_TRUNC('quarter', s.order_date),
+        prod.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM subcat_sales
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q9.sql
b/tools/coffeebench-tools/queries/q9.sql
new file mode 100644
index 00000000000..2ddc885c3f2
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q9.sql
@@ -0,0 +1,23 @@
+-- Running (cumulative) average of the daily average discount per city.
+WITH per_day_discount AS (
+    -- Average discount per city per day.
+    SELECT
+        loc.city,
+        s.order_date,
+        AVG(s.discount_percentage) AS avg_discount
+    FROM fact_sales AS s
+    JOIN dim_locations AS loc
+        ON s.location_id = loc.location_id
+    GROUP BY
+        loc.city,
+        s.order_date
+)
+SELECT
+    city,
+    order_date,
+    avg_discount,
+    -- Average of the daily averages from the first day through the current
+    -- day (unweighted by each day's order count).
+    AVG(avg_discount) OVER (
+        PARTITION BY city
+        ORDER BY order_date
+        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    ) AS cumulative_avg_discount
+FROM per_day_discount
+ORDER BY city, order_date;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]