This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c2b684876b3 [Bench](tools) Add coffee-bench tools for Doris (#55730)
c2b684876b3 is described below

commit c2b684876b3c3d4790fcbb8ef9cf851e3731a7c3
Author: HappenLee <[email protected]>
AuthorDate: Mon Sep 8 23:24:34 2025 +0800

    [Bench](tools) Add coffee-bench tools for Doris (#55730)
---
 tools/coffeebench-tools/README.md               |  42 ++++++
 tools/coffeebench-tools/bin/create-tables.sh    | 100 +++++++++++++
 tools/coffeebench-tools/bin/load-data.sh        | 159 +++++++++++++++++++++
 tools/coffeebench-tools/bin/run-queries.sh      | 180 ++++++++++++++++++++++++
 tools/coffeebench-tools/conf/doris-cluster.conf |  31 ++++
 tools/coffeebench-tools/ddl/create-tables.sql   | 144 +++++++++++++++++++
 tools/coffeebench-tools/queries/q1.sql          |  18 +++
 tools/coffeebench-tools/queries/q10.sql         |  25 ++++
 tools/coffeebench-tools/queries/q11.sql         |  26 ++++
 tools/coffeebench-tools/queries/q12.sql         |  30 ++++
 tools/coffeebench-tools/queries/q13.sql         |  30 ++++
 tools/coffeebench-tools/queries/q14.sql         |  29 ++++
 tools/coffeebench-tools/queries/q15.sql         |  61 ++++++++
 tools/coffeebench-tools/queries/q16.sql         |  27 ++++
 tools/coffeebench-tools/queries/q17.sql         |  66 +++++++++
 tools/coffeebench-tools/queries/q2.sql          |  17 +++
 tools/coffeebench-tools/queries/q3.sql          |  31 ++++
 tools/coffeebench-tools/queries/q4.sql          |  20 +++
 tools/coffeebench-tools/queries/q5.sql          |  23 +++
 tools/coffeebench-tools/queries/q6.sql          |  19 +++
 tools/coffeebench-tools/queries/q7.sql          |  30 ++++
 tools/coffeebench-tools/queries/q8.sql          |  25 ++++
 tools/coffeebench-tools/queries/q9.sql          |  23 +++
 23 files changed, 1156 insertions(+)

diff --git a/tools/coffeebench-tools/README.md b/tools/coffeebench-tools/README.md
new file mode 100644
index 00000000000..343dcd99c0a
--- /dev/null
+++ b/tools/coffeebench-tools/README.md
@@ -0,0 +1,42 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## Usage
+
+These scripts are used to run the coffee-bench test.
+Follow the steps below:
+
+### 1. Create tables. Modify `conf/doris-cluster.conf` with your Doris cluster info, then run the script below.
+
+    ./bin/create-tables.sh
+
+### 2. Load coffee-bench data. Use `-s` to set the scale factor: `500m`, `1b`, or `5b`.
+
+    ./bin/load-data.sh -s 500m
+
+    **Note: The data loading process uses S3 data located in the US East (us-east-1) region.**
+    If you cannot access this region due to network restrictions or other reasons, you can:
+    1. Use the AWS CLI to copy the data to an S3 bucket in your preferred region (see the sketch below)
+    2. Download the data locally and use an alternative method to import it into Doris
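+
+    A minimal copy sketch for option 1, assuming a hypothetical destination
+    bucket `s3://my-bucket` and AWS credentials that can read the source data:
+
+        aws s3 sync s3://doris-regression/coffee_bench/ s3://my-bucket/coffee_bench/ \
+            --source-region us-east-1
+
+    After copying, point the `s3()` URIs in `bin/load-data.sh` at your bucket and region.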
+
+### 3. Run queries.
+
+    ./bin/run-queries.sh
+
+    NOTICE: Some coffee-bench queries have been rewritten to adapt to Doris' execution framework; this does not affect the correctness of the results.
+    Per-query times (one cold run, four hot runs, and the best time, in ms) are written to `result.csv`.
diff --git a/tools/coffeebench-tools/bin/create-tables.sh b/tools/coffeebench-tools/bin/create-tables.sh
new file mode 100755
index 00000000000..d0de6e8665c
--- /dev/null
+++ b/tools/coffeebench-tools/bin/create-tables.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to create coffee-bench tables
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+    cd "${ROOT}"
+    pwd
+)
+
+CURDIR=${ROOT}
+
+usage() {
+    echo "
+This script is used to create the coffee-bench tables,
+using the mysql client to connect to the Doris server specified in the doris-cluster.conf file.
+Usage: $0 
+  "
+    exit 1
+}
+
+OPTS=$(getopt \
+    -n "$0" \
+    -o 'h' \
+    -- "$@")
+
+eval set -- "${OPTS}"
+HELP=0
+
+if [[ $# == 0 ]]; then
+    usage
+fi
+
+while true; do
+    case "$1" in
+    -h)
+        HELP=1
+        shift
+        ;;
+    --)
+        shift
+        break
+        ;;
+    *)
+        echo "Internal error"
+        exit 1
+        ;;
+    esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+    usage
+fi
+
+check_prerequest() {
+    local CMD=$1
+    local NAME=$2
+    if ! ${CMD}; then
+        echo "${NAME} is missing. This script depends on mysql to create 
tables in Doris."
+        exit 1
+    fi
+}
+
+check_prerequest "mysql --version" "mysql"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD}
+
+echo "FE_HOST: ${FE_HOST}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
+echo "USER: ${USER}"
+echo "DB: ${DB}"
+
+mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF 
NOT EXISTS ${DB}"
+
+echo "Run SQLs from ${CURDIR}/../ddl/create-tables.sql"
+mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
<"${CURDIR}"/../ddl/create-tables.sql
+
+echo "coffee benchmark tables has been created"
diff --git a/tools/coffeebench-tools/bin/load-data.sh b/tools/coffeebench-tools/bin/load-data.sh
new file mode 100755
index 00000000000..b19b83610b5
--- /dev/null
+++ b/tools/coffeebench-tools/bin/load-data.sh
@@ -0,0 +1,159 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to load the coffee-bench data set
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+    cd "${ROOT}"
+    pwd
+)
+
+CURDIR="${ROOT}"
+
+usage() {
+    echo "
+Usage: $0 <options>
+  Optional options:
+     -s             scale factor, default is 500m
+
+  Eg.
+    $0              load data using the default scale factor.
+    $0 -s 1b        load data with scale 1b.
+  "
+    exit 1
+}
+
+OPTS=$(getopt \
+    -n "$0" \
+    -o 'hs:' \
+    -- "$@")
+
+eval set -- "${OPTS}"
+
+SCALE_FACTOR="500m"
+HELP=0
+
+if [[ $# == 0 ]]; then
+    usage
+fi
+
+while true; do
+    case "$1" in
+    -h)
+        HELP=1
+        shift
+        ;;
+    -s)
+        SCALE_FACTOR=$2
+        shift 2
+        ;;
+    --)
+        shift
+        break
+        ;;
+    *)
+        echo "Internal error"
+        exit 1
+        ;;
+    esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+    usage
+fi
+
+echo "Scale Factor: ${SCALE_FACTOR}"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD:-}
+
+echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
+echo "USER: ${USER:='root'}"
+echo "DB: ${DB:='coffee_bench'}"
+
+run_sql() {
+    mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
+}
+
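+# The s3() table function below reads the Parquet files directly from S3.
+# (Optional suggestion, not part of the original flow) Preview the source
+# data first with a LIMIT query, e.g.:
+# run_sql "SELECT * FROM s3('uri' = 's3://doris-regression/coffee_bench/dim_products/*', 's3.endpoint' = 's3.us-east-1.amazonaws.com', 's3.region' = 'us-east-1', 'format' = 'parquet') LIMIT 10;"
+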
+# Load dim_products data from S3
+run_sql "INSERT INTO dim_products
+SELECT
+    COALESCE(record_id, '') AS record_id,
+    COALESCE(product_id, '') AS product_id,
+    COALESCE(name, '') AS name,
+    COALESCE(category, '') AS category,
+    COALESCE(subcategory, '') AS subcategory,
+    COALESCE(standard_cost, 0.0) AS standard_cost,
+    COALESCE(standard_price, 0.0) AS standard_price,
+    COALESCE(from_date, '1970-01-01') AS from_date,
+    COALESCE(to_date, '9999-12-31') AS to_date
+FROM s3(
+    'uri' = 's3://doris-regression/coffee_bench/dim_products/*',
+    's3.endpoint' = 's3.us-east-1.amazonaws.com',
+    's3.region' = 'us-east-1',
+    'format' = 'parquet'
+);"
+
+# Load dim_locations data from S3
+run_sql "INSERT INTO dim_locations
+SELECT
+    COALESCE(record_id, '') AS record_id,
+    COALESCE(location_id, '') AS location_id,
+    COALESCE(city, '') AS city,
+    COALESCE(state, '') AS state,
+    COALESCE(country, '') AS country,
+    COALESCE(region, '') AS region
+FROM s3(
+    'uri' = 's3://doris-regression/coffee_bench/dim_locations/*',
+    's3.endpoint' = 's3.us-east-1.amazonaws.com',
+    's3.region' = 'us-east-1',
+    'format' = 'parquet'
+);"
+
+# Load fact_sales data from S3
+run_sql "INSERT INTO fact_sales
+SELECT
+    COALESCE(order_id, '') AS order_id,
+    COALESCE(order_line_id, '') AS order_line_id,
+    COALESCE(order_date, '1970-01-01') AS order_date,
+    COALESCE(time_of_day, '') AS time_of_day,
+    COALESCE(season, '') AS season,
+    COALESCE(month, 0) AS month,
+    COALESCE(location_id, '') AS location_id,
+    COALESCE(region, '') AS region,
+    COALESCE(product_name, '') AS product_name,
+    COALESCE(quantity, 0) AS quantity,
+    COALESCE(sales_amount, 0.0) AS sales_amount,
+    COALESCE(discount_percentage, 0) AS discount_percentage,
+    COALESCE(product_id, '') AS product_id
+FROM s3(
+    'uri' = 's3://doris-regression/coffee_bench/fact_sales_${SCALE_FACTOR}/*',
+    's3.endpoint' = 's3.us-east-1.amazonaws.com',
+    's3.region' = 'us-east-1',
+    'format' = 'parquet'
+);"
+
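+# (Optional suggestion, not part of the original flow) A quick sanity check
+# on the loaded rows, e.g.:
+# run_sql "SELECT COUNT(*) FROM fact_sales;"
+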
+echo "load finish"
diff --git a/tools/coffeebench-tools/bin/run-queries.sh b/tools/coffeebench-tools/bin/run-queries.sh
new file mode 100755
index 00000000000..3b2c7164ae3
--- /dev/null
+++ b/tools/coffeebench-tools/bin/run-queries.sh
@@ -0,0 +1,180 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to run the 17 coffee-bench queries
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+    cd "${ROOT}"
+    pwd
+)
+
+CURDIR="${ROOT}"
+
+usage() {
+    echo "
+This script is used to run the 17 coffee-bench queries,
+using the mysql client to connect to the Doris server specified in the doris-cluster.conf file.
+Usage: $0 
+  "
+    exit 1
+}
+
+OPTS=$(getopt \
+    -n "$0" \
+    -o 'h' \
+    -- "$@")
+
+eval set -- "${OPTS}"
+HELP=0
+
+if [[ $# == 0 ]]; then
+    usage
+fi
+
+while true; do
+    case "$1" in
+    -h)
+        HELP=1
+        shift
+        ;;
+    --)
+        shift
+        break
+        ;;
+    *)
+        echo "Internal error"
+        exit 1
+        ;;
+    esac
+done
+
+if [[ "${HELP}" -eq 1 ]]; then
+    usage
+fi
+
+
+check_prerequest() {
+    local CMD=$1
+    local NAME=$2
+    if ! ${CMD}; then
+        echo "${NAME} is missing. This script depends on mysql to create 
tables in Doris."
+        exit 1
+    fi
+}
+
+check_prerequest "mysql --version" "mysql"
+
+source "${CURDIR}/../conf/doris-cluster.conf"
+export MYSQL_PWD=${PASSWORD:-}
+
+echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
+echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
+echo "USER: ${USER:='root'}"
+echo "DB: ${DB:='coffee_shop'}"
+echo "Time Unit: ms"
+
+run_sql() {
+    echo "$*"
+    mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
+}
+
+run_sql "set global parallel_pipeline_task_num = 30;"
+echo '============================================'
+run_sql "show variables;"
+echo '============================================'
+run_sql "show table status;"
+echo '============================================'
+
+RESULT_DIR="${CURDIR}/result"
+QUERIES_DIR="${CURDIR}/../queries"
+
+if [[ -d "${RESULT_DIR}" ]]; then
+    rm -r "${RESULT_DIR}"
+fi
+mkdir -p "${RESULT_DIR}"
+: >result.csv # truncate results from any previous run
+best_run_sum=0
+# run part of queries, set their index to query_array
+query_array=$(seq 1 17)
+# shellcheck disable=SC2068
+for i in ${query_array[@]}; do
+    cold=0
+    hot1=0
+    hot2=0
+    hot3=0
+    hot4=0
+    echo -ne "q${i}\t" | tee -a result.csv
+
+    # Cold run test (first execution)
+    start=$(date +%s%3N)
+    mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out 
2>"${RESULT_DIR}"/result"${i}".log
+    end=$(date +%s%3N)
+    cold=$((end - start))
+    echo -ne "${cold}\t" | tee -a result.csv
+
+    # Hot run test 1 (second execution)
+    start=$(date +%s%3N)
+    mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out 
2>"${RESULT_DIR}"/result"${i}".log
+    end=$(date +%s%3N)
+    hot1=$((end - start))
+    echo -ne "${hot1}\t" | tee -a result.csv
+
+    # Hot run test 2 (third execution)
+    start=$(date +%s%3N)
+    mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out 
2>"${RESULT_DIR}"/result"${i}".log
+    end=$(date +%s%3N)
+    hot2=$((end - start))
+    echo -ne "${hot2}\t" | tee -a result.csv
+
+    # Hot run test 3 (fourth execution)
+    start=$(date +%s%3N)
+    mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out 
2>"${RESULT_DIR}"/result"${i}".log
+    end=$(date +%s%3N)
+    hot3=$((end - start))
+    echo -ne "${hot3}\t" | tee -a result.csv
+
+    # Hot run test 4 (fifth execution)
+    start=$(date +%s%3N)
+    mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" 
--comments <"${QUERIES_DIR}"/q"${i}".sql >"${RESULT_DIR}"/result"${i}".out 
2>"${RESULT_DIR}"/result"${i}".log
+    end=$(date +%s%3N)
+    hot4=$((end - start))
+    echo -ne "${hot4}\t" | tee -a result.csv
+
+    # Find the minimum execution time from all runs
+    min_time=${cold}
+    if [[ ${hot1} -lt ${min_time} ]]; then min_time=${hot1}; fi
+    if [[ ${hot2} -lt ${min_time} ]]; then min_time=${hot2}; fi
+    if [[ ${hot3} -lt ${min_time} ]]; then min_time=${hot3}; fi
+    if [[ ${hot4} -lt ${min_time} ]]; then min_time=${hot4}; fi
+
+    # Update cumulative time counters
+    best_run_sum=$((best_run_sum + min_time))
+
+    # Output the best time for this query
+    echo -ne "${min_time}" | tee -a result.csv
+    echo "" | tee -a result.csv
+done
+
+echo "Total hot run time: ${best_run_sum} ms"
+echo 'Finish coffee bench queries.'
diff --git a/tools/coffeebench-tools/conf/doris-cluster.conf b/tools/coffeebench-tools/conf/doris-cluster.conf
new file mode 100644
index 00000000000..11c5b19c80b
--- /dev/null
+++ b/tools/coffeebench-tools/conf/doris-cluster.conf
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Any one of the FE hosts
+export FE_HOST='127.0.0.1'
+# http_port in fe.conf
+export FE_HTTP_PORT=8030
+# query_port in fe.conf
+export FE_QUERY_PORT=9137
+# Doris username
+export USER='root'
+# Doris password
+export PASSWORD=''
+# The database where the coffee-bench tables are located
+export DB='coffee_shop'
+
+
diff --git a/tools/coffeebench-tools/ddl/create-tables.sql b/tools/coffeebench-tools/ddl/create-tables.sql
new file mode 100644
index 00000000000..e2c1c11deb9
--- /dev/null
+++ b/tools/coffeebench-tools/ddl/create-tables.sql
@@ -0,0 +1,144 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+-- 
+--   http://www.apache.org/licenses/LICENSE-2.0
+-- 
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+DROP TABLE IF EXISTS dim_locations;
+CREATE TABLE `dim_locations` (
+  `record_id` varchar(4) NOT NULL,
+  `location_id` varchar(8) NOT NULL,
+  `city` varchar(11) NOT NULL,
+  `state` varchar(2) NOT NULL,
+  `country` varchar(3) NOT NULL,
+  `region` varchar(9) NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`record_id`)
+DISTRIBUTED BY HASH(`record_id`) BUCKETS 12
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+DROP TABLE IF EXISTS dim_products;
+CREATE TABLE `dim_products` (
+  `record_id` varchar(2) NOT NULL,
+  `product_id` varchar(2) NOT NULL,
+  `name` varchar(22) NOT NULL,
+  `category` varchar(5) NOT NULL,
+  `subcategory` varchar(8) NOT NULL,
+  `standard_cost` double NOT NULL,
+  `standard_price` double NOT NULL,
+  `from_date` date NOT NULL,
+  `to_date` date NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`record_id`)
+DISTRIBUTED BY HASH(`record_id`) BUCKETS 12
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+
+DROP TABLE IF EXISTS fact_sales;
+CREATE TABLE `fact_sales` (
+  `order_id` varchar(255) NOT NULL,
+  `order_line_id` varchar(255) NOT NULL,
+  `order_date` date NOT NULL,
+  `time_of_day` varchar(50) NOT NULL,
+  `season` varchar(50) NOT NULL,
+  `month` int NOT NULL,
+  `location_id` varchar(8) NOT NULL,
+  `region` varchar(100) NOT NULL,
+  `product_name` varchar(255) NOT NULL,
+  `quantity` int NOT NULL,
+  `sales_amount` double NOT NULL,
+  `discount_percentage` int NOT NULL,
+  `product_id` varchar(255) NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`order_id`, `order_line_id`)
+DISTRIBUTED BY HASH(`order_id`) BUCKETS 256
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+DROP TABLE IF EXISTS temp1;
+DROP TABLE IF EXISTS temp2;
+DROP TABLE IF EXISTS temp3;
+
+
+CREATE TABLE `temp1` (
+t1 date not null,
+t2 varchar(5) not null,
+t3 double not null
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `temp2` (
+  `t1` date NOT NULL,
+  `t2` string NOT NULL,
+  `t3` bigint NOT NULL,
+  `t4` bigint NOT NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `temp3` (
+  `t1` varchar(11) NOT NULL,
+  `t2` string NOT NULL,
+  `t3` date NULL,
+  `t4` double NULL,
+  `t5` double NULL,
+  `t6` double NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`t1`)
+DISTRIBUTED BY RANDOM BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
diff --git a/tools/coffeebench-tools/queries/q1.sql b/tools/coffeebench-tools/queries/q1.sql
new file mode 100644
index 00000000000..e3401706202
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q1.sql
@@ -0,0 +1,18 @@
+SELECT
+    f.order_date,
+    l.city,
+    SUM(f.sales_amount) AS total_sales,
+    AVG(SUM(f.sales_amount)) OVER (
+        PARTITION BY l.city
+        ORDER BY f.order_date
+        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
+    ) AS rolling_7day_avg
+FROM fact_sales f
+JOIN dim_locations l
+    ON f.location_id = l.location_id
+GROUP BY
+    f.order_date,
+    l.city
+ORDER BY
+    l.city,
+    f.order_date;
diff --git a/tools/coffeebench-tools/queries/q10.sql b/tools/coffeebench-tools/queries/q10.sql
new file mode 100644
index 00000000000..264f6963921
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q10.sql
@@ -0,0 +1,25 @@
+insert into temp2
+WITH daily_orders AS (
+    SELECT
+        f.order_date,
+        l.city,
+        COUNT(DISTINCT f.order_id) AS daily_distinct_orders
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    GROUP BY
+        l.city,
+        f.order_date
+
+)
+SELECT
+    coalesce(order_date, DATE('1970-01-01')) AS order_date,
+    coalesce(city, '') AS city,
+    daily_distinct_orders,
+    SUM(daily_distinct_orders) OVER (
+        PARTITION BY city
+        ORDER BY order_date
+        ROWS BETWEEN 89 PRECEDING AND CURRENT ROW
+    ) AS rolling_90d_distinct_orders
+FROM daily_orders
+ORDER BY city, order_date;
diff --git a/tools/coffeebench-tools/queries/q11.sql b/tools/coffeebench-tools/queries/q11.sql
new file mode 100644
index 00000000000..a079f6f4c48
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q11.sql
@@ -0,0 +1,26 @@
+WITH city_quarter_subcat AS (
+    SELECT
+        l.city,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        p.subcategory,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    WHERE l.city IN ('Charlotte', 'Houston')
+    GROUP BY
+        l.city,
+        DATE_TRUNC('quarter', f.order_date),
+        p.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q12.sql b/tools/coffeebench-tools/queries/q12.sql
new file mode 100644
index 00000000000..59d48f9a781
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q12.sql
@@ -0,0 +1,30 @@
+WITH city_quarter_subcat AS (
+    SELECT
+        l.city,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        p.subcategory,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    WHERE l.city IN ('Charlotte', 'Houston')
+      AND DATE_TRUNC('quarter', f.order_date) IN (
+            DATE('2023-01-01'), DATE('2023-04-01'),
+            DATE('2024-01-01'), DATE('2024-04-01')
+      )
+    GROUP BY
+        l.city,
+        DATE_TRUNC('quarter', f.order_date),
+        p.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q13.sql b/tools/coffeebench-tools/queries/q13.sql
new file mode 100644
index 00000000000..31ef76d7a14
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q13.sql
@@ -0,0 +1,30 @@
+WITH city_quarter_subcat AS (
+    SELECT
+        l.city,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        p.subcategory,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    WHERE l.city = 'Austin'
+      AND DATE_TRUNC('quarter', f.order_date) IN (
+            DATE('2023-01-01'), DATE('2023-04-01'),
+            DATE('2024-01-01'), DATE('2024-04-01')
+      )
+    GROUP BY
+        l.city,
+        DATE_TRUNC('quarter', f.order_date),
+        p.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q14.sql b/tools/coffeebench-tools/queries/q14.sql
new file mode 100644
index 00000000000..af978e3b260
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q14.sql
@@ -0,0 +1,29 @@
+WITH city_quarter_subcat AS (
+    SELECT
+        l.city,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        p.subcategory,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    WHERE DATE_TRUNC('quarter', f.order_date) IN (
+            DATE('2023-01-01'), DATE('2023-04-01'),
+            DATE('2024-01-01'), DATE('2024-04-01')
+      )
+    GROUP BY
+        l.city,
+        DATE_TRUNC('quarter', f.order_date),
+        p.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q15.sql b/tools/coffeebench-tools/queries/q15.sql
new file mode 100644
index 00000000000..99ffc40c377
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q15.sql
@@ -0,0 +1,61 @@
+insert into temp3
+WITH base_data AS (
+    SELECT
+        f.location_id as location_id,
+        l.city,
+        f.product_name,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        SUM(f.sales_amount) AS total_sales,
+        SUM(f.sales_amount * (f.discount_percentage / 100.0)) AS total_discount,
+        SUM(f.quantity * p.standard_cost) AS total_cogs
+    FROM fact_sales f
+    INNER JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    INNER JOIN dim_locations l
+        ON f.location_id = l.location_id
+    WHERE f.order_date BETWEEN '2022-01-01' AND '2024-12-31'
+    GROUP BY
+        f.location_id,
+        l.city,
+        f.product_name,
+        DATE_TRUNC('quarter', f.order_date)
+),
+with_profit AS (
+    SELECT
+        *,
+        total_sales - total_discount - total_cogs AS profit
+    FROM base_data
+)
+SELECT
+    city,
+    product_name,
+    sales_quarter,
+    profit,
+    LAG(profit, 1) OVER (
+        PARTITION BY location_id, product_name 
+        ORDER BY sales_quarter
+    ) AS prev_profit,
+    ROUND(
+        CASE 
+            WHEN LAG(profit, 1) OVER (
+                PARTITION BY location_id, product_name 
+                ORDER BY sales_quarter
+            ) = 0 OR 
+            LAG(profit, 1) OVER (
+                PARTITION BY location_id, product_name 
+                ORDER BY sales_quarter
+            ) IS NULL 
+            THEN NULL
+            ELSE 100.0 * (profit - LAG(profit, 1) OVER (
+                PARTITION BY location_id, product_name 
+                ORDER BY sales_quarter
+            )) / LAG(profit, 1) OVER (
+                PARTITION BY location_id, product_name 
+                ORDER BY sales_quarter
+            )
+        END,
+        2
+    ) AS yoy_profit_pct
+FROM with_profit
+ORDER BY location_id, product_name, sales_quarter;
diff --git a/tools/coffeebench-tools/queries/q16.sql b/tools/coffeebench-tools/queries/q16.sql
new file mode 100644
index 00000000000..5522a3c2552
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q16.sql
@@ -0,0 +1,27 @@
+WITH seasonal_data AS (
+    SELECT
+        l.state,
+        f.season,
+        p.category,
+        SUM(f.sales_amount) AS total_sales,
+        SUM(f.quantity) AS total_units,
+        COUNT(DISTINCT f.order_id) AS order_count
+    FROM fact_sales f
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND DATE(f.order_date) BETWEEN DATE(p.from_date) AND DATE(p.to_date)
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    WHERE DATE(f.order_date) BETWEEN DATE('2023-01-01') AND DATE('2024-06-30')
+    GROUP BY l.state, f.season, p.category
+),
+ranked AS (
+    SELECT
+        *,
+        DENSE_RANK() OVER (PARTITION BY state, season ORDER BY total_sales DESC) AS category_rank
+    FROM seasonal_data
+)
+SELECT *
+FROM ranked
+WHERE category_rank <= 3
+ORDER BY state, season, category_rank;
diff --git a/tools/coffeebench-tools/queries/q17.sql b/tools/coffeebench-tools/queries/q17.sql
new file mode 100644
index 00000000000..5291125242d
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q17.sql
@@ -0,0 +1,66 @@
+WITH raw_agg AS (
+    SELECT
+        l.state,
+        f.season,
+        p.category,
+        SUM(f.sales_amount)       AS total_sales,
+        SUM(f.quantity)           AS total_units,
+        COUNT(DISTINCT f.order_id) AS order_count
+    FROM fact_sales AS f
+    JOIN dim_products AS p
+      ON f.product_id = p.product_id
+     AND DATE(f.order_date) BETWEEN DATE(p.from_date) AND DATE(p.to_date)
+    JOIN dim_locations AS l
+      ON f.location_id = l.location_id
+    WHERE
+        l.region        = 'West'
+      AND f.time_of_day  = 'Morning'
+      AND p.name         = 'Frappe'
+      AND DATE(f.order_date)  BETWEEN DATE('2023-01-01') AND DATE('2024-06-30')
+    GROUP BY
+        l.state,
+        f.season,
+        p.category
+),
+seasonal_data AS (
+    SELECT
+        state,
+        season,
+        category,
+        total_sales,
+        total_units,
+        order_count,
+        SUM(total_sales) OVER (PARTITION BY state, season) AS season_total_sales
+    FROM raw_agg
+),
+ranked AS (
+    SELECT
+        state,
+        season,
+        category,
+        total_sales,
+        total_units,
+        order_count,
+        season_total_sales,
+        ROUND(100.0 * total_sales / season_total_sales, 2) AS pct_of_season,
+        DENSE_RANK() OVER (
+          PARTITION BY state, season
+          ORDER BY total_sales DESC
+        ) AS category_rank
+    FROM seasonal_data
+)
+SELECT
+    state,
+    season,
+    category,
+    total_sales,
+    total_units,
+    order_count,
+    pct_of_season,
+    category_rank
+FROM ranked
+WHERE category_rank <= 3
+ORDER BY
+    state,
+    season,
+    category_rank;
diff --git a/tools/coffeebench-tools/queries/q2.sql b/tools/coffeebench-tools/queries/q2.sql
new file mode 100644
index 00000000000..aac53769be8
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q2.sql
@@ -0,0 +1,17 @@
+WITH monthly_sales AS (
+    SELECT
+        DATE_TRUNC('month', f.order_date) AS sales_month,
+        f.product_name,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    GROUP BY
+        DATE_TRUNC('month', f.order_date),
+        f.product_name
+)
+SELECT
+    sales_month,
+    product_name,
+    total_sales,
+    RANK() OVER (PARTITION BY sales_month ORDER BY total_sales DESC) AS sales_rank
+FROM monthly_sales
+ORDER BY sales_month, sales_rank;
diff --git a/tools/coffeebench-tools/queries/q3.sql b/tools/coffeebench-tools/queries/q3.sql
new file mode 100644
index 00000000000..fb28025e279
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q3.sql
@@ -0,0 +1,31 @@
+WITH season_discount AS (
+    SELECT
+        l.city,
+        l.state,
+        f.season,
+        AVG(f.discount_percentage) AS avg_discount
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    GROUP BY
+        l.city,
+        l.state,
+        f.season
+)
+SELECT
+    city,
+    state,
+    season,
+    avg_discount,
+    discount_rank
+FROM (
+    SELECT
+        city,
+        state,
+        season,
+        avg_discount,
+        DENSE_RANK() OVER (PARTITION BY season ORDER BY avg_discount DESC) AS discount_rank
+    FROM season_discount
+) t
+WHERE discount_rank <= 3
+ORDER BY season, discount_rank;
diff --git a/tools/coffeebench-tools/queries/q4.sql b/tools/coffeebench-tools/queries/q4.sql
new file mode 100644
index 00000000000..d706ded5225
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q4.sql
@@ -0,0 +1,20 @@
+SELECT
+    f.order_date,
+    f.product_name,
+    p.standard_price,
+    p.standard_cost,
+    SUM(f.quantity) AS total_quantity_sold,
+    SUM(f.sales_amount) AS total_sales_amount,
+    (p.standard_price - p.standard_cost) * SUM(f.quantity) AS theoretical_margin
+FROM fact_sales f
+JOIN dim_products p
+    ON f.product_name = p.name
+    AND f.order_date BETWEEN p.from_date AND p.to_date
+GROUP BY
+    f.order_date,
+    f.product_name,
+    p.standard_price,
+    p.standard_cost
+ORDER BY
+    f.order_date,
+    f.product_name;
diff --git a/tools/coffeebench-tools/queries/q5.sql b/tools/coffeebench-tools/queries/q5.sql
new file mode 100644
index 00000000000..ea117484b93
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q5.sql
@@ -0,0 +1,23 @@
+WITH daily_city_qty AS (
+    SELECT
+        f.order_date,
+        l.city,
+        SUM(f.quantity) AS daily_qty
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    GROUP BY
+        f.order_date,
+        l.city
+)
+SELECT
+    order_date,
+    city,
+    daily_qty,
+    SUM(daily_qty) OVER (
+        PARTITION BY city
+        ORDER BY order_date
+        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
+    ) AS rolling_30day_qty
+FROM daily_city_qty
+ORDER BY city, order_date;
diff --git a/tools/coffeebench-tools/queries/q6.sql b/tools/coffeebench-tools/queries/q6.sql
new file mode 100644
index 00000000000..6173883e81e
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q6.sql
@@ -0,0 +1,19 @@
+insert into temp1
+WITH monthly_cat AS (
+    SELECT
+        DATE_TRUNC('month', f.order_date) AS sales_month,
+        p.category,
+        SUM(f.sales_amount) AS monthly_revenue
+    FROM fact_sales f
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    GROUP BY
+        DATE_TRUNC('month', f.order_date),
+        p.category
+)
+SELECT
+    coalesce(sales_month, DATE('1970-01-01')) AS sales_month,
+    category,
+    monthly_revenue
+FROM monthly_cat;
diff --git a/tools/coffeebench-tools/queries/q7.sql b/tools/coffeebench-tools/queries/q7.sql
new file mode 100644
index 00000000000..d754a4f436e
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q7.sql
@@ -0,0 +1,30 @@
+WITH yearly_sales AS (
+    SELECT
+        l.location_id,
+        l.city,
+        l.state,
+        YEAR(f.order_date) AS sales_year,
+        SUM(f.sales_amount) AS total_sales_year
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    GROUP BY
+        l.location_id,
+        l.city,
+        l.state,
+        YEAR(f.order_date)
+)
+SELECT
+    city,
+    state,
+    SUM(CASE WHEN sales_year = 2023 THEN total_sales_year ELSE 0 END) AS sales_2023,
+    SUM(CASE WHEN sales_year = 2024 THEN total_sales_year ELSE 0 END) AS sales_2024,
+    (SUM(CASE WHEN sales_year = 2024 THEN total_sales_year ELSE 0 END)
+     - SUM(CASE WHEN sales_year = 2023 THEN total_sales_year ELSE 0 END)) AS yoy_diff
+FROM yearly_sales
+GROUP BY
+    city,
+    state
+ORDER BY
+    city,
+    state;
diff --git a/tools/coffeebench-tools/queries/q8.sql b/tools/coffeebench-tools/queries/q8.sql
new file mode 100644
index 00000000000..e1af1c0fc93
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q8.sql
@@ -0,0 +1,25 @@
+WITH city_quarter_subcat AS (
+    SELECT
+        l.city,
+        DATE_TRUNC('quarter', f.order_date) AS sales_quarter,
+        p.subcategory,
+        SUM(f.sales_amount) AS total_sales
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    JOIN dim_products p
+        ON f.product_name = p.name
+        AND f.order_date BETWEEN p.from_date AND p.to_date
+    GROUP BY
+        l.city,
+        DATE_TRUNC('quarter', f.order_date),
+        p.subcategory
+)
+SELECT
+    city,
+    sales_quarter,
+    subcategory,
+    total_sales,
+    RANK() OVER (PARTITION BY city, sales_quarter ORDER BY total_sales DESC) AS subcat_rank
+FROM city_quarter_subcat
+ORDER BY city, sales_quarter, subcat_rank;
diff --git a/tools/coffeebench-tools/queries/q9.sql b/tools/coffeebench-tools/queries/q9.sql
new file mode 100644
index 00000000000..2ddc885c3f2
--- /dev/null
+++ b/tools/coffeebench-tools/queries/q9.sql
@@ -0,0 +1,23 @@
+WITH daily_discount AS (
+    SELECT
+        l.city,
+        f.order_date,
+        AVG(f.discount_percentage) AS avg_discount
+    FROM fact_sales f
+    JOIN dim_locations l
+        ON f.location_id = l.location_id
+    GROUP BY
+        l.city,
+        f.order_date
+)
+SELECT
+    city,
+    order_date,
+    avg_discount,
+    AVG(avg_discount) OVER (
+        PARTITION BY city
+        ORDER BY order_date
+        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    ) AS cumulative_avg_discount
+FROM daily_discount
+ORDER BY city, order_date;

