Hi hackers,
When a query uses multiple filters on a single column in the WHERE
clause, and extended mcv statistics are applied on the column, the
optimizer should take each clause into account when estimating
selectivity. For example, this could be (A = 0 OR A = 1), (A BETWEEN 0
AND 5), or (A > 0 AND A <= 5). Such range conditions are quite common in
practice. However, I did not find any tests covering these scenarios in
the stats_ext.sql regression test.
I propose adding tests for these cases. This will make the behavior of
MCV extended statistics more clear to users and developers.
Any feedback, suggestions, or additions to the tests are welcome and
appreciated.
--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From e2b2e591e0df5a7e2ed589182b204781229d5754 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.com>
Date: Fri, 7 Feb 2025 00:29:53 +0300
Subject: [PATCH v1] Add tests for extended MCV statistics with OR, BETWEEN
Since extended MCV statistics statistics are compatible with conditions
like OR (e.g., A = 0 OR A = 1); AND, BETWEEN (e.g., A >= 0 AND A < 3),
this patch adds tests to verify their correct handling.
---
src/test/regress/expected/stats_ext.out | 24 ++++++++++++++++++++++++
src/test/regress/sql/stats_ext.sql | 8 ++++++++
2 files changed, 32 insertions(+)
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index a4c7be487e..f26d07261e 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -2803,6 +2803,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
6 | 102
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 OR a = 10) AND (b = 0 OR b = 10) AND (c = 0 OR c = 10)');
+ estimated | actual
+-----------+--------
+ 1 | 104
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a BETWEEN 7 AND 10) AND (b BETWEEN 7 AND 10) AND (c BETWEEN 7 AND 10)');
+ estimated | actual
+-----------+--------
+ 1 | 308
+(1 row)
+
CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c
FROM mcv_lists_partial;
ANALYZE mcv_lists_partial;
@@ -2854,6 +2866,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
108 | 102
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 OR a = 10) AND (b = 0 OR b = 10) AND (c = 0 OR c = 10)');
+ estimated | actual
+-----------+--------
+ 104 | 104
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a BETWEEN 7 AND 10) AND (b BETWEEN 7 AND 10) AND (c BETWEEN 7 AND 10)');
+ estimated | actual
+-----------+--------
+ 309 | 308
+(1 row)
+
DROP TABLE mcv_lists_partial;
-- check the ability to use multiple MCV lists
CREATE TABLE mcv_lists_multi (
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 5c786b16c6..b6fe084675 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1410,6 +1410,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 OR a = 10) AND (b = 0 OR b = 10) AND (c = 0 OR c = 10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a BETWEEN 7 AND 10) AND (b BETWEEN 7 AND 10) AND (c BETWEEN 7 AND 10)');
+
CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c
FROM mcv_lists_partial;
@@ -1431,6 +1435,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 OR a = 10) AND (b = 0 OR b = 10) AND (c = 0 OR c = 10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a BETWEEN 7 AND 10) AND (b BETWEEN 7 AND 10) AND (c BETWEEN 7 AND 10)');
+
DROP TABLE mcv_lists_partial;
-- check the ability to use multiple MCV lists
--
2.34.1