On 04.09.2017 15:17, Alexey Chernyshov wrote:
make check-world fails on contrib/postgres_fdw because "Subquery Scan on ..." now appears in the EXPLAIN output.
Probably, the query plan has changed.
Hi Alexey,
Thanks for testing! This is the same problem as the one in
'select_distinct' I mentioned before. I changed the test, the updated
patch is attached.
--
Alexander Kuzmenkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index c19b3318c7..5f5f65d60c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2203,22 +2203,23 @@ SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
-- join with lateral reference
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
- QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1"
-> Nested Loop
Output: t1."C 1"
-> Index Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
- -> HashAggregate
- Output: t2.c1, t3.c1
- Group Key: t2.c1, t3.c1
- -> Foreign Scan
+ -> Subquery Scan on q
+ -> HashAggregate
Output: t2.c1, t3.c1
- Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
- Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
-(13 rows)
+ Group Key: t2.c1, t3.c1
+ -> Foreign Scan
+ Output: t2.c1, t3.c1
+ Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
+ Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
+(14 rows)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
C 1
@@ -2672,16 +2673,17 @@ select c2, sum(c1) from ft2 group by c2 having avg(c1) < 500 and sum(c1) < 49800
-- Unshippable HAVING clause will be evaluated locally, and other qual in HAVING clause is pushed down
explain (verbose, costs off)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: count(*)
- -> Foreign Scan
- Output: ft1.c5, NULL::bigint, (sqrt((ft1.c2)::double precision))
- Filter: (((((avg(ft1.c1)) / (avg(ft1.c1))))::double precision * random()) <= '1'::double precision)
- Relations: Aggregate on (public.ft1)
- Remote SQL: SELECT c5, NULL::bigint, sqrt(c2), avg("C 1") FROM "S 1"."T 1" GROUP BY c5, (sqrt(c2)) HAVING ((avg("C 1") < 500::numeric))
-(7 rows)
+ -> Subquery Scan on x
+ -> Foreign Scan
+ Output: ft1.c5, NULL::bigint, (sqrt((ft1.c2)::double precision))
+ Filter: (((((avg(ft1.c1)) / (avg(ft1.c1))))::double precision * random()) <= '1'::double precision)
+ Relations: Aggregate on (public.ft1)
+ Remote SQL: SELECT c5, NULL::bigint, sqrt(c2), avg("C 1") FROM "S 1"."T 1" GROUP BY c5, (sqrt(c2)) HAVING ((avg("C 1") < 500::numeric))
+(8 rows)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
count
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 79f534e4e9..d7ea6f6929 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -39,6 +39,7 @@
#include "access/relscan.h"
#include "access/transam.h"
+#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
@@ -225,9 +226,25 @@ BitmapHeapNext(BitmapHeapScanState *node)
}
/*
- * Fetch the current heap page and identify candidate tuples.
+ * If we don't need the tuple contents and are only counting them,
+ * we can skip fetching the page if the bitmap doesn't need rechecking
+ * and all tuples on the page are visible to our transaction
*/
- bitgetpage(scan, tbmres);
+ node->bhs_nofetch = !tbmres->recheck
+ && node->ss.ps.qual == NULL
+ && node->ss.ps.plan->targetlist == NIL
+ && VM_ALL_VISIBLE(node->ss.ss_currentRelation, tbmres->blockno,
+ &node->bhs_vmbuffer);
+
+ if (node->bhs_nofetch)
+ scan->rs_ntuples = tbmres->ntuples;
+ else
+ {
+ /*
+ * Fetch the current heap page and identify candidate tuples.
+ */
+ bitgetpage(scan, tbmres);
+ }
if (tbmres->ntuples >= 0)
node->exact_pages++;
@@ -289,45 +306,58 @@ BitmapHeapNext(BitmapHeapScanState *node)
*/
BitmapPrefetch(node, scan);
- /*
- * Okay to fetch the tuple
- */
- targoffset = scan->rs_vistuples[scan->rs_cindex];
- dp = (Page) BufferGetPage(scan->rs_cbuf);
- lp = PageGetItemId(dp, targoffset);
- Assert(ItemIdIsNormal(lp));
- scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
- scan->rs_ctup.t_len = ItemIdGetLength(lp);
- scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
- ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
+ if (node->bhs_nofetch)
+ {
+ /*
+ * If we don't have to fetch the tuple, just return a
+ * bogus one
+ */
+ slot->tts_isempty = false;
+ slot->tts_nvalid = 0;
+ }
+ else
+ {
+ /*
+ * Okay to fetch the tuple
+ */
+ targoffset = scan->rs_vistuples[scan->rs_cindex];
+ dp = (Page) BufferGetPage(scan->rs_cbuf);
+ lp = PageGetItemId(dp, targoffset);
+ Assert(ItemIdIsNormal(lp));
- pgstat_count_heap_fetch(scan->rs_rd);
+ scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+ scan->rs_ctup.t_len = ItemIdGetLength(lp);
+ scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+ ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
- /*
- * Set up the result slot to point to this tuple. Note that the slot
- * acquires a pin on the buffer.
- */
- ExecStoreTuple(&scan->rs_ctup,
- slot,
- scan->rs_cbuf,
- false);
+ pgstat_count_heap_fetch(scan->rs_rd);
- /*
- * If we are using lossy info, we have to recheck the qual conditions
- * at every tuple.
- */
- if (tbmres->recheck)
- {
- econtext->ecxt_scantuple = slot;
- ResetExprContext(econtext);
+ /*
+ * Set up the result slot to point to this tuple. Note that the slot
+ * acquires a pin on the buffer.
+ */
+ ExecStoreTuple(&scan->rs_ctup,
+ slot,
+ scan->rs_cbuf,
+ false);
- if (!ExecQual(node->bitmapqualorig, econtext))
+ /*
+ * If we are using lossy info, we have to recheck the qual conditions
+ * at every tuple.
+ */
+ if (tbmres->recheck)
{
- /* Fails recheck, so drop it and loop back for another */
- InstrCountFiltered2(node, 1);
- ExecClearTuple(slot);
- continue;
+ econtext->ecxt_scantuple = slot;
+ ResetExprContext(econtext);
+
+ if (!ExecQual(node->bitmapqualorig, econtext))
+ {
+ /* Fails recheck, so drop it and loop back for another */
+ InstrCountFiltered2(node, 1);
+ ExecClearTuple(slot);
+ continue;
+ }
}
}
@@ -573,6 +603,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
#ifdef USE_PREFETCH
ParallelBitmapHeapState *pstate = node->pstate;
+ if (node->bhs_nofetch)
+ {
+ /*
+ * If we did not need to fetch the current page,
+ * we probably will not need to fetch the next.
+ */
+ return;
+ }
+
if (pstate == NULL)
{
TBMIterator *prefetch_iterator = node->prefetch_iterator;
@@ -778,6 +817,11 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
+ if (node->bhs_vmbuffer != InvalidBuffer)
+ {
+ ReleaseBuffer(node->bhs_vmbuffer);
+ }
+
/*
* close heap scan
*/
@@ -827,6 +871,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
+ scanstate->bhs_vmbuffer = InvalidBuffer;
+ scanstate->bhs_nofetch = false;
/* may be updated below */
scanstate->prefetch_maximum = target_prefetch_pages;
scanstate->pscan_len = 0;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b35acb7bdc..13fb8d3153 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -982,6 +982,14 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
else
cost_per_page = spc_random_page_cost;
+ /*
+ * Bitmap heap scan tries not to fetch the heap pages if the targetlist
+ * is empty. It should be reflected in the cost somehow; for now, just
+ * halve the number of pages to be fetched.
+ */
+ if (path->pathtarget->exprs == NIL)
+ pages_fetched = pages_fetched / 2;
+
run_cost += pages_fetched * cost_per_page;
/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 5c934f223d..0874871b61 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -778,6 +778,13 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
return false;
/*
+ * If the tlist is empty, keep it as is, so that the
+ * bitmap heap scan could try skipping heap page fetches.
+ */
+ if (path->pathtarget->exprs == NIL)
+ return false;
+
+ /*
* We can do this for real relation scans, subquery scans, function scans,
* tablefunc scans, values scans, and CTE scans (but not for, eg, joins).
*/
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 35c28a6143..6f567f25e8 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1334,6 +1334,8 @@ typedef struct ParallelBitmapHeapState
* shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
+ * bhs_vmbuffer buffer for visibility map lookups
+ * bhs_nofetch can we not fetch the heap page and return an empty tuple?
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1354,6 +1356,8 @@ typedef struct BitmapHeapScanState
TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
+ Buffer bhs_vmbuffer;
+ bool bhs_nofetch;
} BitmapHeapScanState;
/* ----------------
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9f4c88dab4..2e82ab1ec0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3746,7 +3746,6 @@ where tt1.f1 = ss1.c0;
Output: tt1.f1
Filter: (tt1.f1 = 'foo'::text)
-> Seq Scan on public.text_tbl tt2
- Output: tt2.f1
-> Materialize
Output: tt4.f1
-> Nested Loop Left Join
@@ -3765,7 +3764,7 @@ where tt1.f1 = ss1.c0;
Output: (tt4.f1)
-> Seq Scan on public.text_tbl tt5
Output: tt4.f1
-(29 rows)
+(28 rows)
select 1 from
text_tbl as tt1
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index f3696c6d1d..d29c1adaab 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -130,16 +130,17 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
- QUERY PLAN
---------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------
Aggregate
Output: count(*)
- -> HashAggregate
- Output: tenk1.two, tenk1.four, tenk1.two
- Group Key: tenk1.two, tenk1.four, tenk1.two
- -> Seq Scan on public.tenk1
+ -> Subquery Scan on ss
+ -> HashAggregate
Output: tenk1.two, tenk1.four, tenk1.two
-(7 rows)
+ Group Key: tenk1.two, tenk1.four, tenk1.two
+ -> Seq Scan on public.tenk1
+ Output: tenk1.two, tenk1.four, tenk1.two
+(8 rows)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index fc91f3ce36..4c4ee4d808 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
(1 row)
-- do an indexscan
+-- disable bitmap scan because it can skip fetching the heap tuples
+SET enable_bitmapscan to OFF;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
count
-------
1
(1 row)
+SET enable_bitmapscan to ON;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 2090a411fe..fc1b512a35 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -1658,20 +1658,22 @@ UPDATE rw_view1 SET a = a + 5; -- should fail
ERROR: new row violates check option for view "rw_view1"
DETAIL: Failing row contains (15).
EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (5);
- QUERY PLAN
----------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------
Insert on base_tbl b
-> Result
SubPlan 1
- -> Index Only Scan using ref_tbl_pkey on ref_tbl r
- Index Cond: (a = b.a)
+ -> Bitmap Heap Scan on ref_tbl r
+ Recheck Cond: (a = b.a)
+ -> Bitmap Index Scan on ref_tbl_pkey
+ Index Cond: (a = b.a)
SubPlan 2
-> Seq Scan on ref_tbl r_1
-(7 rows)
+(9 rows)
EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
- QUERY PLAN
------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------
Update on base_tbl b
-> Hash Join
Hash Cond: (b.a = r.a)
@@ -1679,11 +1681,13 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
-> Hash
-> Seq Scan on ref_tbl r
SubPlan 1
- -> Index Only Scan using ref_tbl_pkey on ref_tbl r_1
- Index Cond: (a = b.a)
+ -> Bitmap Heap Scan on ref_tbl r_1
+ Recheck Cond: (a = b.a)
+ -> Bitmap Index Scan on ref_tbl_pkey
+ Index Cond: (a = b.a)
SubPlan 2
-> Seq Scan on ref_tbl r_2
-(11 rows)
+(13 rows)
DROP TABLE base_tbl, ref_tbl CASCADE;
NOTICE: drop cascades to view rw_view1
@@ -2024,24 +2028,28 @@ EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
NOTICE: snooped value: Row 1
EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
- QUERY PLAN
------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------
Insert on base_tbl
InitPlan 1 (returns $0)
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
+ -> Bitmap Heap Scan on base_tbl t
+ Recheck Cond: (id = 2)
+ -> Bitmap Index Scan on base_tbl_pkey
+ Index Cond: (id = 2)
-> Result
One-Time Filter: ($0 IS NOT TRUE)
Update on base_tbl
InitPlan 1 (returns $0)
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
+ -> Bitmap Heap Scan on base_tbl t
+ Recheck Cond: (id = 2)
+ -> Bitmap Index Scan on base_tbl_pkey
+ Index Cond: (id = 2)
-> Result
One-Time Filter: $0
-> Index Scan using base_tbl_pkey on base_tbl
Index Cond: (id = 2)
-(15 rows)
+(19 rows)
INSERT INTO rw_view1 VALUES (2, 'New row 2');
SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 6e882bf3ac..cec32cff2d 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -138,7 +138,10 @@ ROLLBACK;
-- do a seqscan
SELECT count(*) FROM tenk2;
-- do an indexscan
+-- disable bitmap scan because it can skip fetching the heap tuples
+SET enable_bitmapscan to OFF;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
+SET enable_bitmapscan to ON;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers