On 04.09.2017 15:17, Alexey Chernyshov wrote:
make check-world fails on contrib/postgres_fdw because "Subquery Scan on ..." now appears in the EXPLAIN output.
Probably, the query plan has changed.
Hi Alexey,
Thanks for testing! This is the same problem as the one in
'select_distinct' I mentioned before. I changed the test, the updated
patch is attached.
--
Alexander Kuzmenkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index c19b3318c7..5f5f65d60c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2203,22 +2203,23 @@ SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
-- join with lateral reference
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
- QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1"
-> Nested Loop
Output: t1."C 1"
-> Index Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
- -> HashAggregate
- Output: t2.c1, t3.c1
- Group Key: t2.c1, t3.c1
- -> Foreign Scan
+ -> Subquery Scan on q
+ -> HashAggregate
Output: t2.c1, t3.c1
- Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
- Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
-(13 rows)
+ Group Key: t2.c1, t3.c1
+ -> Foreign Scan
+ Output: t2.c1, t3.c1
+ Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
+ Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
+(14 rows)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
C 1
@@ -2672,16 +2673,17 @@ select c2, sum(c1) from ft2 group by c2 having avg(c1) < 500 and sum(c1) < 49800
-- Unshippable HAVING clause will be evaluated locally, and other qual in HAVING clause is pushed down
explain (verbose, costs off)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: count(*)
- -> Foreign Scan
- Output: ft1.c5, NULL::bigint, (sqrt((ft1.c2)::double precision))
- Filter: (((((avg(ft1.c1)) / (avg(ft1.c1))))::double precision * random()) <= '1'::double precision)
- Relations: Aggregate on (public.ft1)
- Remote SQL: SELECT c5, NULL::bigint, sqrt(c2), avg("C 1") FROM "S 1"."T 1" GROUP BY c5, (sqrt(c2)) HAVING ((avg("C 1") < 500::numeric))
-(7 rows)
+ -> Subquery Scan on x
+ -> Foreign Scan
+ Output: ft1.c5, NULL::bigint, (sqrt((ft1.c2)::double precision))
+ Filter: (((((avg(ft1.c1)) / (avg(ft1.c1))))::double precision * random()) <= '1'::double precision)
+ Relations: Aggregate on (public.ft1)
+ Remote SQL: SELECT c5, NULL::bigint, sqrt(c2), avg("C 1") FROM "S 1"."T 1" GROUP BY c5, (sqrt(c2)) HAVING ((avg("C 1") < 500::numeric))
+(8 rows)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
count
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 79f534e4e9..d7ea6f6929 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -39,6 +39,7 @@
#include "access/relscan.h"
#include "access/transam.h"
+#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
@@ -225,9 +226,25 @@ BitmapHeapNext(BitmapHeapScanState *node)
}
/*
- * Fetch the current heap page and identify candidate tuples.
+ * If we don't need the tuple contents and are only counting them,
+ * we can skip fetching the page if the bitmap doesn't need rechecking
+ * and all tuples on the page are visible to our transaction
*/
- bitgetpage(scan, tbmres);
+ node->bhs_nofetch = !tbmres->recheck
+ && node->ss.ps.qual == NULL
+ && node->ss.ps.plan->targetlist == NIL
+ && VM_ALL_VISIBLE(node->ss.ss_currentRelation, tbmres->blockno,
+ &node->bhs_vmbuffer);
+
+ if (node->bhs_nofetch)
+ scan->rs_ntuples = tbmres->ntuples;
+ else
+ {
+ /*
+ * Fetch the current heap page and identify candidate tuples.
+ */
+ bitgetpage(scan, tbmres);
+ }
if (tbmres->ntuples >= 0)
node->exact_pages++;
@@ -289,45 +306,58 @@ BitmapHeapNext(BitmapHeapScanState *node)
*/
BitmapPrefetch(node, scan);
- /*
- * Okay to fetch the tuple
- */
- targoffset = scan->rs_vistuples[scan->rs_cindex];
- dp = (Page) BufferGetPage(scan->rs_cbuf);
- lp = PageGetItemId(dp, targoffset);
- Assert(ItemIdIsNormal(lp));
- scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
- scan->rs_ctup.t_len = ItemIdGetLength(lp);
- scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
- ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
+ if (node->bhs_nofetch)
+ {
+ /*
+ * If we don't have to fetch the tuple, just return a
+ * bogus one
+ */
+ slot->tts_isempty = false;
+ slot->tts_nvalid = 0;
+ }
+ else
+ {
+ /*
+ * Okay to fetch the tuple
+ */
+ targoffset = scan->rs_vistuples[scan->rs_cindex];
+ dp = (Page) BufferGetPage(scan->rs_cbuf);
+ lp = PageGetItemId(dp, targoffset);
+ Assert(ItemIdIsNormal(lp));
- pgstat_count_heap_fetch(scan->rs_rd);
+ scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+ scan->rs_ctup.t_len = ItemIdGetLength(lp);
+ scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+ ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
- /*
- * Set up the result slot to point to this tuple. Note that the slot
- * acquires a pin on the buffer.
- */
- ExecStoreTuple(&scan->rs_ctup,
- slot,
- scan->rs_cbuf,
- false);
+ pgstat_count_heap_fetch(scan->rs_rd);
- /*
- * If we are using lossy info, we have to recheck the qual conditions
- * at every tuple.
- */
- if (tbmres->recheck)
- {
- econtext->ecxt_scantuple = slot;
- ResetExprContext(econtext);
+ /*
+ * Set up the result slot to point to this tuple. Note that the slot
+ * acquires a pin on the buffer.
+ */
+ ExecStoreTuple(&scan->rs_ctup,
+ slot,
+ scan->rs_cbuf,
+ false);
- if (!ExecQual(node->bitmapqualorig, econtext))
+ /*
+ * If we are using lossy info, we have to recheck the qual conditions
+ * at every tuple.
+ */
+ if (tbmres->recheck)
{
- /* Fails recheck, so drop it and loop back for another */
- InstrCountFiltered2(node, 1);
- ExecClearTuple(slot);
- continue;
+ econtext->ecxt_scantuple = slot;
+ ResetExprContext(econtext);
+
+ if (!ExecQual(node->bitmapqualorig, econtext))
+ {
+ /* Fails recheck, so drop it and loop back for another */
+ InstrCountFiltered2(node, 1);
+ ExecClearTuple(slot);
+ continue;
+ }
}
}
@@ -573,6 +603,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
#ifdef USE_PREFETCH
ParallelBitmapHeapState *pstate = node->pstate;
+ if (node->bhs_nofetch)
+ {
+ /*
+ * If we did not need to fetch the current page,
+ * we probably will not need to fetch the next.
+ */
+ return;
+ }
+
if (pstate == NULL)
{
TBMIterator *prefetch_iterator = node->prefetch_iterator;
@@ -778,6 +817,11 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
+ if (node->bhs_vmbuffer != InvalidBuffer)
+ {
+ ReleaseBuffer(node->bhs_vmbuffer);
+ }
+
/*
* close heap scan
*/
@@ -827,6 +871,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
+ scanstate->bhs_vmbuffer = InvalidBuffer;
+ scanstate->bhs_nofetch = false;
/* may be updated below */
scanstate->prefetch_maximum = target_prefetch_pages;
scanstate->pscan_len = 0;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b35acb7bdc..13fb8d3153 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -982,6 +982,14 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
else
cost_per_page = spc_random_page_cost;
+ /*
+ * Bitmap heap scan tries not to fetch the heap pages if the targetlist
+ * is empty. It should be reflected in the cost somehow; for now, just
+ * halve the number of pages to be fetched.
+ */
+ if (path->pathtarget->exprs == NIL)
+ pages_fetched = pages_fetched / 2;
+
run_cost += pages_fetched * cost_per_page;
/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 5c934f223d..0874871b61 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -778,6 +778,13 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
return false;
/*
+ * If the tlist is empty, keep it as is, so that the
+ * bitmap heap scan could try skipping heap page fetches.
+ */
+ if (path->pathtarget->exprs == NIL)
+ return false;
+
+ /*
* We can do this for real relation scans, subquery scans, function scans,
* tablefunc scans, values scans, and CTE scans (but not for, eg, joins).
*/
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 35c28a6143..6f567f25e8 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1334,6 +1334,8 @@ typedef struct ParallelBitmapHeapState
* shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
+ * bhs_vmbuffer buffer for visibility map lookups
+ * bhs_nofetch can we not fetch the heap page and return an empty tuple?
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1354,6 +1356,8 @@ typedef struct BitmapHeapScanState
TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
+ Buffer bhs_vmbuffer;
+ bool bhs_nofetch;
} BitmapHeapScanState;
/* ----------------
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9f4c88dab4..2e82ab1ec0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3746,7 +3746,6 @@ where tt1.f1 = ss1.c0;
Output: tt1.f1
Filter: (tt1.f1 = 'foo'::text)
-> Seq Scan on public.text_tbl tt2
- Output: tt2.f1
-> Materialize
Output: tt4.f1
-> Nested Loop Left Join
@@ -3765,7 +3764,7 @@ where tt1.f1 = ss1.c0;
Output: (tt4.f1)
-> Seq Scan on public.text_tbl tt5
Output: tt4.f1
-(29 rows)
+(28 rows)
select 1 from
text_tbl as tt1
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index f3696c6d1d..d29c1adaab 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -130,16 +130,17 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
- QUERY PLAN
---------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------
Aggregate
Output: count(*)
- -> HashAggregate
- Output: tenk1.two, tenk1.four, tenk1.two
- Group Key: tenk1.two, tenk1.four, tenk1.two
- -> Seq Scan on public.tenk1
+ -> Subquery Scan on ss
+ -> HashAggregate
Output: tenk1.two, tenk1.four, tenk1.two
-(7 rows)
+ Group Key: tenk1.two, tenk1.four, tenk1.two
+ -> Seq Scan on public.tenk1
+ Output: tenk1.two, tenk1.four, tenk1.two
+(8 rows)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index fc91f3ce36..4c4ee4d808 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
(1 row)
-- do an indexscan
+-- disable bitmap scan because it can skip fetching the heap tuples
+SET enable_bitmapscan to OFF;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
count
-------
1
(1 row)
+SET enable_bitmapscan to ON;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 2090a411fe..fc1b512a35 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -1658,20 +1658,22 @@ UPDATE rw_view1 SET a = a + 5; -- should fail
ERROR: new row violates check option for view "rw_view1"
DETAIL: Failing row contains (15).
EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (5);
- QUERY PLAN
----------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------
Insert on base_tbl b
-> Result
SubPlan 1
- -> Index Only Scan using ref_tbl_pkey on ref_tbl r
- Index Cond: (a = b.a)
+ -> Bitmap Heap Scan on ref_tbl r
+ Recheck Cond: (a = b.a)
+ -> Bitmap Index Scan on ref_tbl_pkey
+ Index Cond: (a = b.a)
SubPlan 2
-> Seq Scan on ref_tbl r_1
-(7 rows)
+(9 rows)
EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
- QUERY PLAN
------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------
Update on base_tbl b
-> Hash Join
Hash Cond: (b.a = r.a)
@@ -1679,11 +1681,13 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
-> Hash
-> Seq Scan on ref_tbl r
SubPlan 1
- -> Index Only Scan using ref_tbl_pkey on ref_tbl r_1
- Index Cond: (a = b.a)
+ -> Bitmap Heap Scan on ref_tbl r_1
+ Recheck Cond: (a = b.a)
+ -> Bitmap Index Scan on ref_tbl_pkey
+ Index Cond: (a = b.a)
SubPlan 2
-> Seq Scan on ref_tbl r_2
-(11 rows)
+(13 rows)
DROP TABLE base_tbl, ref_tbl CASCADE;
NOTICE: drop cascades to view rw_view1
@@ -2024,24 +2028,28 @@ EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
NOTICE: snooped value: Row 1
EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
- QUERY PLAN
------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------
Insert on base_tbl
InitPlan 1 (returns $0)
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
+ -> Bitmap Heap Scan on base_tbl t
+ Recheck Cond: (id = 2)
+ -> Bitmap Index Scan on base_tbl_pkey
+ Index Cond: (id = 2)
-> Result
One-Time Filter: ($0 IS NOT TRUE)
Update on base_tbl
InitPlan 1 (returns $0)
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
+ -> Bitmap Heap Scan on base_tbl t
+ Recheck Cond: (id = 2)
+ -> Bitmap Index Scan on base_tbl_pkey
+ Index Cond: (id = 2)
-> Result
One-Time Filter: $0
-> Index Scan using base_tbl_pkey on base_tbl
Index Cond: (id = 2)
-(15 rows)
+(19 rows)
INSERT INTO rw_view1 VALUES (2, 'New row 2');
SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 6e882bf3ac..cec32cff2d 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -138,7 +138,10 @@ ROLLBACK;
-- do a seqscan
SELECT count(*) FROM tenk2;
-- do an indexscan
+-- disable bitmap scan because it can skip fetching the heap tuples
+SET enable_bitmapscan to OFF;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
+SET enable_bitmapscan to ON;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers