Greg Stark <gsst...@mit.edu> writes: > On Sat, May 28, 2011 at 12:01 PM, Tom Lane <t...@sss.pgh.pa.us> wrote: >> I also found that Greg was right in thinking that it would help if we >> tweaked lazy_scan_heap to not always scan the first >> SKIP_PAGES_THRESHOLD-1 pages even if they were >> all_visible_according_to_vm.
> You fixed the logic only for the first 32 pages which helps with the > skew. But really the logic is backwards in general. Instead of > counting how many missed opportunities for skipped pages we've seen in > the past we should read the bits for the next 32 pages in advance and > decide what to do before we read those pages. OK, do you like the attached version of that logic? (Other fragments of the patch as before.) regards, tom lane diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 9393fa0727aaad7508e1163623322b4066412257..231447b31223bc5350ce49a136cffafaa53bc5fb 100644 *** a/src/backend/commands/vacuumlazy.c --- b/src/backend/commands/vacuumlazy.c *************** lazy_scan_heap(Relation onerel, LVRelSta *** 311,317 **** int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; ! BlockNumber all_visible_streak; pg_rusage_init(&ru0); --- 305,312 ---- int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; ! BlockNumber next_not_all_visible_block; ! bool skipping_all_visible_blocks; pg_rusage_init(&ru0); *************** lazy_scan_heap(Relation onerel, LVRelSta *** 329,340 **** nblocks = RelationGetNumberOfBlocks(onerel); vacrelstats->rel_pages = nblocks; vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; lazy_space_alloc(vacrelstats, nblocks); ! all_visible_streak = 0; for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; --- 324,369 ---- nblocks = RelationGetNumberOfBlocks(onerel); vacrelstats->rel_pages = nblocks; + vacrelstats->scanned_pages = 0; vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; lazy_space_alloc(vacrelstats, nblocks); ! /* ! * We want to skip pages that don't require vacuuming according to the ! * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD ! * consecutive pages. Since we're reading sequentially, the OS should be ! * doing readahead for us, so there's no gain in skipping a page now and ! * then; that's likely to disable readahead and so be counterproductive. ! * Also, skipping even a single page means that we can't update ! * relfrozenxid, so we only want to do it if we can skip a goodly number ! * of pages. ! * ! * Before entering the main loop, establish the invariant that ! * next_not_all_visible_block is the next block number >= blkno that's ! * not all-visible according to the visibility map, or nblocks if there's ! * no such block. Also, we set up the skipping_all_visible_blocks flag, ! * which is needed because we need hysteresis in the decision: once we've ! * started skipping blocks, we may as well skip everything up to the next ! * not-all-visible block. ! * ! * Note: if scan_all is true, we won't actually skip any pages; but we ! * maintain next_not_all_visible_block anyway, so as to set up the ! * all_visible_according_to_vm flag correctly for each page. ! */ ! for (next_not_all_visible_block = 0; ! next_not_all_visible_block < nblocks; ! next_not_all_visible_block++) ! { ! if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer)) ! break; ! } ! if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD) ! skipping_all_visible_blocks = true; ! else ! skipping_all_visible_blocks = false; ! for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; *************** lazy_scan_heap(Relation onerel, LVRelSta *** 347,387 **** OffsetNumber frozen[MaxOffsetNumber]; int nfrozen; Size freespace; ! bool all_visible_according_to_vm = false; bool all_visible; bool has_dead_tuples; ! /* ! * Skip pages that don't require vacuuming according to the visibility ! * map. But only if we've seen a streak of at least ! * SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading ! * sequentially, the OS should be doing readahead for us and there's ! * no gain in skipping a page now and then. You need a longer run of ! * consecutive skipped pages before it's worthwhile. Also, skipping ! * even a single page means that we can't update relfrozenxid or ! * reltuples, so we only want to do it if there's a good chance to ! * skip a goodly number of pages. ! */ ! if (!scan_all) { ! all_visible_according_to_vm = ! visibilitymap_test(onerel, blkno, &vmbuffer); ! if (all_visible_according_to_vm) { ! all_visible_streak++; ! if (all_visible_streak >= SKIP_PAGES_THRESHOLD) ! { ! vacrelstats->scanned_all = false; ! continue; ! } } else ! all_visible_streak = 0; } vacuum_delay_point(); ! scanned_pages++; /* * If we are close to overrunning the available space for dead-tuple --- 376,419 ---- OffsetNumber frozen[MaxOffsetNumber]; int nfrozen; Size freespace; ! bool all_visible_according_to_vm; bool all_visible; bool has_dead_tuples; ! if (blkno == next_not_all_visible_block) { ! /* Time to advance next_not_all_visible_block */ ! for (next_not_all_visible_block++; ! next_not_all_visible_block < nblocks; ! next_not_all_visible_block++) { ! if (!visibilitymap_test(onerel, next_not_all_visible_block, ! &vmbuffer)) ! break; } + + /* + * We know we can't skip the current block. But set up + * skipping_all_visible_blocks to do the right thing at the + * following blocks. + */ + if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD) + skipping_all_visible_blocks = true; else ! skipping_all_visible_blocks = false; ! all_visible_according_to_vm = false; ! } ! else ! { ! /* Current block is all-visible */ ! if (skipping_all_visible_blocks && !scan_all) ! continue; ! all_visible_according_to_vm = true; } vacuum_delay_point(); ! vacrelstats->scanned_pages++; /* * If we are close to overrunning the available space for dead-tuple -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers