On Thu, Jan 31, 2019 at 10:18 PM Masahiko Sawada <sawada.m...@gmail.com> wrote: > > Thank you. I'll submit the updated patch set. >
Attached is the latest patch set.

Regards,

--
Masahiko Sawada
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
From 021a179d7696183394db60aedbd1acb0301ad4b0 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.m...@gmail.com>
Date: Wed, 23 Jan 2019 16:07:53 +0900
Subject: [PATCH v14 2/2] Add -P option to vacuumdb command

---
 doc/src/sgml/ref/vacuumdb.sgml    | 22 +++++++++++++++
 src/bin/scripts/t/100_vacuumdb.pl | 10 ++++++-
 src/bin/scripts/vacuumdb.c        | 58 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index 41c7f3d..95ff132 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -227,6 +227,28 @@ PostgreSQL documentation
  </varlistentry>
 
  <varlistentry>
+  <term><option>-P <replaceable class="parameter">workers</replaceable></option></term>
+  <term><option>--parallel=<replaceable class="parameter">workers</replaceable></option></term>
+  <listitem>
+   <para>
+    Execute parallel vacuum with
+    <replaceable class="parameter">workers</replaceable> background workers.
+   </para>
+   <para>
+    This will require background workers, so make sure your
+    <xref linkend="guc-max-parallel-maintenance-workers"/> setting is
+    at least one.
+   </para>
+   <note>
+    <para>
+     This option is only available for servers running
+     <productname>PostgreSQL</productname> 12 and later.
+    </para>
+   </note>
+  </listitem>
+ </varlistentry>
+
+ <varlistentry>
  <term><option>-q</option></term>
  <term><option>--quiet</option></term>
  <listitem>
diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl
index 7f3a9b1..5683ef6 100644
--- a/src/bin/scripts/t/100_vacuumdb.pl
+++ b/src/bin/scripts/t/100_vacuumdb.pl
@@ -3,7 +3,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 44;
+use Test::More tests => 48;
 
 program_help_ok('vacuumdb');
 program_version_ok('vacuumdb');
@@ -48,6 +48,14 @@ $node->issues_sql_like(
 $node->command_fails(
 	[ 'vacuumdb', '--analyze-only', '--disable-page-skipping', 'postgres' ],
 	'--analyze-only and --disable-page-skipping specified together');
+$node->issues_sql_like(
+	[ 'vacuumdb', '-P2', 'postgres' ],
+	qr/statement: VACUUM \(PARALLEL 2\);/,
+	'vacuumdb -P2');
+$node->issues_sql_like(
+	[ 'vacuumdb', '-P', 'postgres' ],
+	qr/statement: VACUUM \(PARALLEL\);/,
+	'vacuumdb -P');
 $node->command_ok([qw(vacuumdb -Z --table=pg_am dbname=template1)],
 	'vacuumdb with connection string');
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 5ac41ea..2aee18b 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -45,6 +45,8 @@ typedef struct vacuumingOptions
 	bool		skip_locked;
 	int			min_xid_age;
 	int			min_mxid_age;
+	int			parallel_workers;	/* -1: disabled, 0: PARALLEL without
+									 * number of workers */
 } vacuumingOptions;
@@ -111,6 +113,7 @@ main(int argc, char *argv[])
 		{"full", no_argument, NULL, 'f'},
 		{"verbose", no_argument, NULL, 'v'},
 		{"jobs", required_argument, NULL, 'j'},
+		{"parallel", optional_argument, NULL, 'P'},
 		{"maintenance-db", required_argument, NULL, 2},
 		{"analyze-in-stages", no_argument, NULL, 3},
 		{"disable-page-skipping", no_argument, NULL, 4},
@@ -140,6 +143,7 @@ main(int argc, char *argv[])
 
 	/* initialize options to all false */
 	memset(&vacopts, 0, sizeof(vacopts));
+	vacopts.parallel_workers = -1;
 
 	progname = get_progname(argv[0]);
 
@@ -147,7 +151,7 @@ main(int argc, char *argv[])
 
 	handle_help_version_opts(argc, argv, "vacuumdb", help);
 
-	while ((c = getopt_long(argc, argv, "h:p:U:wWeqd:zZFat:fvj:", long_options, &optindex)) != -1)
+	while ((c = getopt_long(argc, argv, "h:p:P::U:wWeqd:zZFat:fvj:", long_options, &optindex)) != -1)
 	{
 		switch (c)
 		{
@@ -214,6 +218,25 @@ main(int argc, char *argv[])
 					exit(1);
 				}
 				break;
+			case 'P':
+				{
+					int			parallel_workers = 0;
+
+					if (optarg != NULL)
+					{
+						parallel_workers = atoi(optarg);
+						if (parallel_workers <= 0)
+						{
+							fprintf(stderr, _("%s: number of parallel workers must be at least 1\n"),
+									progname);
+							exit(1);
+						}
+					}
+
+					/* allow 0, meaning PARALLEL without an explicit parallel degree */
+					vacopts.parallel_workers = parallel_workers;
+					break;
+				}
 			case 2:
 				maintenance_db = pg_strdup(optarg);
 				break;
@@ -288,9 +311,22 @@ main(int argc, char *argv[])
 					progname, "disable-page-skipping");
 			exit(1);
 		}
+		if (vacopts.parallel_workers >= 0)
+		{
+			fprintf(stderr, _("%s: cannot use the \"%s\" option when performing only analyze\n"),
+					progname, "parallel");
+			exit(1);
+		}
 		/* allow 'and_analyze' with 'analyze_only' */
 	}
 
+	if (vacopts.full && vacopts.parallel_workers >= 0)
+	{
+		fprintf(stderr, _("%s: cannot use the \"%s\" option with the \"%s\" option\n"),
+				progname, "full", "parallel");
+		exit(1);
+	}
+
 	setup_cancel_handler();
 
 	/* Avoid opening extra connections. */
@@ -426,6 +462,14 @@ vacuum_one_database(const char *dbname, vacuumingOptions *vacopts,
 		exit(1);
 	}
 
+	if (vacopts->parallel_workers >= 0 && PQserverVersion(conn) < 120000)
+	{
+		PQfinish(conn);
+		fprintf(stderr, _("%s: cannot use the \"%s\" option on server versions older than PostgreSQL 12\n"),
+				progname, "parallel");
+		exit(1);
+	}
+
 	if (vacopts->min_xid_age != 0 && PQserverVersion(conn) < 90600)
 	{
 		fprintf(stderr, _("%s: cannot use the \"%s\" option on server versions older than PostgreSQL 9.6\n"),
@@ -895,6 +939,17 @@ prepare_vacuum_command(PQExpBuffer sql, int serverVersion,
 			appendPQExpBuffer(sql, "%sANALYZE", sep);
 			sep = comma;
 		}
+		if (vacopts->parallel_workers >= 0)
+		{
+			/* PARALLEL is supported since v12 */
+			Assert(serverVersion >= 120000);
+			if (vacopts->parallel_workers == 0)
+				appendPQExpBuffer(sql, "%sPARALLEL", sep);
+			else
+				appendPQExpBuffer(sql, "%sPARALLEL %d", sep,
+								  vacopts->parallel_workers);
+			sep = comma;
+		}
 		if (sep != paren)
 			appendPQExpBufferChar(sql, ')');
 	}
@@ -1227,6 +1282,7 @@ help(const char *progname)
 	printf(_("  -j, --jobs=NUM                  use this many concurrent connections to vacuum\n"));
 	printf(_("      --min-mxid-age=MXID_AGE     minimum multixact ID age of tables to vacuum\n"));
 	printf(_("      --min-xid-age=XID_AGE       minimum transaction ID age of tables to vacuum\n"));
+	printf(_("  -P, --parallel[=NUM]            do parallel vacuuming\n"));
 	printf(_("  -q, --quiet                     don't write any messages\n"));
 	printf(_("      --skip-locked               skip relations that cannot be immediately locked\n"));
 	printf(_("  -t, --table='TABLE[(COLUMNS)]'  vacuum specific table(s) only\n"));
-- 
1.8.3.1
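For reference, here is how the new interface fits together once both patches are applied. This is a usage sketch based on the documentation and regression tests above, not output from an actual run:

    $ vacuumdb -P2 postgres            # issues: VACUUM (PARALLEL 2);
    $ vacuumdb --parallel=2 postgres   # same as -P2 (the worker count must be
                                       # attached to -P, since its argument is optional)
    $ vacuumdb -P postgres             # issues: VACUUM (PARALLEL);

    =# VACUUM (PARALLEL 2) tbl;        -- vacuum tbl with two parallel workers
    =# VACUUM (PARALLEL) tbl;          -- degree chosen from the number of indexes,
                                       -- capped by max_parallel_maintenance_workers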
From ae50a69d983db2c6811b08b17918033fcacff40a Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.m...@gmail.com>
Date: Tue, 18 Dec 2018 14:48:34 +0900
Subject: [PATCH v14 1/2] Add parallel option to VACUUM command

In parallel vacuum, both index vacuuming and index cleanup are done in
parallel with parallel worker processes if the table has more than one
index. All processes, including the leader process, process the indexes
one by one. Parallel vacuum is requested with syntax like
VACUUM (PARALLEL 2) tbl, which performs vacuum on tbl with two parallel
worker processes. Setting the parallel_workers reloption to a value
greater than 0 also invokes parallel vacuum. The parallel vacuum degree
is limited by both the number of indexes the table has and
max_parallel_maintenance_workers.

---
 doc/src/sgml/config.sgml              |  25 +-
 doc/src/sgml/ref/vacuum.sgml          |  28 ++
 src/backend/access/heap/vacuumlazy.c  | 897 +++++++++++++++++++++++++++++-----
 src/backend/access/transam/parallel.c |   4 +
 src/backend/commands/vacuum.c         |  79 +--
 src/backend/nodes/copyfuncs.c         |  16 +-
 src/backend/nodes/equalfuncs.c        |  13 +-
 src/backend/parser/gram.y             |  72 ++-
 src/backend/postmaster/autovacuum.c   |  14 +-
 src/backend/tcop/utility.c            |   4 +-
 src/include/access/heapam.h           |   5 +-
 src/include/commands/vacuum.h         |   2 +-
 src/include/nodes/nodes.h             |   1 +
 src/include/nodes/parsenodes.h        |  20 +-
 src/test/regress/expected/vacuum.out  |   2 +
 src/test/regress/sql/vacuum.sql       |   3 +
 16 files changed, 977 insertions(+), 208 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b6f5822..1bd1edd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2185,18 +2185,19 @@ include_dir 'conf.d'
       <listitem>
        <para>
         Sets the maximum number of parallel workers that can be
-        started by a single utility command.  Currently, the only
-        parallel utility command that supports the use of parallel
-        workers is <command>CREATE INDEX</command>, and only when
-        building a B-tree index.  Parallel workers are taken from the
-        pool of processes established by <xref
-        linkend="guc-max-worker-processes"/>, limited by <xref
-        linkend="guc-max-parallel-workers"/>.  Note that the requested
-        number of workers may not actually be available at run time.
-        If this occurs, the utility operation will run with fewer
-        workers than expected.  The default value is 2.  Setting this
-        value to 0 disables the use of parallel workers by utility
-        commands.
+        started by a single utility command.  Currently, the parallel
+        utility commands that support the use of parallel workers are
+        <command>CREATE INDEX</command>, only when building a B-tree
+        index, and <command>VACUUM</command> without the
+        <literal>FULL</literal> option.  Parallel workers are taken
+        from the pool of processes established by
+        <xref linkend="guc-max-worker-processes"/>,
+        limited by <xref linkend="guc-max-parallel-workers"/>.
+        Note that the requested number of workers may not actually be
+        available at run time.  If this occurs, the utility operation
+        will run with fewer workers than expected.  The default value
+        is 2.  Setting this value to 0 disables the use of parallel
+        workers by utility commands.
        </para>
 
        <para>
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index fd911f5..3edc623 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -30,6 +30,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     FREEZE
     VERBOSE
     ANALYZE
+    PARALLEL [ <replaceable class="parameter">N</replaceable> ]
     DISABLE_PAGE_SKIPPING
     SKIP_LOCKED
 
@@ -143,6 +144,24 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
   </varlistentry>
 
   <varlistentry>
+   <term><literal>PARALLEL <replaceable class="parameter">N</replaceable></literal></term>
+   <listitem>
+    <para>
+     Perform index vacuum and index cleanup in parallel with
+     <replaceable class="parameter">N</replaceable> background workers (for details
+     of each vacuum phase, please refer to <xref linkend="vacuum-phases"/>).  If the
+     parallel degree <replaceable class="parameter">N</replaceable> is omitted,
+     then <command>VACUUM</command> decides the number of workers based on the
+     number of indexes on the relation, which is further limited by
+     <xref linkend="guc-max-parallel-maintenance-workers"/>.  If this option
+     is specified multiple times, the last parallel degree
+     <replaceable class="parameter">N</replaceable> takes effect.
+     This option cannot be used with the <literal>FULL</literal> option.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
    <term><literal>DISABLE_PAGE_SKIPPING</literal></term>
    <listitem>
    <para>
@@ -261,6 +280,15 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
   </para>
 
   <para>
+   Setting a value for <literal>parallel_workers</literal> via
+   <xref linkend="sql-altertable"/> also controls how many parallel
+   worker processes will be requested by a <command>VACUUM</command>
+   against the table.  This setting is overridden by the parallel degree
+   <replaceable class="parameter">N</replaceable> of the <literal>PARALLEL</literal>
+   option.
+  </para>
+
+  <para>
    <command>VACUUM</command> causes a substantial increase in I/O traffic,
    which might cause poor performance for other active sessions.  Therefore,
    it is sometimes advisable to use the cost-based vacuum delay feature.
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 37aa484..e534022 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -22,6 +22,19 @@
  * of index scans performed.  So we don't use maintenance_work_mem memory for
  * the TID array, just enough to hold as many heap tuples as fit on one page.
  *
+ * Lazy vacuum supports parallel execution with parallel worker processes.  In
+ * parallel vacuum, we perform both index vacuuming and index cleanup in
+ * parallel.  Each index is processed by one vacuum process.  At the beginning
+ * of lazy vacuum (in lazy_scan_heap) we prepare the parallel context and
+ * initialize the shared memory segment that contains shared information as
+ * well as the memory space for dead tuples.  When starting either index
+ * vacuuming or index cleanup, we launch parallel worker processes.  Once all
+ * indexes are processed the parallel worker processes exit and the leader
+ * process re-initializes the shared memory segment.  Note that all parallel
+ * workers live only during one pass of either index vacuuming or index
+ * cleanup, but the leader process neither exits from the parallel mode nor
+ * destroys the parallel context.  Since updates are not allowed during
+ * parallel mode, we update the index statistics after exiting from the
+ * parallel mode.
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -41,8 +54,10 @@
 #include "access/heapam_xlog.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/parallel.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
+#include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/storage.h"
 #include "commands/dbcommands.h"
@@ -55,6 +70,7 @@
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/lmgr.h"
+#include "tcop/tcopprot.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_rusage.h"
@@ -110,10 +126,79 @@
  */
 #define PREFETCH_SIZE			((BlockNumber) 32)
 
+/* DSM keys for parallel lazy vacuum */
+#define PARALLEL_VACUUM_KEY_SHARED			UINT64CONST(0xFFFFFFFFFFF00001)
+#define PARALLEL_VACUUM_KEY_DEAD_TUPLES		UINT64CONST(0xFFFFFFFFFFF00002)
+#define PARALLEL_VACUUM_KEY_QUERY_TEXT		UINT64CONST(0xFFFFFFFFFFF00003)
+
+/*
+ * Struct for an index bulk-deletion statistic that is used for parallel
+ * lazy vacuum.  This is allocated in a dynamic shared memory segment.
+ */
+typedef struct LVIndStats
+{
+	bool		updated;		/* are the stats updated? */
+	IndexBulkDeleteResult stats;
+} LVIndStats;
+
+/*
+ * LVDeadTuples controls the dead tuple TIDs collected during the heap scan.
+ * This is allocated in a dynamic shared memory segment in parallel lazy
+ * vacuum mode, or in local memory otherwise.
+ */
+typedef struct LVDeadTuples
+{
+	int			max_tuples;		/* # slots allocated in array */
+	int			num_tuples;		/* current # of entries */
+	/* List of TIDs of tuples we intend to delete */
+	/* NB: this list is ordered by TID address */
+	ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER];	/* array of ItemPointerData */
+} LVDeadTuples;
+#define SizeOfLVDeadTuples offsetof(LVDeadTuples, itemptrs) + sizeof(ItemPointerData)
+
+/*
+ * Status for parallel index vacuuming and index cleanup.  This is allocated
+ * in a dynamic shared memory segment.
+ */
+typedef struct LVShared
+{
+	/*
+	 * Target table relid and vacuum settings.  These fields are not modified
+	 * during the lazy vacuum.
+	 */
+	Oid			relid;
+	bool		is_wraparound;
+	int			elevel;
+
+	/*
+	 * An indication for vacuum workers of whether to do index vacuuming or
+	 * index cleanup.
+	 */
+	bool		for_cleanup;
+
+	/*
+	 * Fields for index vacuuming and index cleanup, which are necessary for
+	 * IndexVacuumInfo.
+	 *
+	 * reltuples is the total number of input heap tuples.  We set either the
+	 * old live tuples in index vacuuming or the new live tuples in index
+	 * cleanup.
+	 *
+	 * estimated_count is true if reltuples is an estimated value.
+	 */
+	double		reltuples;
+	bool		estimated_count;
+
+	/*
+	 * Variables to control parallel index vacuuming.  A variable-sized field
+	 * 'indstats' must come last.
+ */ + pg_atomic_uint32 nprocessed; + LVIndStats indstats[FLEXIBLE_ARRAY_MEMBER]; +} LVShared; +#define SizeOfLVShared offsetof(LVShared, indstats) + sizeof(LVIndStats) + typedef struct LVRelStats { - /* hasindex = true means two-pass strategy; false means one-pass */ - bool hasindex; /* Overall statistics about rel */ BlockNumber old_rel_pages; /* previous value of pg_class.relpages */ BlockNumber rel_pages; /* total number of pages */ @@ -128,16 +213,35 @@ typedef struct LVRelStats BlockNumber pages_removed; double tuples_deleted; BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ - /* List of TIDs of tuples we intend to delete */ - /* NB: this list is ordered by TID address */ - int num_dead_tuples; /* current # of entries */ - int max_dead_tuples; /* # slots allocated in array */ - ItemPointer dead_tuples; /* array of ItemPointerData */ int num_index_scans; TransactionId latestRemovedXid; bool lock_waiter_detected; } LVRelStats; +/* + * Working state for lazy heap vacuum execution used by only leader process. + * This is present only in the leader process. In parallel lazy vacuum, the + * 'lvshared' and 'pcxt' are not NULL and they point to the dynamic shared + * memory segment. + */ +typedef struct LVState +{ + Relation relation; + LVRelStats *vacrelstats; + Relation *indRels; + /* nindexes > 0 means two-pass strategy; false means one-pass */ + int nindexes; + + /* Lazy vacuum options and scan status */ + VacuumOptions *options; + bool is_wraparound; + bool aggressive; + bool parallel_ready; /* true if parallel vacuum is prepared */ + + /* Variables for parallel lazy index vacuuming */ + LVShared *lvshared; + ParallelContext *pcxt; +} LVState; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -150,31 +254,43 @@ static BufferAccessStrategy vac_strategy; /* non-export function prototypes */ -static void lazy_scan_heap(Relation onerel, int options, - LVRelStats *vacrelstats, Relation *Irel, int nindexes, - bool aggressive); -static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); +static void lazy_scan_heap(LVState *lvstate); +static void lazy_vacuum_heap(LVState *lvstate, LVDeadTuples *dead_tuples); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup); -static void lazy_vacuum_index(Relation indrel, - IndexBulkDeleteResult **stats, - LVRelStats *vacrelstats); -static void lazy_cleanup_index(Relation indrel, - IndexBulkDeleteResult *stats, - LVRelStats *vacrelstats); -static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, - int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer); +static IndexBulkDeleteResult *lazy_vacuum_index(Relation indrel, + IndexBulkDeleteResult *stats, + double reltuples, + LVDeadTuples *dead_tuples); +static IndexBulkDeleteResult *lazy_cleanup_index(Relation indrel, + IndexBulkDeleteResult *stats, + double reltuples, bool estimated_count, + bool update_stats); +static int lazy_vacuum_page(LVState *lvstate, Relation onerel, BlockNumber blkno, + Buffer buffer, int tupindex, Buffer *vmbuffer, + TransactionId latestRemovedXid, LVDeadTuples *dead_tuples); static bool should_attempt_truncation(LVRelStats *vacrelstats); static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); -static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); -static void lazy_record_dead_tuple(LVRelStats *vacrelstats, - ItemPointer itemptr); +static 
LVDeadTuples *lazy_space_alloc(LVState *lvstate, BlockNumber relblocks, + int parallel_workers); +static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr); static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static int vac_cmp_itemptr(const void *left, const void *right); static bool heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen); - +static LVDeadTuples *lazy_prepare_parallel(LVState *lvstate, long maxtuples, int request); +static void lazy_end_parallel(LVState *lvstate, bool update_indstats); +static bool lazy_begin_parallel_vacuum_index(LVState *lvstate, bool for_cleanup); +static void lazy_end_parallel_vacuum_index(LVState *lvstate); +static void lazy_vacuum_all_indexes(LVState *lvstate, + IndexBulkDeleteResult **stats, + LVDeadTuples *dead_tuples, + bool for_cleanup); +static void lazy_vacuum_indexes_for_worker(Relation *indrels, int nindexes, + LVShared *lvshared, LVDeadTuples *dead_tuples, + bool for_cleanup); +static int compute_parallel_workers(Relation rel, int nrequests, int nindexes); /* * heap_vacuum_rel() -- perform VACUUM for one heap relation @@ -186,9 +302,10 @@ static bool heap_page_is_all_visible(Relation rel, Buffer buf, * and locked the relation. */ void -heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, +heap_vacuum_rel(Relation onerel, VacuumOptions *options, VacuumParams *params, BufferAccessStrategy bstrategy) { + LVState *lvstate; LVRelStats *vacrelstats; Relation *Irel; int nindexes; @@ -200,6 +317,7 @@ heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, write_rate; bool aggressive; /* should we scan all unfrozen pages? */ bool scanned_all_unfrozen; /* actually scanned all such pages? */ + bool hasindex; TransactionId xidFullScanLimit; MultiXactId mxactFullScanLimit; BlockNumber new_rel_pages; @@ -217,7 +335,7 @@ heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, starttime = GetCurrentTimestamp(); } - if (options & VACOPT_VERBOSE) + if (options->flags & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; @@ -245,7 +363,7 @@ heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, xidFullScanLimit); aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid, mxactFullScanLimit); - if (options & VACOPT_DISABLE_PAGE_SKIPPING) + if (options->flags & VACOPT_DISABLE_PAGE_SKIPPING) aggressive = true; vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); @@ -258,10 +376,23 @@ heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); - vacrelstats->hasindex = (nindexes > 0); + hasindex = (nindexes > 0); + + /* Create a lazy vacuum working state */ + lvstate = (LVState *) palloc0(sizeof(LVState)); + lvstate->relation = onerel; + lvstate->vacrelstats = vacrelstats; + lvstate->indRels = Irel; + lvstate->nindexes = nindexes; + lvstate->options = options; + lvstate->is_wraparound = params->is_wraparound; + lvstate->aggressive = aggressive; + lvstate->parallel_ready = false; + lvstate->lvshared = NULL; + lvstate->pcxt = NULL; /* Do the vacuuming */ - lazy_scan_heap(onerel, options, vacrelstats, Irel, nindexes, aggressive); + lazy_scan_heap(lvstate); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); @@ -332,7 +463,7 @@ heap_vacuum_rel(Relation onerel, int options, VacuumParams *params, new_rel_pages, new_live_tuples, new_rel_allvisible, - vacrelstats->hasindex, + hasindex, 
new_frozen_xid, new_min_multi, false); @@ -464,14 +595,29 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap * to reclaim dead line pointers. * + * If the table has more than one index and parallel lazy vacuum is requested, + * we execute both index vacuuming and index cleanup with parallel workers. When + * allocating the space for lazy scan heap, we enter the parallel mode, create + * the parallel context and initailize a dynamic shared memory segment for dead + * tuples. The dead_tuples points either to a dynamic shared memory segment in + * parallel vacuum case or to a local memory in single process vacuum case. + * Before starting parallel index vacuuming and parallel index cleanup we launch + * parallel workers. All parallel workers will exit after processed all indexes + * and the leader process re-initialize parallel context and then re-launch them + * at the next execution. The index statistics are updated by the leader after + * exited from the parallel mode since all writes are not allowed during the + * parallel mode. + * * If there are no indexes then we can reclaim line pointers on the fly; * dead line pointers need only be retained until all index pointers that * reference them have been killed. */ static void -lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, - Relation *Irel, int nindexes, bool aggressive) +lazy_scan_heap(LVState *lvstate) { + Relation onerel = lvstate->relation; + LVRelStats *vacrelstats = lvstate->vacrelstats; + LVDeadTuples *dead_tuples = NULL; BlockNumber nblocks, blkno; HeapTupleData tuple; @@ -486,7 +632,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, tups_vacuumed, /* tuples cleaned up by vacuum */ nkeep, /* dead-but-not-removable tuples */ nunused; /* unused item pointers */ - IndexBulkDeleteResult **indstats; + IndexBulkDeleteResult **indstats = NULL; int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; @@ -494,6 +640,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, bool skipping_blocks; xl_heap_freeze_tuple *frozen; StringInfoData buf; + int parallel_workers = 0; const int initprog_index[] = { PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, @@ -504,7 +651,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, pg_rusage_init(&ru0); relname = RelationGetRelationName(onerel); - if (aggressive) + if (lvstate->aggressive) ereport(elevel, (errmsg("aggressively vacuuming \"%s.%s\"", get_namespace_name(RelationGetNamespace(onerel)), @@ -519,9 +666,6 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, next_fsm_block_to_vacuum = (BlockNumber) 0; num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; - indstats = (IndexBulkDeleteResult **) - palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); - nblocks = RelationGetNumberOfBlocks(onerel); vacrelstats->rel_pages = nblocks; vacrelstats->scanned_pages = 0; @@ -529,13 +673,36 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; - lazy_space_alloc(vacrelstats, nblocks); + /* + * Compute the number of parallel vacuum worker to request and then enable + * parallel lazy vacuum. 
+ */ + if ((lvstate->options->flags & VACOPT_PARALLEL) != 0) + parallel_workers = compute_parallel_workers(lvstate->relation, + lvstate->options->nworkers, + lvstate->nindexes); + + /* + * Allocate memory space for lazy vacuum. If parallel_workers > 0, we + * prepare for parallel vacuum, entering the parallel mode, initializing + * a dynamic shared memory segment. + */ + dead_tuples = lazy_space_alloc(lvstate, nblocks, parallel_workers); frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); + /* + * allocate the memory for index bulkdelete results if in the single vacuum + * mode. In parallel mode, we've already prepared it in the shared memory + * segment. + */ + if (!lvstate->parallel_ready) + indstats = (IndexBulkDeleteResult **) + palloc0(lvstate->nindexes * sizeof(IndexBulkDeleteResult *)); + /* Report that we're scanning the heap, advertising total # of blocks */ initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP; initprog_val[1] = nblocks; - initprog_val[2] = vacrelstats->max_dead_tuples; + initprog_val[2] = dead_tuples->max_tuples; pgstat_progress_update_multi_param(3, initprog_index, initprog_val); /* @@ -583,7 +750,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * be replayed on any hot standby, where it can be disruptive. */ next_unskippable_block = 0; - if ((options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) + if ((lvstate->options->flags & VACOPT_DISABLE_PAGE_SKIPPING) == 0) { while (next_unskippable_block < nblocks) { @@ -591,7 +758,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vmstatus = visibilitymap_get_status(onerel, next_unskippable_block, &vmbuffer); - if (aggressive) + if (lvstate->aggressive) { if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0) break; @@ -638,7 +805,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, { /* Time to advance next_unskippable_block */ next_unskippable_block++; - if ((options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) + if ((lvstate->options->flags & VACOPT_DISABLE_PAGE_SKIPPING) == 0) { while (next_unskippable_block < nblocks) { @@ -647,7 +814,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vmskipflags = visibilitymap_get_status(onerel, next_unskippable_block, &vmbuffer); - if (aggressive) + if (lvstate->aggressive) { if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0) break; @@ -676,7 +843,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * it's not all-visible. But in an aggressive vacuum we know only * that it's not all-frozen, so it might still be all-visible. */ - if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) + if (lvstate->aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) all_visible_according_to_vm = true; } else @@ -700,7 +867,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * know whether it was all-frozen, so we have to recheck; but * in this case an approximate answer is OK. */ - if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) + if (lvstate->aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) vacrelstats->frozenskipped_pages++; continue; } @@ -713,8 +880,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle of vacuuming before we tackle this page. 
*/ - if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage && - vacrelstats->num_dead_tuples > 0) + if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage && + dead_tuples->num_tuples > 0) { const int hvp_index[] = { PROGRESS_VACUUM_PHASE, @@ -742,10 +909,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); /* Remove index entries */ - for (i = 0; i < nindexes; i++) - lazy_vacuum_index(Irel[i], - &indstats[i], - vacrelstats); + lazy_vacuum_all_indexes(lvstate, indstats, dead_tuples, false); /* * Report that we are now vacuuming the heap. We also increase @@ -758,14 +922,14 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, pgstat_progress_update_multi_param(2, hvp_index, hvp_val); /* Remove tuples from heap */ - lazy_vacuum_heap(onerel, vacrelstats); + lazy_vacuum_heap(lvstate, dead_tuples); /* * Forget the now-vacuumed tuples, and press on, but be careful * not to reset latestRemovedXid since we want that value to be * valid. */ - vacrelstats->num_dead_tuples = 0; + dead_tuples->num_tuples = 0; vacrelstats->num_index_scans++; /* @@ -803,7 +967,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * it's OK to skip vacuuming pages we get a lock conflict on. They * will be dealt with in some future vacuum. */ - if (!aggressive && !FORCE_CHECK_PAGE()) + if (!lvstate->aggressive && !FORCE_CHECK_PAGE()) { ReleaseBuffer(buf); vacrelstats->pinskipped_pages++; @@ -836,7 +1000,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->nonempty_pages = blkno + 1; continue; } - if (!aggressive) + if (!lvstate->aggressive) { /* * Here, we must not advance scanned_pages; that would amount @@ -955,7 +1119,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, has_dead_tuples = false; nfrozen = 0; hastup = false; - prev_dead_count = vacrelstats->num_dead_tuples; + prev_dead_count = dead_tuples->num_tuples; maxoff = PageGetMaxOffsetNumber(page); /* @@ -994,7 +1158,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, */ if (ItemIdIsDead(itemid)) { - lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); + lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); all_visible = false; continue; } @@ -1134,7 +1298,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, if (tupgone) { - lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); + lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; @@ -1203,11 +1367,12 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * If there are no indexes then we can vacuum the page right now * instead of doing a second scan. */ - if (nindexes == 0 && - vacrelstats->num_dead_tuples > 0) + if (lvstate->nindexes == 0 && dead_tuples->num_tuples > 0) { /* Remove tuples from heap */ - lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer); + lazy_vacuum_page(lvstate, onerel, blkno, buf, 0, &vmbuffer, + lvstate->vacrelstats->latestRemovedXid, + dead_tuples); has_dead_tuples = false; /* @@ -1215,7 +1380,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * not to reset latestRemovedXid since we want that value to be * valid. 
*/ - vacrelstats->num_dead_tuples = 0; + dead_tuples->num_tuples = 0; vacuumed_pages++; /* @@ -1331,7 +1496,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * page, so remember its free space as-is. (This path will always be * taken if there are no indexes.) */ - if (vacrelstats->num_dead_tuples == prev_dead_count) + if (dead_tuples->num_tuples == prev_dead_count) RecordPageWithFreeSpace(onerel, blkno, freespace); } @@ -1365,7 +1530,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, /* If any tuples need to be deleted, perform final vacuum cycle */ /* XXX put a threshold on min number of tuples here? */ - if (vacrelstats->num_dead_tuples > 0) + if (dead_tuples->num_tuples > 0) { const int hvp_index[] = { PROGRESS_VACUUM_PHASE, @@ -1381,10 +1546,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); /* Remove index entries */ - for (i = 0; i < nindexes; i++) - lazy_vacuum_index(Irel[i], - &indstats[i], - vacrelstats); + lazy_vacuum_all_indexes(lvstate, indstats, dead_tuples, false); /* Report that we are now vacuuming the heap */ hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP; @@ -1394,7 +1556,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, /* Remove tuples from heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP); - lazy_vacuum_heap(onerel, vacrelstats); + lazy_vacuum_heap(lvstate, dead_tuples); vacrelstats->num_index_scans++; } @@ -1411,8 +1573,10 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, PROGRESS_VACUUM_PHASE_INDEX_CLEANUP); /* Do post-vacuum cleanup and statistics update for each index */ - for (i = 0; i < nindexes; i++) - lazy_cleanup_index(Irel[i], indstats[i], vacrelstats); + lazy_vacuum_all_indexes(lvstate, indstats, dead_tuples, true); + + if (lvstate->parallel_ready) + lazy_end_parallel(lvstate, true); /* If no indexes, make log report that lazy_vacuum_heap would've made */ if (vacuumed_pages) @@ -1467,8 +1631,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * process index entry removal in batches as large as possible. 
*/ static void -lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) +lazy_vacuum_heap(LVState *lvstate, LVDeadTuples *dead_tuples) { + Relation onerel = lvstate->relation; int tupindex; int npages; PGRUsage ru0; @@ -1478,7 +1643,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) npages = 0; tupindex = 0; - while (tupindex < vacrelstats->num_dead_tuples) + while (tupindex < dead_tuples->num_tuples) { BlockNumber tblk; Buffer buf; @@ -1487,7 +1652,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); + tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]); buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1496,8 +1661,9 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) ++tupindex; continue; } - tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats, - &vmbuffer); + tupindex = lazy_vacuum_page(lvstate, onerel, tblk, buf, tupindex, + &vmbuffer, lvstate->vacrelstats->latestRemovedXid, + dead_tuples); /* Now that we've compacted the page, record its available space */ page = BufferGetPage(buf); @@ -1532,8 +1698,9 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) * The return value is the first tupindex after the tuples of this page. */ static int -lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, - int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer) +lazy_vacuum_page(LVState *lvstate, Relation onerel, BlockNumber blkno, + Buffer buffer, int tupindex, Buffer *vmbuffer, + TransactionId latestRemovedXid, LVDeadTuples *dead_tuples) { Page page = BufferGetPage(buffer); OffsetNumber unused[MaxOffsetNumber]; @@ -1545,16 +1712,16 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, START_CRIT_SECTION(); - for (; tupindex < vacrelstats->num_dead_tuples; tupindex++) + for (; tupindex < dead_tuples->num_tuples; tupindex++) { BlockNumber tblk; OffsetNumber toff; ItemId itemid; - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); + tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]); if (tblk != blkno) break; /* past end of tuples for this block */ - toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]); + toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]); itemid = PageGetItemId(page, toff); ItemIdSetUnused(itemid); unused[uncnt++] = toff; @@ -1575,7 +1742,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, recptr = log_heap_clean(onerel, buffer, NULL, 0, NULL, 0, unused, uncnt, - vacrelstats->latestRemovedXid); + latestRemovedXid); PageSetLSN(page, recptr); } @@ -1674,6 +1841,98 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup) return false; } +/* + * Vacuum or cleanup all indexes. If we're ready for the parallel vacuum it's + * done with parallel workers. So this function must be used by the parallel + * vacuum leader process. + * + * In parallel lazy vacuum, we copy the index bulk-deletion results returned + * from ambulkdelete and amvacuumcleanup to the shared memory because they are + * allocated in the local memory and it's possible that an index will be + * vacuumed by the different vacuum process at the next time. + * + * Since all vacuum workers process different indexes we can write them without + * locking. 
+ */ +static void +lazy_vacuum_all_indexes(LVState *lvstate, IndexBulkDeleteResult **stats, + LVDeadTuples *dead_tuples, bool for_cleanup) +{ + LVShared *lvshared = lvstate->lvshared; + LVRelStats *vacrelstats = lvstate->vacrelstats; + int nprocessed = 0; + bool do_parallel = false; + int idx; + + Assert(!IsParallelWorker()); + Assert(lvstate->parallel_ready || + (!lvstate->parallel_ready && stats != NULL)); + + /* no job if the table has no index */ + if (lvstate->nindexes <= 0) + return; + + if (lvstate->parallel_ready) + do_parallel = lazy_begin_parallel_vacuum_index(lvstate, for_cleanup); + + for (;;) + { + IndexBulkDeleteResult *result = NULL; + + /* Get the next index number to vacuum and set index statistics */ + if (do_parallel) + { + idx = pg_atomic_fetch_add_u32(&(lvshared->nprocessed), 1); + + /* + * If there is already-updated result in the shared memory we + * use it. Otherwise we pass NULL to index AMs as they expect + * NULL for the first time exectuion. + */ + if (lvshared->indstats[idx].updated) + result = &(lvshared->indstats[idx].stats); + } + else + { + idx = nprocessed++; + result = stats[idx]; + } + + /* Done for all indexes? */ + if (idx >= lvstate->nindexes) + break; + + /* + * Vacuum or cleanup one index. For index cleanup, we don't update + * index statistics during parallel mode. + */ + if (for_cleanup) + result = lazy_cleanup_index(lvstate->indRels[idx], result, + vacrelstats->new_rel_tuples, + vacrelstats->tupcount_pages < vacrelstats->rel_pages, + !do_parallel); + else + result = lazy_vacuum_index(lvstate->indRels[idx], result, + vacrelstats->old_rel_pages, + dead_tuples); + + if (do_parallel && result) + { + /* + * Save index bulk-deletion result to the shared memory space if + * this is the first time. + */ + if (!lvshared->indstats[idx].updated) + memcpy(&(lvshared->indstats[idx].stats), result, + sizeof(IndexBulkDeleteResult)); + + lvshared->indstats[idx].updated = true; + } + } + + if (do_parallel) + lazy_end_parallel_vacuum_index(lvstate); +} /* * lazy_vacuum_index() -- vacuum one index relation. @@ -1681,11 +1940,11 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup) * Delete all the index entries pointing to tuples listed in * vacrelstats->dead_tuples, and update running statistics. */ -static void -lazy_vacuum_index(Relation indrel, - IndexBulkDeleteResult **stats, - LVRelStats *vacrelstats) +static IndexBulkDeleteResult * +lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult *stats, + double reltuples, LVDeadTuples *dead_tuples) { + IndexBulkDeleteResult *res; IndexVacuumInfo ivinfo; PGRUsage ru0; @@ -1695,28 +1954,29 @@ lazy_vacuum_index(Relation indrel, ivinfo.analyze_only = false; ivinfo.estimated_count = true; ivinfo.message_level = elevel; - /* We can only provide an approximate value of num_heap_tuples here */ - ivinfo.num_heap_tuples = vacrelstats->old_live_tuples; + ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; /* Do bulk deletion */ - *stats = index_bulk_delete(&ivinfo, *stats, - lazy_tid_reaped, (void *) vacrelstats); + res = index_bulk_delete(&ivinfo, stats, + lazy_tid_reaped, (void *) dead_tuples); ereport(elevel, - (errmsg("scanned index \"%s\" to remove %d row versions", + (errmsg("scanned index \"%s\" to remove %d row versions %s", RelationGetRelationName(indrel), - vacrelstats->num_dead_tuples), + dead_tuples->num_tuples, + IsParallelWorker() ? 
"by parallel vacuum worker" : ""), errdetail_internal("%s", pg_rusage_show(&ru0)))); + + return res; } /* * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation. */ -static void -lazy_cleanup_index(Relation indrel, - IndexBulkDeleteResult *stats, - LVRelStats *vacrelstats) +static IndexBulkDeleteResult * +lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult *stats, + double reltuples, bool estimated_count, bool update_stats) { IndexVacuumInfo ivinfo; PGRUsage ru0; @@ -1725,27 +1985,21 @@ lazy_cleanup_index(Relation indrel, ivinfo.index = indrel; ivinfo.analyze_only = false; - ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages); + ivinfo.estimated_count = estimated_count; ivinfo.message_level = elevel; - - /* - * Now we can provide a better estimate of total number of surviving - * tuples (we assume indexes are more interested in that than in the - * number of nominally live tuples). - */ - ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples; + ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; stats = index_vacuum_cleanup(&ivinfo, stats); if (!stats) - return; + return NULL; /* * Now update statistics in pg_class, but only if the index says the count * is accurate. */ - if (!stats->estimated_count) + if (!stats->estimated_count && update_stats) vac_update_relstats(indrel, stats->num_pages, stats->num_index_tuples, @@ -1766,8 +2020,7 @@ lazy_cleanup_index(Relation indrel, stats->tuples_removed, stats->pages_deleted, stats->pages_free, pg_rusage_show(&ru0)))); - - pfree(stats); + return stats; } /* @@ -2077,15 +2330,16 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) * * See the comments at the head of this file for rationale. */ -static void -lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) +static LVDeadTuples * +lazy_space_alloc(LVState *lvstate, BlockNumber relblocks, int parallel_workers) { + LVDeadTuples *dead_tuples = NULL; long maxtuples; int vac_work_mem = IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1 ? autovacuum_work_mem : maintenance_work_mem; - if (vacrelstats->hasindex) + if (lvstate->nindexes > 0) { maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData); maxtuples = Min(maxtuples, INT_MAX); @@ -2099,34 +2353,45 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) maxtuples = Max(maxtuples, MaxHeapTuplesPerPage); } else - { maxtuples = MaxHeapTuplesPerPage; + + /* + * In parallel lazy vacuum, we enter the parallel mode and prepare all + * memory necessary for executing the parallel lazy vacuum including the + * space to store dead tuples. + */ + if (parallel_workers > 0) + { + dead_tuples = lazy_prepare_parallel(lvstate, maxtuples, parallel_workers); + Assert(dead_tuples != NULL); + + return dead_tuples; } - vacrelstats->num_dead_tuples = 0; - vacrelstats->max_dead_tuples = (int) maxtuples; - vacrelstats->dead_tuples = (ItemPointer) - palloc(maxtuples * sizeof(ItemPointerData)); + dead_tuples = (LVDeadTuples *) palloc(SizeOfLVDeadTuples + maxtuples * sizeof(ItemPointerData)); + dead_tuples->num_tuples = 0; + dead_tuples->max_tuples = (int) maxtuples; + + return dead_tuples; } /* * lazy_record_dead_tuple - remember one deletable tuple */ static void -lazy_record_dead_tuple(LVRelStats *vacrelstats, - ItemPointer itemptr) +lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr) { /* * The array shouldn't overflow under normal behavior, but perhaps it * could if we are given a really small maintenance_work_mem. 
In that * case, just forget the last few tuples (we'll get 'em next time). */ - if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples) + if (dead_tuples->num_tuples < dead_tuples->max_tuples) { - vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; - vacrelstats->num_dead_tuples++; + dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr; + dead_tuples->num_tuples++; pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, - vacrelstats->num_dead_tuples); + dead_tuples->num_tuples); } } @@ -2140,12 +2405,12 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats, static bool lazy_tid_reaped(ItemPointer itemptr, void *state) { - LVRelStats *vacrelstats = (LVRelStats *) state; + LVDeadTuples *dead_tuples = (LVDeadTuples *) state; ItemPointer res; res = (ItemPointer) bsearch((void *) itemptr, - (void *) vacrelstats->dead_tuples, - vacrelstats->num_dead_tuples, + (void *) dead_tuples->itemptrs, + dead_tuples->num_tuples, sizeof(ItemPointerData), vac_cmp_itemptr); @@ -2293,3 +2558,387 @@ heap_page_is_all_visible(Relation rel, Buffer buf, return all_visible; } + +/* + * Compute the number of parallel worker process to request. Vacuums can be + * executed in parallel if the table has more than one index since the parallel + * index vacuuming processes one index by one vacuum process. The relation + * size of table and indexes doesn't affect to the parallel degree. + */ +static int +compute_parallel_workers(Relation rel, int nrequests, int nindexes) +{ + int parallel_workers; + + if (nindexes <= 1) + return 0; + + if (nrequests) + parallel_workers = Min(nrequests, nindexes - 1); + else if (rel->rd_options) + { + StdRdOptions *relopts = (StdRdOptions *) rel->rd_options; + parallel_workers = Min(relopts->parallel_workers, nindexes - 1); + } + else + { + /* + * The parallel degree is neither requested nor set in relopts. Compute + * it based on the number of indexes. + */ + parallel_workers = nindexes - 1; + } + + /* cap by max_parallel_maintenace_workers */ + parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers); + + return parallel_workers; +} + +/* + * Enter the parallel mode, allocate and initialize a DSM segment. Return + * the memory space for storing dead tuples. + */ +static LVDeadTuples * +lazy_prepare_parallel(LVState *lvstate, long maxtuples, int request) +{ + LVShared *shared; + ParallelContext *pcxt; + LVDeadTuples *tidmap; + char *sharedquery; + Size estshared; + Size estdt; + int querylen; + int i; + int keys = 0; + + Assert(request > 0); + + EnterParallelMode(); + + pcxt = CreateParallelContext("postgres", "heap_parallel_vacuum_main", + request, true); + /* + * nworkers must be prepared as we always request at least one worker + * and allow serializable isolation. + */ + Assert(pcxt->nworkers > 0); + lvstate->pcxt = pcxt; + + /* quick exit if no workers are prepared, e.g. 
under serializable isolation */ + if (pcxt->nworkers == 0) + { + lazy_end_parallel(lvstate, false); + return NULL; + } + + /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */ + estshared = MAXALIGN(add_size(SizeOfLVShared, + mul_size(sizeof(LVIndStats), lvstate->nindexes))); + shm_toc_estimate_chunk(&pcxt->estimator, estshared); + keys++; + + /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */ + estdt = MAXALIGN(add_size(sizeof(LVDeadTuples), + mul_size(sizeof(ItemPointerData), maxtuples))); + shm_toc_estimate_chunk(&pcxt->estimator, estdt); + keys++; + + shm_toc_estimate_keys(&pcxt->estimator, keys); + + /* + * Finally, estimate VACUUM_KEY_QUERY_TEXT space. Auto vacuums don't have + * debug_query_string. + */ + querylen = strlen(debug_query_string); + shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + /* create the DSM */ + InitializeParallelDSM(pcxt); + + /* prepare shared information */ + shared = (LVShared *) shm_toc_allocate(pcxt->toc, estshared); + shared->relid = RelationGetRelid(lvstate->relation); + shared->is_wraparound = lvstate->is_wraparound; + shared->elevel = elevel; + pg_atomic_init_u32(&(shared->nprocessed), 0); + + for (i = 0; i < lvstate->nindexes; i++) + { + LVIndStats *s = &(shared->indstats[i]); + s->updated = false; + MemSet(&(s->stats), 0, sizeof(IndexBulkDeleteResult)); + } + + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared); + lvstate->lvshared = shared; + + /* prepare the dead tuple space */ + tidmap = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, estdt); + tidmap->max_tuples = maxtuples; + tidmap->num_tuples = 0; + MemSet(tidmap->itemptrs, 0, sizeof(ItemPointerData) * maxtuples); + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, tidmap); + + /* Store query string for workers */ + sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1); + memcpy(sharedquery, debug_query_string, querylen + 1); + sharedquery[querylen] = '\0'; + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery); + + /* All setup is done, now we're ready for parallel vacuum execution */ + lvstate->parallel_ready = true; + + return tidmap; +} + +/* + * Shutdown workers, destroy parallel context, and end parallel mode. If + * 'update_indstats' is true, we copy statistics of all indexes before + * destroying the parallel context, and then update them after exit parallel + * mode. 
+ */ +static void +lazy_end_parallel(LVState *lvstate, bool update_indstats) +{ + LVIndStats *copied_indstats = NULL; + + Assert(!IsParallelWorker()); + + if (update_indstats && lvstate->nindexes > 0) + { + /* copy the index statistics to a temporary space */ + copied_indstats = palloc(sizeof(LVIndStats) * lvstate->nindexes); + memcpy(copied_indstats, lvstate->lvshared->indstats, + sizeof(LVIndStats) * lvstate->nindexes); + } + + /* Shutdown worker processes and destroy the parallel context */ + WaitForParallelWorkersToFinish(lvstate->pcxt); + DestroyParallelContext(lvstate->pcxt); + ExitParallelMode(); + + if (copied_indstats) + { + int i; + + for (i = 0; i < lvstate->nindexes; i++) + { + LVIndStats *s = &(copied_indstats[i]); + + /* Update index statistics */ + if (s->updated && !s->stats.estimated_count) + vac_update_relstats(lvstate->indRels[i], + s->stats.num_pages, + s->stats.num_index_tuples, + 0, + false, + InvalidTransactionId, + InvalidMultiXactId, + false); + } + + pfree(copied_indstats); + } + + lvstate->parallel_ready = false; +} + +/* + * Begin a parallel index vacuuming or index cleanup. Set shared information + * and launch parallel worker processes. Return true if at least one worker + * has been launched. + */ +static bool +lazy_begin_parallel_vacuum_index(LVState *lvstate, bool for_cleanup) +{ + LVRelStats *vacrelstats = lvstate->vacrelstats; + StringInfoData buf; + + Assert(!IsParallelWorker()); + + /* + * Request workers to do either vacuuming indexes or cleaning indexes. + */ + lvstate->lvshared->for_cleanup = for_cleanup; + + if (for_cleanup) + { + /* + * Now we can provide a better estimate of total number of surviving + * tuples (we assume indexes are more interested in that than in the + * number of nominally live tuples). + */ + lvstate->lvshared->reltuples = vacrelstats->new_rel_tuples; + lvstate->lvshared->estimated_count = + (vacrelstats->tupcount_pages < vacrelstats->rel_pages); + } + else + { + /* We can only provide an approximate value of num_heap_tuples here */ + lvstate->lvshared->reltuples = vacrelstats->old_live_tuples; + lvstate->lvshared->estimated_count = true; + } + + LaunchParallelWorkers(lvstate->pcxt); + + /* Report parallel vacuum worker information */ + initStringInfo(&buf); + appendStringInfo(&buf, + ngettext("launched %d parallel vacuum worker %s (planned: %d", + "launched %d parallel vacuum workers %s (planned: %d", + lvstate->pcxt->nworkers_launched), + lvstate->pcxt->nworkers_launched, + for_cleanup ? "for index cleanup" : "for index vacuuming", + lvstate->pcxt->nworkers); + if (lvstate->options->nworkers > 0) + appendStringInfo(&buf, ", requested %d", lvstate->options->nworkers); + + appendStringInfo(&buf, ")"); + ereport(elevel, (errmsg("%s", buf.data))); + + /* + * if no workers launched, we vacuum all indexes by the leader process + * alone. Since there is hope that we can launch workers in the next + * execution time we don't want to end the parallel mode yet. + */ + if (lvstate->pcxt->nworkers_launched == 0) + { + lazy_end_parallel_vacuum_index(lvstate); + return false; + } + + WaitForParallelWorkersToAttach(lvstate->pcxt); + + return true; +} + +/* + * Wait for all worker processes to finish and reinitialize DSM for + * the next execution. 
+ */ +static void +lazy_end_parallel_vacuum_index(LVState *lvstate) +{ + Assert(!IsParallelWorker()); + + WaitForParallelWorkersToFinish(lvstate->pcxt); + + /* Reset the processing count */ + pg_atomic_write_u32(&(lvstate->lvshared->nprocessed), 0); + + /* + * Reinitialize the DSM space except to relaunch parallel workers for + * the next execution. + */ + ReinitializeParallelDSM(lvstate->pcxt); +} + +/* + * Perform work within a launched parallel process. + * + * Parallel vacuum worker processes doesn't report the vacuum progress + * information. + */ +void +heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) +{ + Relation onerel; + Relation *indrels; + LVShared *lvshared; + LVDeadTuples *dead_tuples; + int nindexes; + char *sharedquery; + + /* Set lazy vacuum state and open relations */ + lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, false); + onerel = heap_open(lvshared->relid, ShareUpdateExclusiveLock); + vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels); + elevel = lvshared->elevel; + + ereport(DEBUG1, + (errmsg("starting parallel lazy vacuum worker for %s", + lvshared->for_cleanup ? "cleanup" : "vacuuming"))); + + /* Set debug_query_string for individual workers */ + sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true); + + /* Report the query string from leader */ + debug_query_string = sharedquery; + pgstat_report_activity(STATE_RUNNING, debug_query_string); + + /* Set dead tuple space within worker */ + dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, false); + + /* Set cost-based vacuum delay */ + VacuumCostActive = (VacuumCostDelay > 0); + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + + /* Do either vacuuming indexes or cleaning indexes */ + lazy_vacuum_indexes_for_worker(indrels, nindexes, lvshared, + dead_tuples, + lvshared->for_cleanup); + + vac_close_indexes(nindexes, indrels, RowExclusiveLock); + heap_close(onerel, ShareUpdateExclusiveLock); +} + +/* + * Vacuum or cleanup indexes. This function must be used by the parallel vacuum + * worker processes. Similar to the leader process in parallel lazy vacuum, we + * copy the index bulk-deletion results to the shared memory segment. + */ +static void +lazy_vacuum_indexes_for_worker(Relation *indrels, int nindexes, + LVShared *lvshared, LVDeadTuples *dead_tuples, + bool for_cleanup) +{ + int idx = 0; + + Assert(IsParallelWorker()); + + for (;;) + { + IndexBulkDeleteResult *result = NULL; + + /* Get next index to process */ + idx = pg_atomic_fetch_add_u32(&(lvshared->nprocessed), 1); + + /* Done for all indexes? */ + if (idx >= nindexes) + break; + + /* + * If there is already-updated result in the shared memory we + * use it. Otherwise we pass NULL to index AMs as they expect + * NULL for the first time exectuion. + */ + if (lvshared->indstats[idx].updated) + result = &(lvshared->indstats[idx].stats); + + /* Vacuum or cleanup one index */ + if (for_cleanup) + result = lazy_cleanup_index(indrels[idx], result, lvshared->reltuples, + lvshared->estimated_count, false); + else + result = lazy_vacuum_index(indrels[idx], result, lvshared->reltuples, + dead_tuples); + + if (result) + { + /* + * Save index bulk-deletion result to the shared memory space if + * this is the first time. 
+ */ + if (!lvshared->indstats[idx].updated) + memcpy(&(lvshared->indstats[idx].stats), result, + sizeof(IndexBulkDeleteResult)); + + lvshared->indstats[idx].updated = true; + } + } +} diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index ce2b616..fb1e951 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -14,6 +14,7 @@ #include "postgres.h" +#include "access/heapam.h" #include "access/nbtree.h" #include "access/parallel.h" #include "access/session.h" @@ -138,6 +139,9 @@ static const struct }, { "_bt_parallel_build_main", _bt_parallel_build_main + }, + { + "heap_parallel_vacuum_main", heap_parallel_vacuum_main } }; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e91df21..1b64f15 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -67,13 +67,13 @@ static BufferAccessStrategy vac_strategy; /* non-export function prototypes */ -static List *expand_vacuum_rel(VacuumRelation *vrel, int options); -static List *get_all_vacuum_rels(int options); +static List *expand_vacuum_rel(VacuumRelation *vrel, VacuumOptions *options); +static List *get_all_vacuum_rels(VacuumOptions *options); static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti); -static bool vacuum_rel(Oid relid, RangeVar *relation, int options, +static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumOptions *options, VacuumParams *params); /* @@ -88,15 +88,15 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) VacuumParams params; /* sanity checks on options */ - Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE)); - Assert((vacstmt->options & VACOPT_VACUUM) || - !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE))); - Assert(!(vacstmt->options & VACOPT_SKIPTOAST)); + Assert(vacstmt->options->flags & (VACOPT_VACUUM | VACOPT_ANALYZE)); + Assert((vacstmt->options->flags & VACOPT_VACUUM) || + !(vacstmt->options->flags & (VACOPT_FULL | VACOPT_FREEZE))); + Assert(!(vacstmt->options->flags & VACOPT_SKIPTOAST)); /* * Make sure VACOPT_ANALYZE is specified if any column lists are present. */ - if (!(vacstmt->options & VACOPT_ANALYZE)) + if (!(vacstmt->options->flags & VACOPT_ANALYZE)) { ListCell *lc; @@ -111,11 +111,17 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) } } + if ((vacstmt->options->flags & VACOPT_FULL) && + (vacstmt->options->flags & VACOPT_PARALLEL)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify FULL option with PARALLEL option"))); + /* * All freeze ages are zero if the FREEZE option is given; otherwise pass * them as -1 which means to use the default values. */ - if (vacstmt->options & VACOPT_FREEZE) + if (vacstmt->options->flags & VACOPT_FREEZE) { params.freeze_min_age = 0; params.freeze_table_age = 0; @@ -143,7 +149,7 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) /* * Internal entry point for VACUUM and ANALYZE commands. * - * options is a bitmask of VacuumOption flags, indicating what to do. + * options is a VacuumOptions, indicating what to do. * * relations, if not NIL, is a list of VacuumRelation to process; otherwise, * we process all relevant tables in the database. For each VacuumRelation, @@ -162,7 +168,7 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) * memory context that will not disappear at transaction commit. 
*/ void -vacuum(int options, List *relations, VacuumParams *params, +vacuum(VacuumOptions *options, List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel) { static bool in_vacuum = false; @@ -173,7 +179,7 @@ vacuum(int options, List *relations, VacuumParams *params, Assert(params != NULL); - stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; + stmttype = (options->flags & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside @@ -183,7 +189,7 @@ vacuum(int options, List *relations, VacuumParams *params, * * ANALYZE (without VACUUM) can run either way. */ - if (options & VACOPT_VACUUM) + if (options->flags & VACOPT_VACUUM) { PreventInTransactionBlock(isTopLevel, stmttype); in_outer_xact = false; @@ -205,8 +211,8 @@ vacuum(int options, List *relations, VacuumParams *params, /* * Sanity check DISABLE_PAGE_SKIPPING option. */ - if ((options & VACOPT_FULL) != 0 && - (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) + if ((options->flags & VACOPT_FULL) != 0 && + (options->flags & VACOPT_DISABLE_PAGE_SKIPPING) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); @@ -215,7 +221,7 @@ vacuum(int options, List *relations, VacuumParams *params, * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ - if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) + if ((options->flags & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* @@ -280,11 +286,11 @@ vacuum(int options, List *relations, VacuumParams *params, * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ - if (options & VACOPT_VACUUM) + if (options->flags & VACOPT_VACUUM) use_own_xacts = true; else { - Assert(options & VACOPT_ANALYZE); + Assert(options->flags & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) @@ -334,13 +340,13 @@ vacuum(int options, List *relations, VacuumParams *params, { VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); - if (options & VACOPT_VACUUM) + if (options->flags & VACOPT_VACUUM) { if (!vacuum_rel(vrel->oid, vrel->relation, options, params)) continue; } - if (options & VACOPT_ANALYZE) + if (options->flags & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, @@ -353,7 +359,7 @@ vacuum(int options, List *relations, VacuumParams *params, PushActiveSnapshot(GetTransactionSnapshot()); } - analyze_rel(vrel->oid, vrel->relation, options, params, + analyze_rel(vrel->oid, vrel->relation, options->flags, params, vrel->va_cols, in_outer_xact, vac_strategy); if (use_own_xacts) @@ -389,7 +395,7 @@ vacuum(int options, List *relations, VacuumParams *params, StartTransactionCommand(); } - if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) + if ((options->flags & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) { /* * Update pg_database.datfrozenxid, and truncate pg_xact if possible. @@ -602,7 +608,7 @@ vacuum_open_relation(Oid relid, RangeVar *relation, VacuumParams *params, * are made in vac_context. 
*/ static List * -expand_vacuum_rel(VacuumRelation *vrel, int options) +expand_vacuum_rel(VacuumRelation *vrel, VacuumOptions *options) { List *vacrels = NIL; MemoryContext oldcontext; @@ -634,7 +640,7 @@ expand_vacuum_rel(VacuumRelation *vrel, int options) * below, as well as find_all_inheritors's expectation that the caller * holds some lock on the starting relation. */ - rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0; + rvr_opts = (options->flags & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0; relid = RangeVarGetRelidExtended(vrel->relation, AccessShareLock, rvr_opts, @@ -646,7 +652,7 @@ expand_vacuum_rel(VacuumRelation *vrel, int options) */ if (!OidIsValid(relid)) { - if (options & VACOPT_VACUUM) + if (options->flags & VACOPT_VACUUM) ereport(WARNING, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("skipping vacuum of \"%s\" --- lock not available", @@ -672,7 +678,7 @@ expand_vacuum_rel(VacuumRelation *vrel, int options) * Make a returnable VacuumRelation for this rel if user is a proper * owner. */ - if (vacuum_is_relation_owner(relid, classForm, options)) + if (vacuum_is_relation_owner(relid, classForm, options->flags)) { oldcontext = MemoryContextSwitchTo(vac_context); vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation, @@ -741,7 +747,7 @@ expand_vacuum_rel(VacuumRelation *vrel, int options) * the current database. The list is built in vac_context. */ static List * -get_all_vacuum_rels(int options) +get_all_vacuum_rels(VacuumOptions *options) { List *vacrels = NIL; Relation pgclass; @@ -759,7 +765,7 @@ get_all_vacuum_rels(int options) Oid relid = classForm->oid; /* check permissions of relation */ - if (!vacuum_is_relation_owner(relid, classForm, options)) + if (!vacuum_is_relation_owner(relid, classForm, options->flags)) continue; /* @@ -1520,7 +1526,8 @@ vac_truncate_clog(TransactionId frozenXID, * At entry and exit, we are not inside a transaction. */ static bool -vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) +vacuum_rel(Oid relid, RangeVar *relation, VacuumOptions *options, + VacuumParams *params) { LOCKMODE lmode; Relation onerel; @@ -1541,7 +1548,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) */ PushActiveSnapshot(GetTransactionSnapshot()); - if (!(options & VACOPT_FULL)) + if (!(options->flags & VACOPT_FULL)) { /* * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets @@ -1581,10 +1588,10 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either * way, we can be sure that no other backend is vacuuming the same table. */ - lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock; + lmode = (options->flags & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock; /* open the relation and get the appropriate lock on it */ - onerel = vacuum_open_relation(relid, relation, params, options, lmode); + onerel = vacuum_open_relation(relid, relation, params, options->flags, lmode); /* leave if relation could not be opened or locked */ if (!onerel) @@ -1604,7 +1611,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) */ if (!vacuum_is_relation_owner(RelationGetRelid(onerel), onerel->rd_rel, - options & VACOPT_VACUUM)) + options->flags & VACOPT_VACUUM)) { relation_close(onerel, lmode); PopActiveSnapshot(); @@ -1676,7 +1683,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) * us to process it. 
In VACUUM FULL, though, the toast table is
 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
 */
-	if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
+	if (!(options->flags & VACOPT_SKIPTOAST) && !(options->flags & VACOPT_FULL))
 		toast_relid = onerel->rd_rel->reltoastrelid;
 	else
 		toast_relid = InvalidOid;
@@ -1695,7 +1702,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
 	/*
 	 * Do the actual work --- either FULL or "lazy" vacuum
 	 */
-	if (options & VACOPT_FULL)
+	if (options->flags & VACOPT_FULL)
 	{
 		int			cluster_options = 0;
 
@@ -1703,7 +1710,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
 		relation_close(onerel, NoLock);
 		onerel = NULL;
 
-		if ((options & VACOPT_VERBOSE) != 0)
+		if ((options->flags & VACOPT_VERBOSE) != 0)
 			cluster_options |= CLUOPT_VERBOSE;
 
 		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index b44ead2..9e576e0 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3842,12 +3842,23 @@ _copyDropdbStmt(const DropdbStmt *from)
 	return newnode;
 }
 
+static VacuumOptions *
+_copyVacuumOptions(const VacuumOptions *from)
+{
+	VacuumOptions *newnode = makeNode(VacuumOptions);
+
+	COPY_SCALAR_FIELD(flags);
+	COPY_SCALAR_FIELD(nworkers);
+
+	return newnode;
+}
+
 static VacuumStmt *
 _copyVacuumStmt(const VacuumStmt *from)
 {
 	VacuumStmt *newnode = makeNode(VacuumStmt);
 
-	COPY_SCALAR_FIELD(options);
+	COPY_NODE_FIELD(options);
 	COPY_NODE_FIELD(rels);
 
 	return newnode;
@@ -5320,6 +5331,9 @@ copyObjectImpl(const void *from)
 		case T_DropdbStmt:
 			retval = _copyDropdbStmt(from);
 			break;
+		case T_VacuumOptions:
+			retval = _copyVacuumOptions(from);
+			break;
 		case T_VacuumStmt:
 			retval = _copyVacuumStmt(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 1e169e0..011a25f 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1666,9 +1666,18 @@ _equalDropdbStmt(const DropdbStmt *a, const DropdbStmt *b)
 }
 
 static bool
+_equalVacuumOptions(const VacuumOptions *a, const VacuumOptions *b)
+{
+	COMPARE_SCALAR_FIELD(flags);
+	COMPARE_SCALAR_FIELD(nworkers);
+
+	return true;
+}
+
+static bool
 _equalVacuumStmt(const VacuumStmt *a, const VacuumStmt *b)
 {
-	COMPARE_SCALAR_FIELD(options);
+	COMPARE_NODE_FIELD(options);
 	COMPARE_NODE_FIELD(rels);
 
 	return true;
@@ -3385,6 +3394,9 @@ equal(const void *a, const void *b)
 		case T_DropdbStmt:
 			retval = _equalDropdbStmt(a, b);
 			break;
+		case T_VacuumOptions:
+			retval = _equalVacuumOptions(a, b);
+			break;
 		case T_VacuumStmt:
 			retval = _equalVacuumStmt(a, b);
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c1faf41..d2cd4a2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -187,6 +187,7 @@ static void processCASbits(int cas_bits, int location, const char *constrType,
 							   bool *deferrable, bool *initdeferred, bool *not_valid,
 							   bool *no_inherit, core_yyscan_t yyscanner);
 static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
+static VacuumOptions *makeVacOpt(VacuumFlag flag, int nworkers);
 
 %}
 
@@ -237,6 +238,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	struct ImportQual	*importqual;
 	InsertStmt			*istmt;
 	VariableSetStmt		*vsetstmt;
+	VacuumOptions		*vacopt;
 	PartitionElem		*partelem;
 	PartitionSpec		*partspec;
 	PartitionBoundSpec	*partboundspec;
@@ -305,8 +307,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				create_extension_opt_item alter_extension_opt_item
 
 %type <ival>	opt_lock lock_type cast_context
-%type <ival>	vacuum_option_list vacuum_option_elem
-				analyze_option_list analyze_option_elem
+%type <vacopt>	vacuum_option_list vacuum_option_elem
+%type <ival>	analyze_option_list analyze_option_elem
 
 %type <boolean> opt_or_replace
 				opt_grant_grant_option opt_grant_admin_option
 				opt_nowait opt_if_exists opt_with_data
@@ -10430,22 +10432,23 @@ cluster_index_specification:
 
 VacuumStmt: VACUUM opt_full opt_freeze opt_verbose opt_analyze opt_vacuum_relation_list
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
-					n->options = VACOPT_VACUUM;
+					n->options = makeVacOpt(VACOPT_VACUUM, 0);
 					if ($2)
-						n->options |= VACOPT_FULL;
+						n->options->flags |= VACOPT_FULL;
 					if ($3)
-						n->options |= VACOPT_FREEZE;
+						n->options->flags |= VACOPT_FREEZE;
 					if ($4)
-						n->options |= VACOPT_VERBOSE;
+						n->options->flags |= VACOPT_VERBOSE;
 					if ($5)
-						n->options |= VACOPT_ANALYZE;
+						n->options->flags |= VACOPT_ANALYZE;
 					n->rels = $6;
 					$$ = (Node *)n;
 				}
 			| VACUUM '(' vacuum_option_list ')' opt_vacuum_relation_list
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
-					n->options = VACOPT_VACUUM | $3;
+					n->options = $3;
+					n->options->flags |= VACOPT_VACUUM;
 					n->rels = $5;
 					$$ = (Node *) n;
 				}
@@ -10453,20 +10456,40 @@ VacuumStmt: VACUUM opt_full opt_freeze opt_verbose opt_analyze opt_vacuum_relati
 
 vacuum_option_list:
 			vacuum_option_elem								{ $$ = $1; }
-			| vacuum_option_list ',' vacuum_option_elem		{ $$ = $1 | $3; }
+			| vacuum_option_list ',' vacuum_option_elem
+				{
+					VacuumOptions *vacopt1 = $1;
+					VacuumOptions *vacopt2 = $3;
+
+					vacopt1->flags |= vacopt2->flags;
+					if (vacopt2->flags == VACOPT_PARALLEL)
+						vacopt1->nworkers = vacopt2->nworkers;
+					pfree(vacopt2);
+					$$ = vacopt1;
+				}
 		;
 
 vacuum_option_elem:
-			analyze_keyword		{ $$ = VACOPT_ANALYZE; }
-			| VERBOSE			{ $$ = VACOPT_VERBOSE; }
-			| FREEZE			{ $$ = VACOPT_FREEZE; }
-			| FULL				{ $$ = VACOPT_FULL; }
+			analyze_keyword		{ $$ = makeVacOpt(VACOPT_ANALYZE, 0); }
+			| VERBOSE			{ $$ = makeVacOpt(VACOPT_VERBOSE, 0); }
+			| FREEZE			{ $$ = makeVacOpt(VACOPT_FREEZE, 0); }
+			| FULL				{ $$ = makeVacOpt(VACOPT_FULL, 0); }
+			| PARALLEL			{ $$ = makeVacOpt(VACOPT_PARALLEL, 0); }
+			| PARALLEL ICONST
+				{
+					if ($2 < 1)
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								 errmsg("parallel vacuum degree must be at least 1"),
+								 parser_errposition(@1)));
+					$$ = makeVacOpt(VACOPT_PARALLEL, $2);
+				}
 			| IDENT
 				{
 					if (strcmp($1, "disable_page_skipping") == 0)
-						$$ = VACOPT_DISABLE_PAGE_SKIPPING;
+						$$ = makeVacOpt(VACOPT_DISABLE_PAGE_SKIPPING, 0);
 					else if (strcmp($1, "skip_locked") == 0)
-						$$ = VACOPT_SKIP_LOCKED;
+						$$ = makeVacOpt(VACOPT_SKIP_LOCKED, 0);
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -10478,16 +10501,16 @@ vacuum_option_elem:
 AnalyzeStmt: analyze_keyword opt_verbose opt_vacuum_relation_list
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
-					n->options = VACOPT_ANALYZE;
+					n->options = makeVacOpt(VACOPT_ANALYZE, 0);
 					if ($2)
-						n->options |= VACOPT_VERBOSE;
+						n->options->flags |= VACOPT_VERBOSE;
 					n->rels = $3;
 					$$ = (Node *)n;
 				}
 			| analyze_keyword '(' analyze_option_list ')' opt_vacuum_relation_list
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
-					n->options = VACOPT_ANALYZE | $3;
+					n->options = makeVacOpt(VACOPT_ANALYZE | $3, 0);
 					n->rels = $5;
 					$$ = (Node *) n;
 				}
@@ -15985,6 +16008,19 @@ makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args,
 	return (Node *) x;
 }
 
+
+/*
+ * Create a VacuumOptions node with the given options.
+ */ +static VacuumOptions * +makeVacOpt(VacuumFlag flag, int nworkers) +{ + VacuumOptions *vacopt = makeNode(VacuumOptions); + + vacopt->flags = flag; + vacopt->nworkers = nworkers; + return vacopt; +} /* * Merge the input and output parameters of a table function. */ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index d1177b3..22ec846 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -187,8 +187,8 @@ typedef struct av_relation typedef struct autovac_table { Oid at_relid; - int at_vacoptions; /* bitmask of VacuumOption */ - VacuumParams at_params; + VacuumOptions *at_vacoptions; + VacuumParams at_params; int at_vacuum_cost_delay; int at_vacuum_cost_limit; bool at_dobalance; @@ -2481,7 +2481,7 @@ do_autovacuum(void) * next table in our list. */ HOLD_INTERRUPTS(); - if (tab->at_vacoptions & VACOPT_VACUUM) + if (tab->at_vacoptions->flags & VACOPT_VACUUM) errcontext("automatic vacuum of table \"%s.%s.%s\"", tab->at_datname, tab->at_nspname, tab->at_relname); else @@ -2882,10 +2882,12 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, tab = palloc(sizeof(autovac_table)); tab->at_relid = relid; tab->at_sharedrel = classForm->relisshared; - tab->at_vacoptions = VACOPT_SKIPTOAST | + tab->at_vacoptions = makeNode(VacuumOptions); + tab->at_vacoptions->flags = VACOPT_SKIPTOAST | (dovacuum ? VACOPT_VACUUM : 0) | (doanalyze ? VACOPT_ANALYZE : 0) | (!wraparound ? VACOPT_SKIP_LOCKED : 0); + tab->at_vacoptions->nworkers = 0; /* parallel lazy vacuum is not supported */ tab->at_params.freeze_min_age = freeze_min_age; tab->at_params.freeze_table_age = freeze_table_age; tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age; @@ -3131,10 +3133,10 @@ autovac_report_activity(autovac_table *tab) int len; /* Report the command and possible options */ - if (tab->at_vacoptions & VACOPT_VACUUM) + if (tab->at_vacoptions->flags & VACOPT_VACUUM) snprintf(activity, MAX_AUTOVAC_ACTIV_LEN, "autovacuum: VACUUM%s", - tab->at_vacoptions & VACOPT_ANALYZE ? " ANALYZE" : ""); + tab->at_vacoptions->flags & VACOPT_ANALYZE ? " ANALYZE" : ""); else snprintf(activity, MAX_AUTOVAC_ACTIV_LEN, "autovacuum: ANALYZE"); diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 6ec795f..a735ff9 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -664,7 +664,7 @@ standard_ProcessUtility(PlannedStmt *pstmt, VacuumStmt *stmt = (VacuumStmt *) parsetree; /* we choose to allow this during "read only" transactions */ - PreventCommandDuringRecovery((stmt->options & VACOPT_VACUUM) ? + PreventCommandDuringRecovery((stmt->options->flags & VACOPT_VACUUM) ? 
"VACUUM" : "ANALYZE"); /* forbidden in parallel mode due to CommandIsReadOnly */ ExecVacuum(stmt, isTopLevel); @@ -2570,7 +2570,7 @@ CreateCommandTag(Node *parsetree) break; case T_VacuumStmt: - if (((VacuumStmt *) parsetree)->options & VACOPT_VACUUM) + if (((VacuumStmt *) parsetree)->options->flags & VACOPT_VACUUM) tag = "VACUUM"; else tag = "ANALYZE"; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index ab08791..62e75d8 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -14,11 +14,13 @@ #ifndef HEAPAM_H #define HEAPAM_H +#include "access/parallel.h" #include "access/relation.h" /* for backward compatibility */ #include "access/sdir.h" #include "access/skey.h" #include "access/table.h" /* for backward compatibility */ #include "nodes/lockoptions.h" +#include "nodes/parsenodes.h" #include "nodes/primnodes.h" #include "storage/bufpage.h" #include "storage/lockdefs.h" @@ -185,8 +187,9 @@ extern Size SyncScanShmemSize(void); /* in heap/vacuumlazy.c */ struct VacuumParams; -extern void heap_vacuum_rel(Relation onerel, int options, +extern void heap_vacuum_rel(Relation onerel, VacuumOptions *options, struct VacuumParams *params, BufferAccessStrategy bstrategy); +extern void heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); /* in heap/heapam_visibility.c */ extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 0a051ec..dd71f0d 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -163,7 +163,7 @@ extern int vacuum_multixact_freeze_table_age; /* in commands/vacuum.c */ extern void ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel); -extern void vacuum(int options, List *relations, VacuumParams *params, +extern void vacuum(VacuumOptions *options, List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel); extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index e215ad4..70b9231 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -474,6 +474,7 @@ typedef enum NodeTag T_PartitionBoundSpec, T_PartitionRangeDatum, T_PartitionCmd, + T_VacuumOptions, T_VacuumRelation, /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 2fe14d7..526caa2 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3147,7 +3147,7 @@ typedef struct ClusterStmt * and VACOPT_ANALYZE must be set in options. * ---------------------- */ -typedef enum VacuumOption +typedef enum VacuumFlag { VACOPT_VACUUM = 1 << 0, /* do VACUUM */ VACOPT_ANALYZE = 1 << 1, /* do ANALYZE */ @@ -3156,8 +3156,16 @@ typedef enum VacuumOption VACOPT_FULL = 1 << 4, /* FULL (non-concurrent) vacuum */ VACOPT_SKIP_LOCKED = 1 << 5, /* skip if cannot get lock */ VACOPT_SKIPTOAST = 1 << 6, /* don't process the TOAST table, if any */ - VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7 /* don't skip any pages */ -} VacuumOption; + VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7, /* don't skip any pages */ + VACOPT_PARALLEL = 1 << 8 /* do lazy VACUUM in parallel */ +} VacuumFlag; + +typedef struct VacuumOptions +{ + NodeTag type; + VacuumFlag flags; /* OR of VacuumFlag */ + int nworkers; /* # of parallel vacuum workers */ +} VacuumOptions; /* * Info about a single target table of VACUUM/ANALYZE. 
@@ -3176,9 +3184,9 @@ typedef struct VacuumRelation typedef struct VacuumStmt { - NodeTag type; - int options; /* OR of VacuumOption flags */ - List *rels; /* list of VacuumRelation, or NIL for all */ + NodeTag type; + VacuumOptions *options; + List *rels; /* list of VacuumRelation, or NIL for all */ } VacuumStmt; /* ---------------------- diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out index fa9d663..9b5b7dc 100644 --- a/src/test/regress/expected/vacuum.out +++ b/src/test/regress/expected/vacuum.out @@ -80,6 +80,8 @@ CONTEXT: SQL function "do_analyze" statement 1 SQL function "wrap_do_analyze" statement 1 VACUUM FULL vactst; VACUUM (DISABLE_PAGE_SKIPPING) vaccluster; +VACUUM (PARALLEL) vaccluster; +VACUUM (PARALLEL 2) vaccluster; -- partitioned table CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a); CREATE TABLE vacparted1 PARTITION OF vacparted FOR VALUES IN (1); diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql index 9defa0d..f92c4e5 100644 --- a/src/test/regress/sql/vacuum.sql +++ b/src/test/regress/sql/vacuum.sql @@ -61,6 +61,9 @@ VACUUM FULL vaccluster; VACUUM FULL vactst; VACUUM (DISABLE_PAGE_SKIPPING) vaccluster; +VACUUM (PARALLEL) vaccluster; +VACUUM (PARALLEL 2) vaccluster; + -- partitioned table CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a); -- 1.8.3.1
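
For reviewers, a quick sketch of the syntax this patch adds (not part of
the patch itself; the table name is hypothetical, and I'm assuming that
PARALLEL without a degree leaves the choice of worker count to the
server, per nworkers = 0 in the grammar):

    VACUUM (PARALLEL) test_tbl;         -- nworkers = 0, server decides
    VACUUM (PARALLEL 2) test_tbl;       -- request two parallel vacuum workers
    VACUUM (PARALLEL 0) test_tbl;       -- ERROR: parallel vacuum degree
                                        --        must be at least 1
    VACUUM (FULL, PARALLEL 2) test_tbl; -- ERROR: cannot specify FULL option
                                        --        with PARALLEL option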