At 2014-09-25 15:40:11 +0530, a...@2ndquadrant.com wrote: > > All right, then I'll post a version that addresses Amit's other > points, adds a new file/function to pgstattuple, acquires content > locks, and uses HeapTupleSatisfiesVacuum, hint-bit setting and all.
Sorry, I forgot to post this patch. It does what I listed above, and seems to work fine (it's still quite a lot faster than pgstattuple in many cases). A couple of remaining questions: 1. I didn't change the handling of LP_DEAD items, because the way it is now matches what pgstattuple counts. I'm open to changing it, though. Maybe it makes sense to count LP_DEAD items iff lp_len != 0? Or just leave it as it is? I think it doesn't matter much. 2. I changed the code to acquire the content locks on the buffer, as discussed, but it still uses HeapTupleSatisfiesVacuum. Amit suggested using HeapTupleSatisfiesVisibility, but it's not clear to me why that would be better. I welcome advice in this matter. (If anything, I should use HeapTupleIsSurelyDead, which doesn't set any hint bits, but which I earlier rejected as too conservative in its dead/not-dead decisions for this purpose.) (I've actually patched the pgstattuple.sgml docs as well, but I want to re-read that to make sure it's up to date, and didn't want to wait to post the code changes.) I also didn't change the name. I figure it's easy enough to change it everywhere once all the remaining pieces are in place. Comments welcome. -- Abhijit
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index 862585c..bae80c9 100644 --- a/contrib/pgstattuple/Makefile +++ b/contrib/pgstattuple/Makefile @@ -1,10 +1,10 @@ # contrib/pgstattuple/Makefile MODULE_big = pgstattuple -OBJS = pgstattuple.o pgstatindex.o $(WIN32RES) +OBJS = pgstattuple.o pgstatindex.o fastbloat.o $(WIN32RES) EXTENSION = pgstattuple -DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql +DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql PGFILEDESC = "pgstattuple - tuple-level statistics" REGRESS = pgstattuple diff --git a/contrib/pgstattuple/fastbloat.c b/contrib/pgstattuple/fastbloat.c new file mode 100644 index 0000000..b33db14 --- /dev/null +++ b/contrib/pgstattuple/fastbloat.c @@ -0,0 +1,367 @@ +/* + * contrib/pgstattuple/fastbloat.c + * + * Abhijit Menon-Sen <a...@2ndquadrant.com> + * Portions Copyright (c) 2001,2002 Tatsuo Ishii (from pg_stattuple) + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "access/visibilitymap.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/multixact.h" +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/freespace.h" +#include "storage/procarray.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/tqual.h" +#include "commands/vacuum.h" + +PG_FUNCTION_INFO_V1(fastbloat); +PG_FUNCTION_INFO_V1(fastbloatbyid); + +/* + * tuple_percent, dead_tuple_percent and free_percent are computable, + * so not defined here. + */ +typedef struct fastbloat_output_type +{ + uint64 table_len; + uint64 tuple_count; + uint64 tuple_len; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + uint64 free_space; + uint64 total_pages; + uint64 scanned_pages; +} fastbloat_output_type; + +static Datum build_output_type(fastbloat_output_type *stat, + FunctionCallInfo fcinfo); +static Datum fbstat_relation(Relation rel, FunctionCallInfo fcinfo); +static Datum fbstat_heap(Relation rel, FunctionCallInfo fcinfo); + +/* + * build a fastbloat_output_type tuple + */ +static Datum +build_output_type(fastbloat_output_type *stat, FunctionCallInfo fcinfo) +{ +#define NCOLUMNS 10 +#define NCHARS 32 + + HeapTuple tuple; + char *values[NCOLUMNS]; + char values_buf[NCOLUMNS][NCHARS]; + int i; + double tuple_percent; + double dead_tuple_percent; + double free_percent; /* free/reusable space in % */ + double scanned_percent; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* + * Generate attribute metadata needed later to produce tuples from raw C + * strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + if (stat->table_len == 0) + { + tuple_percent = 0.0; + dead_tuple_percent = 0.0; + free_percent = 0.0; + } + else + { + tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + free_percent = 100.0 * stat->free_space / stat->table_len; + } + + scanned_percent = 0.0; + if (stat->total_pages != 0) + { + scanned_percent = 100 * stat->scanned_pages / stat->total_pages; + } + + for (i = 0; i < NCOLUMNS; i++) + values[i] = values_buf[i]; + i = 0; + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len); + snprintf(values[i++], NCHARS, "%.2f", scanned_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len); + snprintf(values[i++], NCHARS, "%.2f", tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len); + snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space); + snprintf(values[i++], NCHARS, "%.2f", free_percent); + + tuple = BuildTupleFromCStrings(attinmeta, values); + + return HeapTupleGetDatum(tuple); +} + +/* Returns live/dead tuple statistics for the named table. */ + +Datum +fastbloat(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + RangeVar *relrv; + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use fastbloat functions")))); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + PG_RETURN_DATUM(fbstat_relation(rel, fcinfo)); +} + +/* As above, but takes a reloid instead of a relation name. */ + +Datum +fastbloatbyid(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use fastbloat functions")))); + + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(fbstat_relation(rel, fcinfo)); +} + +/* + * A helper function to reject unsupported relation types. We depend on + * the visibility map to decide which pages we can skip, so we can't + * support indexes, for example, which don't have a VM. + */ + +static Datum +fbstat_relation(Relation rel, FunctionCallInfo fcinfo) +{ + const char *err; + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + return fbstat_heap(rel, fcinfo); + case RELKIND_TOASTVALUE: + err = "toast value"; + break; + case RELKIND_SEQUENCE: + err = "sequence"; + break; + case RELKIND_INDEX: + err = "index"; + break; + case RELKIND_VIEW: + err = "view"; + break; + case RELKIND_COMPOSITE_TYPE: + err = "composite type"; + break; + case RELKIND_FOREIGN_TABLE: + err = "foreign table"; + break; + default: + err = "unknown"; + break; + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" (%s) is not supported", + RelationGetRelationName(rel), err))); + return 0; +} + +/* + * This function takes an already open relation and scans its pages, + * skipping those that have the corresponding visibility map bit set. + * For pages we skip, we find the free space from the free space map + * and approximate tuple_len on that basis. For the others, we count + * the exact number of dead tuples etc. + * + * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but + * we do not try to avoid skipping single pages. + */ + +static Datum +fbstat_heap(Relation rel, FunctionCallInfo fcinfo) +{ + BlockNumber scanned, + nblocks, + blkno; + Buffer vmbuffer = InvalidBuffer; + fastbloat_output_type stat = {0}; + BufferAccessStrategy bstrategy; + TransactionId OldestXmin; + + OldestXmin = GetOldestXmin(rel, true); + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + scanned = 0; + nblocks = RelationGetNumberOfBlocks(rel); + + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + Size freespace; + + CHECK_FOR_INTERRUPTS(); + + /* + * If the page has only visible tuples, then we can find out the + * free space from the FSM and move on. + */ + + if (visibilitymap_test(rel, blkno, &vmbuffer)) + { + freespace = GetRecordedFreeSpace(rel, blkno); + stat.tuple_len += BLCKSZ - freespace; + stat.free_space += freespace; + continue; + } + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buf); + + stat.free_space += PageGetHeapFreeSpace(page); + + if (PageIsNew(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + scanned++; + + /* + * Look at each tuple on the page and decide whether it's live + * or dead, then count it and its size. Unlike lazy_scan_heap, + * we can afford to ignore problems and special cases. + */ + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleData tuple; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || + ItemIdIsDead(itemid)) + { + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) + { + case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_DEAD: + stat.dead_tuple_len += tuple.t_len; + stat.dead_tuple_count++; + break; + case HEAPTUPLE_LIVE: + stat.tuple_len += tuple.t_len; + stat.tuple_count++; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + case HEAPTUPLE_DELETE_IN_PROGRESS: + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + } + + UnlockReleaseBuffer(buf); + } + + stat.table_len = (uint64) nblocks * BLCKSZ; + stat.tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned, + stat.tuple_count); + stat.total_pages = nblocks; + stat.scanned_pages = scanned; + + if (BufferIsValid(vmbuffer)) + { + ReleaseBuffer(vmbuffer); + vmbuffer = InvalidBuffer; + } + + relation_close(rel, AccessShareLock); + + return build_output_type(&stat, fcinfo); +} diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql new file mode 100644 index 0000000..8a4064e --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql @@ -0,0 +1,32 @@ +/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION fastbloat(IN relname text, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'fastbloat' +LANGUAGE C STRICT; + +CREATE FUNCTION fastbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'fastbloatbyid' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql similarity index 59% rename from contrib/pgstattuple/pgstattuple--1.2.sql rename to contrib/pgstattuple/pgstattuple--1.3.sql index e5fa2f5..0a111e6 100644 --- a/contrib/pgstattuple/pgstattuple--1.2.sql +++ b/contrib/pgstattuple/pgstattuple--1.3.sql @@ -1,4 +1,4 @@ -/* contrib/pgstattuple/pgstattuple--1.2.sql */ +/* contrib/pgstattuple/pgstattuple--1.3.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit @@ -77,3 +77,33 @@ CREATE FUNCTION pg_relpages(IN relname regclass) RETURNS BIGINT AS 'MODULE_PATHNAME', 'pg_relpagesbyid' LANGUAGE C STRICT; + +/* New stuff in 1.3 begins here */ + +CREATE FUNCTION fastbloat(IN relname text, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'fastbloat' +LANGUAGE C STRICT; + +CREATE FUNCTION fastbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'fastbloatbyid' +LANGUAGE C STRICT;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers