Attached v4 patch fixes the latest comments from Robert and Masahiko-san. Changes: 1) Let heap_force_kill and freeze functions to be used with toast tables. 2) Replace "int nskippedItems" with "bool did_modify_page" flag to know if any modification happened in the page or not. 3) Declare some of the variables such as buf, page inside of the do loop instead of declaring them at the top of heap_force_common function.
-- With Regards, Ashutosh Sharma EnterpriseDB:http://www.enterprisedb.com On Thu, Aug 6, 2020 at 2:49 PM Masahiko Sawada <masahiko.saw...@2ndquadrant.com> wrote: > > On Thu, 6 Aug 2020 at 18:05, Ashutosh Sharma <ashu.coe...@gmail.com> wrote: > > > > Hello Masahiko-san, > > > > Thanks for looking into the patch. Please find my comments inline below: > > > > On Thu, Aug 6, 2020 at 1:42 PM Masahiko Sawada > > <masahiko.saw...@2ndquadrant.com> wrote: > > > > > > On Wed, 5 Aug 2020 at 22:42, Ashutosh Sharma <ashu.coe...@gmail.com> > > > wrote: > > > > Please check the attached patch for the changes. > > > > > > I also looked at this version patch and have some small comments: > > > > > > + Oid relid = PG_GETARG_OID(0); > > > + ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1); > > > + ItemPointer tids; > > > + int ntids; > > > + Relation rel; > > > + Buffer buf; > > > + Page page; > > > + ItemId itemid; > > > + BlockNumber blkno; > > > + OffsetNumber *offnos; > > > + OffsetNumber offno, > > > + noffs, > > > + curr_start_ptr, > > > + next_start_ptr, > > > + maxoffset; > > > + int i, > > > + nskippedItems, > > > + nblocks; > > > > > > You declare all variables at the top of heap_force_common() function > > > but I think we can declare some variables such as buf, page inside of > > > the do loop. > > > > > > > Sure, I will do this in the next version of patch. > > > > > --- > > > + if (offnos[i] > maxoffset) > > > + { > > > + ereport(NOTICE, > > > + errmsg("skipping tid (%u, %u) because it > > > contains an invalid offset", > > > + blkno, offnos[i])); > > > + continue; > > > + } > > > > > > If all tids on a page take the above path, we will end up logging FPI > > > in spite of modifying nothing on the page. > > > > > > > Yeah, that's right. I've taken care of this in the new version of > > patch which I am yet to share. > > > > > --- > > > + /* XLOG stuff */ > > > + if (RelationNeedsWAL(rel)) > > > + log_newpage_buffer(buf, true); > > > > > > I think we need to set the returned LSN by log_newpage_buffer() to the > > > page lsn. > > > > > > > I think we are already setting the page lsn in the log_newpage() which > > is being called from log_newpage_buffer(). So, AFAIU, no change would > > be required here. Please let me know if I am missing something. > > You're right. I'd missed the comment of log_newpage_buffer(). Thanks! > > Regards, > > -- > Masahiko Sawada http://www.2ndQuadrant.com/ > PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 6d616af51a8f9e6352145f799f635ec907ed83c2 Mon Sep 17 00:00:00 2001 From: ashu <ashutosh.sha...@enterprisedb.com> Date: Thu, 6 Aug 2020 18:02:29 +0530 Subject: [PATCH] Add contrib/pg_surgery to perform surgery on a damaged heap table. This contrib module basically adds a couple of functions named heap_force_kill and heap_force_freeze that can be used in the scripts to perform surgery on the damaged heap tables. Ashutosh Sharma. --- contrib/Makefile | 1 + contrib/pg_surgery/Makefile | 23 ++ contrib/pg_surgery/expected/pg_surgery.out | 78 +++++++ contrib/pg_surgery/heap_surgery.c | 362 +++++++++++++++++++++++++++++ contrib/pg_surgery/pg_surgery--1.0.sql | 18 ++ contrib/pg_surgery/pg_surgery.control | 5 + contrib/pg_surgery/sql/pg_surgery.sql | 61 +++++ 7 files changed, 548 insertions(+) create mode 100644 contrib/pg_surgery/Makefile create mode 100644 contrib/pg_surgery/expected/pg_surgery.out create mode 100644 contrib/pg_surgery/heap_surgery.c create mode 100644 contrib/pg_surgery/pg_surgery--1.0.sql create mode 100644 contrib/pg_surgery/pg_surgery.control create mode 100644 contrib/pg_surgery/sql/pg_surgery.sql diff --git a/contrib/Makefile b/contrib/Makefile index 1846d41..07d5734 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -35,6 +35,7 @@ SUBDIRS = \ pg_standby \ pg_stat_statements \ pg_trgm \ + pg_surgery \ pgcrypto \ pgrowlocks \ pgstattuple \ diff --git a/contrib/pg_surgery/Makefile b/contrib/pg_surgery/Makefile new file mode 100644 index 0000000..ecf2e20 --- /dev/null +++ b/contrib/pg_surgery/Makefile @@ -0,0 +1,23 @@ +# contrib/pg_surgery/Makefile + +MODULE_big = pg_surgery +OBJS = \ + $(WIN32RES) \ + heap_surgery.o + +EXTENSION = pg_surgery +DATA = pg_surgery--1.0.sql +PGFILEDESC = "pg_surgery - perform surgery on a damaged relation" + +REGRESS = pg_surgery + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_surgery +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_surgery/expected/pg_surgery.out b/contrib/pg_surgery/expected/pg_surgery.out new file mode 100644 index 0000000..c01fcf1 --- /dev/null +++ b/contrib/pg_surgery/expected/pg_surgery.out @@ -0,0 +1,78 @@ +create extension pg_surgery; +-- +-- check that using heap_force_kill and heap_force_freeze functions with the +-- normal heap table passes. +-- +-- normal heap table. +begin; +create table htab(a int); +insert into htab values (100), (200), (300), (400), (500); +select * from htab where xmin = 2; + a +--- +(0 rows) + +select heap_force_freeze('htab'::regclass, ARRAY['(0, 4)']::tid[]); + heap_force_freeze +------------------- + +(1 row) + +select ctid, xmax from htab where xmin = 2; + ctid | xmax +-------+------ + (0,4) | 0 +(1 row) + +select heap_force_kill('htab'::regclass, ARRAY['(0, 4)']::tid[]); + heap_force_kill +----------------- + +(1 row) + +select * from htab where ctid = '(0, 4)'; + a +--- +(0 rows) + +rollback; +-- +-- check that using heap_force_kill and heap_force_freeze functions with the +-- unsupported relations fails. +-- +-- partitioned tables (the parent table) doesn't contain any tuple. +create table ptab (a int) partition by list (a); +select heap_force_kill('ptab'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +select heap_force_freeze('ptab'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +create index ptab_idx on ptab (a); +-- indexes are not supported, should fail. +select heap_force_kill('ptab_idx'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +select heap_force_freeze('ptab_idx'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +create view vw as select 1; +-- views are not supported as well. so, all these should fail. +select heap_force_kill('vw'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +select heap_force_freeze('vw'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +create materialized view mvw as select 10; +-- materialized views are not supported as well. so, all these should fail. +select heap_force_kill('mvw'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: "mvw" is not a table or TOAST table +select heap_force_freeze('mvw'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: "mvw" is not a table or TOAST table +create sequence seq; +-- sequences are not supported as well. so, all these functions should fail. +select heap_force_kill('seq'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +select heap_force_freeze('seq'::regclass, ARRAY['(0, 1)']::tid[]); +ERROR: only heap AM is supported +-- cleanup. +drop materialized view mvw; +drop table ptab; +drop view vw; +drop sequence seq; +drop extension pg_surgery; diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c new file mode 100644 index 0000000..0c1b66e --- /dev/null +++ b/contrib/pg_surgery/heap_surgery.c @@ -0,0 +1,362 @@ +/*------------------------------------------------------------------------- + * + * heap_surgery.c + * Functions to perform surgery on the damaged heap table. + * + * Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_surgery/heap_surgery.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "catalog/pg_am_d.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "utils/acl.h" +#include "utils/rel.h" + +PG_MODULE_MAGIC; + +/* Options to forcefully change the state of a heap tuple. */ +typedef enum HeapTupleForceOption +{ + HEAP_FORCE_KILL, + HEAP_FORCE_FREEZE +} HeapTupleForceOption; + +PG_FUNCTION_INFO_V1(heap_force_kill); +PG_FUNCTION_INFO_V1(heap_force_freeze); + +static int32 tidcmp(const void *a, const void *b); +static Datum heap_force_common(FunctionCallInfo fcinfo, + HeapTupleForceOption heap_force_opt); +static void sanity_check_tid_array(ArrayType *ta, int *ntids); +static void sanity_check_relation(Relation rel); +static BlockNumber tids_same_page_fetch_offnums(ItemPointer tids, int ntids, + OffsetNumber *next_start_ptr, + OffsetNumber *offnos); + +/*------------------------------------------------------------------------- + * heap_force_kill() + * + * Force kill the tuple(s) pointed to by the item pointer(s) stored in the + * given tid array. + * + * Usage: SELECT heap_force_kill(regclass, tid[]); + *------------------------------------------------------------------------- + */ +Datum +heap_force_kill(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL)); +} + +/*------------------------------------------------------------------------- + * heap_force_freeze() + * + * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the + * given tid array. + * + * Usage: SELECT heap_force_freeze(regclass, tid[]); + *------------------------------------------------------------------------- + */ +Datum +heap_force_freeze(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE)); +} + +/*------------------------------------------------------------------------- + * heap_force_common() + * + * Common code for heap_force_kill and heap_force_freeze + *------------------------------------------------------------------------- + */ +static Datum +heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) +{ + Oid relid = PG_GETARG_OID(0); + ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1); + ItemPointer tids; + int ntids, + nblocks; + Relation rel; + OffsetNumber *offnos; + OffsetNumber noffs, + curr_start_ptr, + next_start_ptr; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("heap surgery functions cannot be executed during recovery."))); + + /* Basic sanity checking. */ + sanity_check_tid_array(ta, &ntids); + + rel = relation_open(relid, RowExclusiveLock); + + sanity_check_relation(rel); + + tids = ((ItemPointer) ARR_DATA_PTR(ta)); + + /* + * If there is more than one tid in the array, sort it so that we can + * easily fetch all the tids belonging to one particular page from the + * array. + */ + if (ntids > 1) + qsort((void*) tids, ntids, sizeof(ItemPointerData), tidcmp); + + offnos = (OffsetNumber *) palloc(ntids * sizeof(OffsetNumber)); + noffs = curr_start_ptr = next_start_ptr = 0; + nblocks = RelationGetNumberOfBlocks(rel); + + do + { + Buffer buf; + Page page; + BlockNumber blkno; + OffsetNumber maxoffset; + int i; + bool did_modify_page; + + /* + * Get the offset numbers from the tids belonging to one particular page + * and process them one by one. + */ + blkno = tids_same_page_fetch_offnums(tids, ntids, &next_start_ptr, + offnos); + + if (blkno < 0 || blkno >= nblocks) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block number %u is out of range for relation \"%s\"", + blkno, RelationGetRelationName(rel)))); + + CHECK_FOR_INTERRUPTS(); + + buf = ReadBuffer(rel, blkno); + LockBufferForCleanup(buf); + + page = BufferGetPage(buf); + + /* Calculate the number of offsets stored in offnos array. */ + noffs = next_start_ptr - curr_start_ptr; + + maxoffset = PageGetMaxOffsetNumber(page); + + did_modify_page = false; + + /* No ereport(ERROR) from here until all the changes are logged. */ + START_CRIT_SECTION(); + + for (i = 0; i < noffs; i++) + { + OffsetNumber offno; + ItemId itemid; + + if (offnos[i] > maxoffset) + { + ereport(NOTICE, + errmsg("skipping tid (%u, %u) because it contains an invalid offset", + blkno, offnos[i])); + continue; + } + + itemid = PageGetItemId(page, offnos[i]); + + /* Follow any redirections until we find something useful. */ + while (ItemIdIsRedirected(itemid)) + { + offno = ItemIdGetRedirect(itemid); + itemid = PageGetItemId(page, offno); + CHECK_FOR_INTERRUPTS(); + } + + /* Nothing to do if the itemid is unused or already dead. */ + if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid)) + { + if (!ItemIdIsUsed(itemid)) + ereport(NOTICE, + (errmsg("skipping tid (%u, %u) because it is marked unused", + blkno, offnos[i]))); + else + ereport(NOTICE, + (errmsg("skipping tid (%u, %u) because it is marked dead", + blkno, offnos[i]))); + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + did_modify_page = true; + + if (heap_force_opt == HEAP_FORCE_KILL) + ItemIdSetDead(itemid); + else + { + HeapTupleHeader htup; + + Assert(heap_force_opt == HEAP_FORCE_FREEZE); + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + HeapTupleHeaderSetXmin(htup, FrozenTransactionId); + HeapTupleHeaderSetXmax(htup, InvalidTransactionId); + + /* + * Clear all the visibility-related bits of this tuple and mark + * it as frozen. + */ + htup->t_infomask &= ~HEAP_XACT_MASK; + htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID); + } + } + + /* + * If we didn't modify the page, let's not mark the buffer as dirty or + * do the WAL logging. + */ + if (!did_modify_page) + goto skip_wal; + + /* Mark buffer dirty before we write WAL. */ + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + log_newpage_buffer(buf, true); + +skip_wal: + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); + + /* + * Update the current start pointer before fetching next set of offset + * numbers from the tids belonging to the same page. + */ + curr_start_ptr = next_start_ptr; + } while (next_start_ptr != ntids); + + relation_close(rel, RowExclusiveLock); + + pfree(ta); + pfree(offnos); + + PG_RETURN_VOID(); +} + +/*------------------------------------------------------------------------- + * tidcmp() + * + * Compare two item pointers, return -1, 0, or +1. + * + * See ItemPointerCompare for details. + * ------------------------------------------------------------------------ + */ +static int32 +tidcmp(const void *a, const void *b) +{ + ItemPointer iptr1 = ((const ItemPointer) a); + ItemPointer iptr2 = ((const ItemPointer) b); + + return ItemPointerCompare(iptr1, iptr2); +} + +/*------------------------------------------------------------------------- + * sanity_check_tid_array() + * + * Perform sanity check on the given tid array. + * ------------------------------------------------------------------------ + */ +static void +sanity_check_tid_array(ArrayType *ta, int *ntids) +{ + if (ARR_HASNULL(ta) && array_contains_nulls(ta)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta)); + + if (*ntids == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("empty tid array"))); +} + +/*------------------------------------------------------------------------- + * sanity_check_relation() + * + * Perform sanity check on the given relation. + * ------------------------------------------------------------------------ + */ +static void +sanity_check_relation(Relation rel) +{ + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_TOASTVALUE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table or TOAST table", + RelationGetRelationName(rel)))); + + if (pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), ACL_ALL_RIGHTS_RELATION) != ACLCHECK_OK) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied for relation \"%s\"", + RelationGetRelationName(rel)))); +} + +/*------------------------------------------------------------------------- + * tids_same_page_fetch_offnums() + * + * Find out all the tids residing in the same page as tids[next_start_ptr] and + * fetch the offset number stored in each of them into a caller-allocated offset + * number array. + * ------------------------------------------------------------------------ + */ +static BlockNumber +tids_same_page_fetch_offnums(ItemPointer tids, int ntids, + OffsetNumber *next_start_ptr, OffsetNumber *offnos) +{ + int i; + BlockNumber prev_blkno, + blkno; + OffsetNumber offno; + + for (i = *next_start_ptr; i < ntids; i++) + { + ItemPointerData tid = tids[i]; + + if (!ItemPointerIsValid(&tid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid item pointer"))); + + blkno = ItemPointerGetBlockNumberNoCheck(&tid); + offno = ItemPointerGetOffsetNumberNoCheck(&tid); + + if (i == *next_start_ptr || (prev_blkno == blkno)) + offnos[i - *next_start_ptr] = offno; + else + break; + + prev_blkno = blkno; + } + + *next_start_ptr = i; + return prev_blkno; +} \ No newline at end of file diff --git a/contrib/pg_surgery/pg_surgery--1.0.sql b/contrib/pg_surgery/pg_surgery--1.0.sql new file mode 100644 index 0000000..2ae7f22 --- /dev/null +++ b/contrib/pg_surgery/pg_surgery--1.0.sql @@ -0,0 +1,18 @@ +/* contrib/pg_surgery/pg_surgery--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_surgery" to load this file. \quit + +CREATE FUNCTION heap_force_kill(reloid regclass, tids tid[]) +RETURNS VOID +AS 'MODULE_PATHNAME', 'heap_force_kill' +LANGUAGE C STRICT; + +REVOKE EXECUTE ON FUNCTION heap_force_kill(regclass, tid[]) FROM PUBLIC; + +CREATE FUNCTION heap_force_freeze(reloid regclass, tids tid[]) +RETURNS VOID +AS 'MODULE_PATHNAME', 'heap_force_freeze' +LANGUAGE C STRICT; + +REVOKE EXECUTE ON FUNCTION heap_force_freeze(regclass, tid[]) FROM PUBLIC; \ No newline at end of file diff --git a/contrib/pg_surgery/pg_surgery.control b/contrib/pg_surgery/pg_surgery.control new file mode 100644 index 0000000..2bcdad1 --- /dev/null +++ b/contrib/pg_surgery/pg_surgery.control @@ -0,0 +1,5 @@ +# pg_surgery extension +comment = 'extension to perform surgery on a damaged relation' +default_version = '1.0' +module_pathname = '$libdir/pg_surgery' +relocatable = true diff --git a/contrib/pg_surgery/sql/pg_surgery.sql b/contrib/pg_surgery/sql/pg_surgery.sql new file mode 100644 index 0000000..bcd1878 --- /dev/null +++ b/contrib/pg_surgery/sql/pg_surgery.sql @@ -0,0 +1,61 @@ +create extension pg_surgery; + +-- +-- check that using heap_force_kill and heap_force_freeze functions with the +-- normal heap table passes. +-- + +-- normal heap table. +begin; +create table htab(a int); +insert into htab values (100), (200), (300), (400), (500); + +select * from htab where xmin = 2; +select heap_force_freeze('htab'::regclass, ARRAY['(0, 4)']::tid[]); +select ctid, xmax from htab where xmin = 2; + +select heap_force_kill('htab'::regclass, ARRAY['(0, 4)']::tid[]); +select * from htab where ctid = '(0, 4)'; +rollback; + +-- +-- check that using heap_force_kill and heap_force_freeze functions with the +-- unsupported relations fails. +-- + +-- partitioned tables (the parent table) doesn't contain any tuple. +create table ptab (a int) partition by list (a); + +select heap_force_kill('ptab'::regclass, ARRAY['(0, 1)']::tid[]); +select heap_force_freeze('ptab'::regclass, ARRAY['(0, 1)']::tid[]); + +create index ptab_idx on ptab (a); + +-- indexes are not supported, should fail. +select heap_force_kill('ptab_idx'::regclass, ARRAY['(0, 1)']::tid[]); +select heap_force_freeze('ptab_idx'::regclass, ARRAY['(0, 1)']::tid[]); + +create view vw as select 1; + +-- views are not supported as well. so, all these should fail. +select heap_force_kill('vw'::regclass, ARRAY['(0, 1)']::tid[]); +select heap_force_freeze('vw'::regclass, ARRAY['(0, 1)']::tid[]); + +create materialized view mvw as select 10; + +-- materialized views are not supported as well. so, all these should fail. +select heap_force_kill('mvw'::regclass, ARRAY['(0, 1)']::tid[]); +select heap_force_freeze('mvw'::regclass, ARRAY['(0, 1)']::tid[]); + +create sequence seq; + +-- sequences are not supported as well. so, all these functions should fail. +select heap_force_kill('seq'::regclass, ARRAY['(0, 1)']::tid[]); +select heap_force_freeze('seq'::regclass, ARRAY['(0, 1)']::tid[]); + +-- cleanup. +drop materialized view mvw; +drop table ptab; +drop view vw; +drop sequence seq; +drop extension pg_surgery; -- 1.8.3.1