We've been chewing around query cancel on Hot Standby and I think things have got fairly confusing, hence a new thread.
I enclose a patch that includes all the things that we all agree on so far, in my understanding:
* Recovery conflict processing uses SIGUSR1 rather than shmem per Tom, while holding ProcArrayLock per Andres
* CONFLICT_MODE_ERROR throws ERROR when in a transaction, not idle and not in a subtransaction; otherwise it becomes CONFLICT_MODE_FATAL, per Tom and other discussion
* Recovery abort message has additional detail, per Heikki

It doesn't include anything still under discussion, though it is intended as a base upon which further patches can progress independently. In particular, it does not include:
* Infrastructure for supercancel, by Joachim Wieland
* Any of the many further ideas by Andres Freund

Please review this so we can move on to taking the other issues one by one. This is also a base for other HS work I need to complete. I am still testing the patch, so I should be confident to commit tomorrow, barring issues.

-- 
Simon Riggs www.2ndQuadrant.com
*** a/src/backend/access/transam/xact.c --- b/src/backend/access/transam/xact.c *************** *** 313,320 **** IsTransactionState(void) /* * IsAbortedTransactionBlockState * ! * This returns true if we are currently running a query ! * within an aborted transaction block. */ bool IsAbortedTransactionBlockState(void) --- 313,319 ---- /* * IsAbortedTransactionBlockState * ! * This returns true if we are within an aborted transaction block. */ bool IsAbortedTransactionBlockState(void) *** a/src/backend/storage/ipc/procarray.c --- b/src/backend/storage/ipc/procarray.c *************** *** 324,329 **** ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) --- 324,330 ---- /* must be cleared with xid/xmin: */ proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; proc->inCommit = false; /* be sure this is cleared in abort */ + proc->recoveryConflictPending = false; /* Clear the subtransaction-XID cache too while holding the lock */ proc->subxids.nxids = 0; *************** *** 350,355 **** ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) --- 351,357 ---- /* must be cleared with xid/xmin: */ proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; proc->inCommit = false; /* be sure this is cleared in abort */ + proc->recoveryConflictPending = false; Assert(proc->subxids.nxids == 0); Assert(proc->subxids.overflowed == false); *************** *** 377,383 **** ProcArrayClearTransaction(PGPROC *proc) proc->xid = InvalidTransactionId; proc->lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; ! proc->recoveryConflictMode = 0; /* redundant, but just in case */ proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; --- 379,385 ---- proc->xid = InvalidTransactionId; proc->lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; ! 
proc->recoveryConflictPending = false; /* redundant, but just in case */ proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; *************** *** 1665,1671 **** GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid, if (proc->pid == 0) continue; ! if (skipExistingConflicts && proc->recoveryConflictMode > 0) continue; if (!OidIsValid(dbOid) || --- 1667,1673 ---- if (proc->pid == 0) continue; ! if (skipExistingConflicts && proc->recoveryConflictPending) continue; if (!OidIsValid(dbOid) || *************** *** 1704,1710 **** GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid, * Returns pid of the process signaled, or 0 if not found. */ pid_t ! CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode) { ProcArrayStruct *arrayP = procArray; int index; --- 1706,1712 ---- * Returns pid of the process signaled, or 0 if not found. */ pid_t ! CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) { ProcArrayStruct *arrayP = procArray; int index; *************** *** 1722,1749 **** CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode) if (procvxid.backendId == vxid.backendId && procvxid.localTransactionId == vxid.localTransactionId) { ! /* ! * Issue orders for the proc to read next time it receives SIGINT ! */ ! if (proc->recoveryConflictMode < cancel_mode) ! proc->recoveryConflictMode = cancel_mode; ! pid = proc->pid; break; } } LWLockRelease(ProcArrayLock); - if (pid != 0) - { - /* - * Kill the pid if it's still here. If not, that's what we wanted - * so ignore any errors. - */ - kill(pid, SIGINT); - } - return pid; } --- 1724,1745 ---- if (procvxid.backendId == vxid.backendId && procvxid.localTransactionId == vxid.localTransactionId) { ! proc->recoveryConflictPending = true; pid = proc->pid; + if (pid != 0) + { + /* + * Kill the pid if it's still here. If not, that's what we wanted + * so ignore any errors. 
+ */ + (void) SendProcSignal(pid, sigmode, vxid.backendId); + } break; } } LWLockRelease(ProcArrayLock); return pid; } *************** *** 1834,1839 **** CancelDBBackends(Oid databaseid) --- 1830,1836 ---- { ProcArrayStruct *arrayP = procArray; int index; + pid_t pid = 0; /* tell all backends to die */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); *************** *** 1844,1851 **** CancelDBBackends(Oid databaseid) if (proc->databaseId == databaseid) { ! proc->recoveryConflictMode = CONFLICT_MODE_FATAL; ! kill(proc->pid, SIGINT); } } --- 1841,1861 ---- if (proc->databaseId == databaseid) { ! VirtualTransactionId procvxid; ! ! GET_VXID_FROM_PGPROC(procvxid, *proc); ! ! proc->recoveryConflictPending = true; ! pid = proc->pid; ! if (pid != 0) ! { ! /* ! * Kill the pid if it's still here. If not, that's what we wanted ! * so ignore any errors. ! */ ! (void) SendProcSignal(pid, PROCSIG_CONFLICT_FATAL_INTERRUPT, ! procvxid.backendId); ! } } } *** a/src/backend/storage/ipc/procsignal.c --- b/src/backend/storage/ipc/procsignal.c *************** *** 24,29 **** --- 24,31 ---- #include "storage/procsignal.h" #include "storage/shmem.h" #include "storage/sinval.h" + #include "storage/standby.h" + #include "tcop/tcopprot.h" /* *************** *** 258,262 **** procsignal_sigusr1_handler(SIGNAL_ARGS) --- 260,270 ---- if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT)) HandleNotifyInterrupt(); + if (CheckProcSignal(PROCSIG_CONFLICT_ERROR_INTERRUPT)) + RecoveryConflictInterrupt(CONFLICT_MODE_ERROR); + + if (CheckProcSignal(PROCSIG_CONFLICT_FATAL_INTERRUPT)) + RecoveryConflictInterrupt(CONFLICT_MODE_FATAL); + errno = save_errno; } *** a/src/backend/storage/ipc/standby.c --- b/src/backend/storage/ipc/standby.c *************** *** 159,166 **** WaitExceedsMaxStandbyDelay(void) * recovery processing. Judgement has already been passed on it within * a specific rmgr. Here we just issue the orders to the procs. The procs * then throw the required error as instructed. 
- * - * We may ask for a specific cancel_mode, typically ERROR or FATAL. */ void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, --- 159,164 ---- *************** *** 218,229 **** ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, if (WaitExceedsMaxStandbyDelay()) { pid_t pid; /* * Now find out who to throw out of the balloon. */ Assert(VirtualTransactionIdIsValid(*waitlist)); ! pid = CancelVirtualTransaction(*waitlist, cancel_mode); if (pid != 0) { --- 216,231 ---- if (WaitExceedsMaxStandbyDelay()) { pid_t pid; + ProcSignalReason sigmode = PROCSIG_CONFLICT_ERROR_INTERRUPT; + + if (cancel_mode == CONFLICT_MODE_FATAL) + sigmode = PROCSIG_CONFLICT_FATAL_INTERRUPT; /* * Now find out who to throw out of the balloon. */ Assert(VirtualTransactionIdIsValid(*waitlist)); ! pid = CancelVirtualTransaction(*waitlist, sigmode); if (pid != 0) { *** a/src/backend/storage/lmgr/proc.c --- b/src/backend/storage/lmgr/proc.c *************** *** 318,324 **** InitProcess(void) MyProc->waitProcLock = NULL; for (i = 0; i < NUM_LOCK_PARTITIONS; i++) SHMQueueInit(&(MyProc->myProcLocks[i])); ! MyProc->recoveryConflictMode = 0; /* * We might be reusing a semaphore that belonged to a failed process. So --- 318,324 ---- MyProc->waitProcLock = NULL; for (i = 0; i < NUM_LOCK_PARTITIONS; i++) SHMQueueInit(&(MyProc->myProcLocks[i])); ! MyProc->recoveryConflictPending = false; /* * We might be reusing a semaphore that belonged to a failed process. 
So *** a/src/backend/tcop/postgres.c --- b/src/backend/tcop/postgres.c *************** *** 172,177 **** static int UseNewLine = 1; /* Use newlines query delimiters (the default) */ --- 172,179 ---- static int UseNewLine = 0; /* Use EOF as query delimiters */ #endif /* TCOP_DONTUSENEWLINE */ + /* whether we were cancelled during recovery by conflict processing or not */ + static bool RecoveryConflictPending = false; /* ---------------------------------------------------------------- * decls for routines only used in this file *************** *** 185,190 **** static List *pg_rewrite_query(Query *query); --- 187,193 ---- static bool check_log_statement(List *stmt_list); static int errdetail_execute(List *raw_parsetree_list); static int errdetail_params(ParamListInfo params); + static int errdetail_abort(void); static void start_xact_command(void); static void finish_xact_command(void); static bool IsTransactionExitStmt(Node *parsetree); *************** *** 943,949 **** exec_simple_query(const char *query_string) ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"))); /* Make sure we are in a transaction command */ start_xact_command(); --- 946,953 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"), ! errdetail_abort())); /* Make sure we are in a transaction command */ start_xact_command(); *************** *** 1252,1258 **** exec_parse_message(const char *query_string, /* string to execute */ ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"))); /* * Set up a snapshot if parse analysis/planning will need one. --- 1256,1263 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! 
"commands ignored until end of transaction block"), ! errdetail_abort())); /* * Set up a snapshot if parse analysis/planning will need one. *************** *** 1532,1538 **** exec_bind_message(StringInfo input_message) ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"))); /* * Create the portal. Allow silent replacement of an existing portal only --- 1537,1544 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"), ! errdetail_abort())); /* * Create the portal. Allow silent replacement of an existing portal only *************** *** 1973,1979 **** exec_execute_message(const char *portal_name, long max_rows) ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"))); /* Check for cancel signal before we start execution */ CHECK_FOR_INTERRUPTS(); --- 1979,1986 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"), ! errdetail_abort())); /* Check for cancel signal before we start execution */ CHECK_FOR_INTERRUPTS(); *************** *** 2234,2239 **** errdetail_params(ParamListInfo params) --- 2241,2260 ---- } /* + * errdetail_abort + * + * Add an errdetail() line showing abort reason, if any. + */ + static int + errdetail_abort(void) + { + if (MyProc->recoveryConflictPending) + errdetail("abort reason: recovery conflict"); + + return 0; + } + + /* * exec_describe_statement_message * * Process a "Describe" message for a prepared statement *************** *** 2290,2296 **** exec_describe_statement_message(const char *stmt_name) ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! 
"commands ignored until end of transaction block"))); if (whereToSendOutput != DestRemote) return; /* can't actually do anything... */ --- 2311,2318 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"), ! errdetail_abort())); if (whereToSendOutput != DestRemote) return; /* can't actually do anything... */ *************** *** 2370,2376 **** exec_describe_portal_message(const char *portal_name) ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"))); if (whereToSendOutput != DestRemote) return; /* can't actually do anything... */ --- 2392,2399 ---- ereport(ERROR, (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), errmsg("current transaction is aborted, " ! "commands ignored until end of transaction block"), ! errdetail_abort())); if (whereToSendOutput != DestRemote) return; /* can't actually do anything... */ *************** *** 2677,2682 **** SigHupHandler(SIGNAL_ARGS) --- 2700,2778 ---- got_SIGHUP = true; } + /* + * RecoveryConflictInterrupt: out-of-line portion of recovery conflict + * handling ollowing receipt of SIGUSR1. Designed to be similar to die() + * and StatementCancelHandler(). Called only by a normal user backend + * that begins a transaction during recovery. + */ + void + RecoveryConflictInterrupt(int conflict_mode) + { + int save_errno = errno; + + /* + * Don't joggle the elbow of proc_exit + */ + if (!proc_exit_inprogress) + { + switch (conflict_mode) + { + case CONFLICT_MODE_ERROR: + /* + * If we aren't in a transaction or if we already aborted + * then we no longer need to cancel. + */ + if (!IsTransactionOrTransactionBlock() || + IsAbortedTransactionBlockState()) + return; + + /* + * If we can abort just the current subtransaction then we + * are OK to throw an ERROR to resolve the conflict. Otherwise + * drop through to the FATAL case. 
+ * XXX Other cases exist also, but add those later. + */ + if (!IsSubTransaction()) + { + RecoveryConflictPending = true; + QueryCancelPending = true; + InterruptPending = true; + break; + } + + /* Intentional drop through to CONFLICT_MODE_FATAL */ + + case CONFLICT_MODE_FATAL: + RecoveryConflictPending = true; + ProcDiePending = true; + InterruptPending = true; + break; + + default: + elog(FATAL, "Unknown conflict mode"); + } + + /* + * If it's safe to interrupt, and we're waiting for input or a lock, + * service the interrupt immediately + */ + if (ImmediateInterruptOK && InterruptHoldoffCount == 0 && + CritSectionCount == 0) + { + /* bump holdoff count to make ProcessInterrupts() a no-op */ + /* until we are done getting ready for it */ + InterruptHoldoffCount++; + LockWaitCancel(); /* prevent CheckDeadLock from running */ + DisableNotifyInterrupt(); + DisableCatchupInterrupt(); + InterruptHoldoffCount--; + ProcessInterrupts(); + } + } + + errno = save_errno; + } /* * ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro *************** *** 2706,2711 **** ProcessInterrupts(void) --- 2802,2811 ---- ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating autovacuum process due to administrator command"))); + else if (RecoveryConflictPending) + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating connection due to conflict with recovery"))); else ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), *************** *** 2744,2800 **** ProcessInterrupts(void) (errcode(ERRCODE_QUERY_CANCELED), errmsg("canceling autovacuum task"))); } { ! int cancelMode = MyProc->recoveryConflictMode; ! /* ! * XXXHS: We don't yet have a clean way to cancel an ! * idle-in-transaction session, so make it FATAL instead. ! * This isn't as bad as it looks because we don't issue a ! * CONFLICT_MODE_ERROR for a session with proc->xmin == 0 ! * on cleanup conflicts. There's a possibility that we ! 
* marked somebody as a conflict and then they go idle. ! */ ! if (DoingCommandRead && IsTransactionBlock() && ! cancelMode == CONFLICT_MODE_ERROR) { ! cancelMode = CONFLICT_MODE_FATAL; } ! ! switch (cancelMode) { ! case CONFLICT_MODE_FATAL: ! ImmediateInterruptOK = false; /* not idle anymore */ ! DisableNotifyInterrupt(); ! DisableCatchupInterrupt(); ! Assert(RecoveryInProgress()); ! ereport(FATAL, ! (errcode(ERRCODE_QUERY_CANCELED), ! errmsg("canceling session due to conflict with recovery"))); ! ! case CONFLICT_MODE_ERROR: ! /* ! * We are aborting because we need to release ! * locks. So we need to abort out of all ! * subtransactions to make sure we release ! * all locks at whatever their level. ! * ! * XXX Should we try to examine the ! * transaction tree and cancel just enough ! * subxacts to remove locks? Doubt it. ! */ ! ImmediateInterruptOK = false; /* not idle anymore */ ! DisableNotifyInterrupt(); ! DisableCatchupInterrupt(); ! Assert(RecoveryInProgress()); ! AbortOutOfAnyTransaction(); ! ereport(ERROR, ! (errcode(ERRCODE_QUERY_CANCELED), ! errmsg("canceling statement due to conflict with recovery"))); ! ! default: ! /* No conflict pending, so fall through */ ! break; } } --- 2844,2869 ---- (errcode(ERRCODE_QUERY_CANCELED), errmsg("canceling autovacuum task"))); } + if (RecoveryConflictPending) { ! ImmediateInterruptOK = false; /* not idle anymore */ ! DisableNotifyInterrupt(); ! DisableCatchupInterrupt(); ! if (DoingCommandRead) { ! ProcDiePending = false; ! QueryCancelPending = false; ! ereport(FATAL, ! (errcode(ERRCODE_ADMIN_SHUTDOWN), ! errmsg("terminating connection due to conflict with recovery"))); } ! else { ! QueryCancelPending = false; ! ereport(ERROR, ! (errcode(ERRCODE_QUERY_CANCELED), ! errmsg("canceling statement due to conflict with recovery"))); } } *************** *** 3627,3633 **** PostgresMain(int argc, char *argv[], const char *username) */ if (send_ready_for_query) { ! 
if (IsTransactionOrTransactionBlock()) { set_ps_display("idle in transaction", false); pgstat_report_activity("<IDLE> in transaction"); --- 3696,3707 ---- */ if (send_ready_for_query) { ! if (IsAbortedTransactionBlockState()) ! { ! set_ps_display("idle in transaction (aborted)", false); ! pgstat_report_activity("<IDLE> in transaction (aborted)"); ! } ! else if (IsTransactionOrTransactionBlock()) { set_ps_display("idle in transaction", false); pgstat_report_activity("<IDLE> in transaction"); *** a/src/include/storage/proc.h --- b/src/include/storage/proc.h *************** *** 96,106 **** struct PGPROC uint8 vacuumFlags; /* vacuum-related flags, see above */ /* ! * While in hot standby mode, setting recoveryConflictMode instructs ! * the backend to commit suicide. Possible values are the same as those ! * passed to ResolveRecoveryConflictWithVirtualXIDs(). */ ! int recoveryConflictMode; /* Info about LWLock the process is currently waiting for, if any. */ bool lwWaiting; /* true if waiting for an LW lock */ --- 96,106 ---- uint8 vacuumFlags; /* vacuum-related flags, see above */ /* ! * While in hot standby mode, shows that a conflict signal has been sent ! * for the current transaction. Set/cleared while holding ProcArrayLock, ! * though not required. Accessed without lock, if needed. */ ! bool recoveryConflictPending; /* Info about LWLock the process is currently waiting for, if any. */ bool lwWaiting; /* true if waiting for an LW lock */ *** a/src/include/storage/procarray.h --- b/src/include/storage/procarray.h *************** *** 15,20 **** --- 15,21 ---- #define PROCARRAY_H #include "storage/lock.h" + #include "storage/procsignal.h" #include "storage/standby.h" #include "utils/snapshot.h" *************** *** 58,65 **** extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin, int *nvxids); extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid, bool skipExistingConflicts); ! 
extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ! int cancel_mode); extern int CountActiveBackends(void); extern int CountDBBackends(Oid databaseid); --- 59,65 ---- int *nvxids); extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid, bool skipExistingConflicts); ! extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode); extern int CountActiveBackends(void); extern int CountDBBackends(Oid databaseid); *** a/src/include/storage/procsignal.h --- b/src/include/storage/procsignal.h *************** *** 31,36 **** typedef enum --- 31,38 ---- { PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_CONFLICT_ERROR_INTERRUPT, /* recovery conflict error */ + PROCSIG_CONFLICT_FATAL_INTERRUPT, /* recovery conflict fatal */ NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; *** a/src/include/tcop/tcopprot.h --- b/src/include/tcop/tcopprot.h *************** *** 64,69 **** extern void die(SIGNAL_ARGS); --- 64,70 ---- extern void quickdie(SIGNAL_ARGS); extern void StatementCancelHandler(SIGNAL_ARGS); extern void FloatExceptionHandler(SIGNAL_ARGS); + extern void RecoveryConflictInterrupt(int conflict_mode); /* called from SIGUSR1 handler */ extern void prepare_for_client_read(void); extern void client_read_ended(void); extern const char *process_postgres_switches(int argc, char *argv[],
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers