From 3ef9afda4f4ed870d24029a344c431af023aa698 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 26 Feb 2020 14:44:09 +0900
Subject: [PATCH 3/3] Improve wait events of recovery conflict resolution

---
 doc/src/sgml/monitoring.sgml      | 28 ++++++++++++++++++
 src/backend/postmaster/pgstat.c   | 47 +++++++++++++++++++++++++++++++
 src/backend/storage/ipc/standby.c | 16 +++++++----
 src/include/pgstat.h              | 17 +++++++++++
 4 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 87586a7b06..ea30fedcd1 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -781,6 +781,13 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
           <literal>wait_event</literal> will identify the specific wait point.
          </para>
         </listitem>
+        <listitem>
+         <para>
+          <literal>RecoveryConflict</literal>: The server process is waiting for a
+          recovery conflict to be resolved.  <literal>wait_event</literal> will
+          identify the specific wait point.
+         </para>
+        </listitem>
        </itemizedlist>
       </entry>
      </row>
@@ -1773,6 +1780,27 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
          <entry><literal>WALWrite</literal></entry>
          <entry>Waiting for a write to a WAL file.</entry>
         </row>
+        <row>
+         <entry morerows="4"><literal>RecoveryConflict</literal></entry>
+         <entry><literal>Snapshot</literal></entry>
+         <entry>Waiting for recovery conflict resolution on a physical cleanup.</entry>
+        </row>
+        <row>
+         <entry><literal>Tablespace</literal></entry>
+         <entry>Waiting for recovery conflict resolution on dropping a tablespace.</entry>
+        </row>
+        <row>
+         <entry><literal>Lock</literal></entry>
+         <entry>Waiting for recovery conflict resolution on acquiring a lock.</entry>
+        </row>
+        <row>
+         <entry><literal>BufferPin</literal></entry>
+         <entry>Waiting for recovery conflict resolution on acquiring a buffer pin.</entry>
+        </row>
+        <row>
+         <entry><literal>Database</literal></entry>
+         <entry>Waiting for recovery conflict resolution on dropping a database.</entry>
+        </row>
       </tbody>
      </tgroup>
     </table>
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 462b4d7e06..5276300216 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -308,6 +308,7 @@ static const char *pgstat_get_wait_client(WaitEventClient w);
 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
+static const char *pgstat_get_wait_recovery_conflict(WaitEventRecoveryConflict w);
 
 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
 static void pgstat_send(void *msg, int len);
@@ -3535,6 +3536,9 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
 		case PG_WAIT_IO:
 			event_type = "IO";
 			break;
+		case PG_WAIT_RECOVERY_CONFLICT:
+			event_type = "RecoveryConflict";
+			break;
 		default:
 			event_type = "???";
 			break;
@@ -3612,6 +3616,14 @@ pgstat_get_wait_event(uint32 wait_event_info)
 				event_name = pgstat_get_wait_io(w);
 				break;
 			}
+		case PG_WAIT_RECOVERY_CONFLICT:
+			{
+				WaitEventRecoveryConflict w =
+					(WaitEventRecoveryConflict) wait_event_info;
+
+				event_name = pgstat_get_wait_recovery_conflict(w);
+				break;
+			}
 		default:
 			event_name = "unknown wait event";
 			break;
@@ -4112,6 +4124,41 @@ pgstat_get_wait_io(WaitEventIO w)
 	return event_name;
 }
 
+/* ----------
+ * pgstat_get_wait_recovery_conflict() -
+ *
+ * Convert WaitEventRecoveryConflict to its wait event name string.
+ * ----------
+ */
+static const char *
+pgstat_get_wait_recovery_conflict(WaitEventRecoveryConflict w)
+{
+	const char *event_name = "unknown wait event";	/* if no case matches */
+
+	switch (w)
+	{
+		case WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT:
+			event_name = "Snapshot";
+			break;
+		case WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE:
+			event_name = "Tablespace";
+			break;
+		case WAIT_EVENT_RECOVERY_CONFLICT_LOCK:
+			event_name = "Lock";
+			break;
+		case WAIT_EVENT_RECOVERY_CONFLICT_BUFFER_PIN:
+			event_name = "BufferPin";
+			break;
+		case WAIT_EVENT_RECOVERY_CONFLICT_DATABASE:
+			event_name = "Database";
+			break;
+		default:				/* NOTE(review): repeats the initializer */
+			event_name = "unknown wait event";
+			break;
+	}
+
+	return event_name;
+}
 
 /* ----------
  * pgstat_get_backend_current_activity() -
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index b45a83c54c..0bfe68b14a 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -279,7 +279,7 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
 
 	ResolveRecoveryConflictWithVirtualXIDs(backends,
 										   PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
-										   PG_WAIT_LOCK | LOCKTAG_TRANSACTION);
+										   WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT);
 
 	/* Reset ps display if we changed it */
 	if (new_status)
@@ -319,7 +319,7 @@ ResolveRecoveryConflictWithTablespace(Oid tsid)
 												InvalidOid);
 	ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
 										   PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
-										   PG_WAIT_LOCK | LOCKTAG_TRANSACTION);
+										   WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE);
 
 	/* Reset ps display if we changed it */
 	if (new_status)
@@ -356,7 +356,11 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
 		 * Wait awhile for them to die so that we avoid flooding an
 		 * unresponsive backend when system is heavily loaded.
 		 */
-		pg_usleep(10000);
+		WaitLatch(MyLatch,
+				  WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT,
+				  10,
+				  WAIT_EVENT_RECOVERY_CONFLICT_DATABASE);
+		ResetLatch(MyLatch);
 	}
 
 	/* Reset ps display if we changed it */
@@ -406,7 +410,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 		backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
 		ResolveRecoveryConflictWithVirtualXIDs(backends,
 											   PROCSIG_RECOVERY_CONFLICT_LOCK,
-											   PG_WAIT_LOCK | locktag.locktag_type);
+											   WAIT_EVENT_RECOVERY_CONFLICT_LOCK);
 	}
 	else
 	{
@@ -421,7 +425,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 		enable_timeouts(timeouts, 1);
 
 		/* Wait to be signaled by the release of the Relation Lock */
-		ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+		ProcWaitForSignal(WAIT_EVENT_RECOVERY_CONFLICT_LOCK);
 	}
 
 	/*
@@ -506,7 +510,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 	}
 
 	/* Wait to be signaled by UnpinBuffer() */
-	ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
+	ProcWaitForSignal(WAIT_EVENT_RECOVERY_CONFLICT_BUFFER_PIN);
 
 	/*
 	 * Clear any timeout requests established above.  We assume here that the
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3a65a51696..67ab5ee13e 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -760,6 +760,7 @@ typedef enum BackendState
 #define PG_WAIT_IPC					0x08000000U
 #define PG_WAIT_TIMEOUT				0x09000000U
 #define PG_WAIT_IO					0x0A000000U
+#define PG_WAIT_RECOVERY_CONFLICT	0x0B000000U
 
 /* ----------
  * Wait Events - Activity
@@ -948,6 +949,22 @@ typedef enum
 	WAIT_EVENT_WAL_WRITE
 } WaitEventIO;
 
+/* ----------
+ * Wait Events - Recovery Conflict
+ *
+ * Use this category when a process is waiting for a recovery conflict to
+ * be resolved.  Member values are numbered from PG_WAIT_RECOVERY_CONFLICT.
+ * ----------
+ */
+typedef enum
+{
+	WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT = PG_WAIT_RECOVERY_CONFLICT,
+	WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
+	WAIT_EVENT_RECOVERY_CONFLICT_LOCK,
+	WAIT_EVENT_RECOVERY_CONFLICT_BUFFER_PIN,
+	WAIT_EVENT_RECOVERY_CONFLICT_DATABASE
+} WaitEventRecoveryConflict;
+
 /* ----------
  * Command type for progress reporting purposes
  * ----------
-- 
2.23.0

