At Wed, 20 Sep 2023 14:18:41 +0900 (JST), Kyotaro Horiguchi 
<horikyota....@gmail.com> wrote in 
> I was able to see the trouble in the CI environment, but not
> locally. I'll delve deeper into this. Thanks you for bringing it to my
> attention.

I found two instances with multiple child processes.

# child-pid / parent-pid / given-pid : exec name
process  parent PID  child PID  target PID   exec file
shell       1228       6472        1228      cmd.exe
child       5184       1228        1228      cmd.exe
child       6956       1228        1228      postgres.exe
> launcher shell executed multiple processes

process  parent PID  child PID  target PID   exec file
shell       4296       5880        4296      cmd.exe
child       5156       4296        4296      agent.exe
child       5640       4296        4296      postgres.exe
> launcher shell executed multiple processes

It looks like the environment has autorun setups for cmd.exe. There's
another known issue related to auto-launching chcp at
startup. Ideally, we would avoid such behavior in the
postmaster-launcher shell.  I think we should add "/D" flag to cmd.exe
command line, perhaps in a separate patch.

Even after making that change, I still see something being launched from the 
launcher cmd.exe...

process  parent PID  child PID  target PID   exec file
shell       2784       6668        2784      cmd.exe
child       6140       2784        2784      MicrosoftEdgeUpdate.exe
child       6260       2784        2784      postgres.exe
> launcher shell executed multiple processes

I'm not sure what triggers this; perhaps some other kind of hooks?  If
we cannot avoid this behavior, we'll have to verify the executable
file name. It should be fine, given that the file name is constant,
but I'm not fully convinced that this is the ideal solution.

Another issue is.. that I haven't been able to cause the false
positive of pg_ctl start..  Do you have a concise reproducer of the
issue?

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
>From c6de7b541be55b01bbd0d2e8e8d95aac6799a82c Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota....@gmail.com>
Date: Thu, 21 Sep 2023 16:54:33 +0900
Subject: [PATCH 1/3] Disable autoruns for cmd.exe on Windows

On Windows, we use cmd.exe to launch the postmaster process for easy
redirection setup. However, cmd.exe may execute other programs at
startup due to autorun configurations. This patch adds /D flag to the
launcher cmd.exe command line to disable autorun settings written in
the registry.
---
 src/bin/pg_ctl/pg_ctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 807d7023a9..3ac2fcc004 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -553,11 +553,11 @@ start_postmaster(void)
 		else
 			close(fd);
 
-		cmd = psprintf("\"%s\" /C \"\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1\"",
+		cmd = psprintf("\"%s\" /D /C \"\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1\"",
 					   comspec, exec_path, pgdata_opt, post_opts, DEVNULL, log_file);
 	}
 	else
-		cmd = psprintf("\"%s\" /C \"\"%s\" %s%s < \"%s\" 2>&1\"",
+		cmd = psprintf("\"%s\" /D /C \"\"%s\" %s%s < \"%s\" 2>&1\"",
 					   comspec, exec_path, pgdata_opt, post_opts, DEVNULL);
 
 	if (!CreateRestrictedProcess(cmd, &pi, false))
-- 
2.39.3

>From f1484974f5bcf14d5ac3c6a702dcb02d0eb45f9f Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota....@gmail.com>
Date: Wed, 20 Sep 2023 13:16:35 +0900
Subject: [PATCH 2/3] Improve pg_ctl postmaster process check on Windows

Currently pg_ctl on Windows does not verify that it actually executed
a postmaster process due to lack of process ID knowledge. This can
lead to false positives in cases where another pg_ctl instance starts
a different server simultaneously.
This patch adds the capability to identify the process ID of the
launched postmaster on Windows, similar to other OS versions, ensuring
more reliable detection of concurrent server startups.
---
 src/bin/pg_ctl/pg_ctl.c | 109 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 100 insertions(+), 9 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3ac2fcc004..ed1b0c43fc 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -132,6 +132,7 @@ static void adjust_data_dir(void);
 
 #ifdef WIN32
 #include <versionhelpers.h>
+#include <tlhelp32.h>
 static bool pgwin32_IsInstalled(SC_HANDLE);
 static char *pgwin32_CommandLine(bool);
 static void pgwin32_doRegister(void);
@@ -142,6 +143,7 @@ static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
 static void pgwin32_doRunAsService(void);
 static int	CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo, bool as_service);
 static PTOKEN_PRIVILEGES GetPrivilegesToDelete(HANDLE hToken);
+static pid_t pgwin32_find_postmaster_pid(pid_t shell_pid);
 #endif
 
 static pid_t get_pgpid(bool is_status_request);
@@ -609,7 +611,11 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			/* File is complete enough for us, parse it */
 			pid_t		pmpid;
 			time_t		pmstart;
-
+#ifndef WIN32
+			pid_t		wait_pid = pm_pid;
+#else
+			pid_t		wait_pid = pgwin32_find_postmaster_pid(pm_pid);
+#endif
 			/*
 			 * Make sanity checks.  If it's for the wrong PID, or the recorded
 			 * start time is before pg_ctl started, then either we are looking
@@ -619,14 +625,8 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			 */
 			pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
 			pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
-			if (pmstart >= start_time - 2 &&
-#ifndef WIN32
-				pmpid == pm_pid
-#else
-			/* Windows can only reject standalone-backend PIDs */
-				pmpid > 0
-#endif
-				)
+
+			if (pmstart >= start_time - 2 && pmpid == wait_pid)
 			{
 				/*
 				 * OK, seems to be a valid pidfile from our child.  Check the
@@ -1950,6 +1950,97 @@ GetPrivilegesToDelete(HANDLE hToken)
 
 	return tokenPrivs;
 }
+
+/*
+ * Find the PID of the launched postmaster.
+ *
+ * On Windows, the cmd.exe doesn't support the exec command. As a result, we
+ * don't directly get the postmaster's PID. This function identifies the PID of
+ * the postmaster started by the child cmd.exe.
+ *
+ * Returns the postmaster's PID. If the shell is alive but the postmaster is
+ * missing, returns 0. Otherwise terminates this command with an error.
+ *
+ * This function uses PID 0 as an invalid value, assuming the system idle
+ * process occupies it and it won't be a PID for a shell or postmaster.
+ */
+pid_t
+pgwin32_find_postmaster_pid(pid_t shell_pid)
+{
+	HANDLE hSnapshot;
+	PROCESSENTRY32 ppe;
+	pid_t pm_pid = 0;			/* abusing 0 as an invalid value */
+	bool shell_exists = false;
+	bool multiple_children = false;
+	DWORD last_error;
+
+	/* create a process snapshot */
+	hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
+	if (hSnapshot == INVALID_HANDLE_VALUE)
+	{
+		write_stderr(_("%s: CreateToolhelp32Snapshot failed\n"),
+					 progname);
+		exit(1);
+	}
+
+	/* start iterating on the snapshot */
+	ppe.dwSize = sizeof(PROCESSENTRY32);
+	if (!Process32First(hSnapshot, &ppe))
+	{
+		write_stderr(_("%s: Process32First failed: errcode=%08lx\n"),
+					 progname, GetLastError());
+		exit(1);
+	}
+
+	/*
+	 * Iterate over the snapshot
+	 *
+	 * Check for shell existence and duplicate processes for reliability.
+	 *
+	 * The launcher shell may start other instances of cmd.exe or programs
+	 * besides postgres.exe. It's important to verify the program file name.
+	 */
+	do
+	{
+		if (ppe.th32ProcessID == shell_pid)
+			shell_exists = true;
+		else if (ppe.th32ParentProcessID == shell_pid &&
+				 strcmp("postgres.exe", ppe.szExeFile) == 0)
+		{
+			if (pm_pid != ppe.th32ProcessID && pm_pid != 0)
+				multiple_children = true;
+			pm_pid = ppe.th32ProcessID;
+		}
+	}
+	while (Process32Next(hSnapshot, &ppe));
+
+	/* avoid multiple calls primary for clarity, not out of necessity */
+	last_error = GetLastError();
+	if (last_error != ERROR_NO_MORE_FILES)
+	{
+		write_stderr(_("%s: Process32Next failed: errcode=%08lx\n"),
+					 progname, last_error);
+		exit(1);
+	}
+	CloseHandle(hSnapshot);
+
+	/* assuming the launching shell executes a single process */
+	if (multiple_children)
+	{
+		write_stderr(_("%s: launcher shell executed multiple processes\n"),
+					 progname);
+		exit(1);
+	}
+
+	/* check if the process is still alive */
+	if (!shell_exists)
+	{
+		write_stderr(_("%s: launcher shell died\n"), progname);
+		exit(1);
+	}
+
+	return pm_pid;
+}
 #endif							/* WIN32 */
 
 static void
-- 
2.39.3

Reply via email to