While working on 2dcd157, I noticed cfbot failures for Windows due to tests
with commands that had non-options specified before options.  For example,
"createuser myrole -a myadmin" specified a non-option (myrole) before the
option/argument pair (-a admin).  To get the tests passing for Windows,
non-options must be placed at the end (e.g., "createuser -a myadmin
myrole").  This same problem was encountered while working on 08951a7 [0],
but it was again fortunately caught with cfbot.  Others have not been so
lucky [1] [2] [3].

The reason for this discrepancy is because Windows uses the in-tree
implementation of getopt_long(), which differs from the other
implementations I've found in that it doesn't reorder non-options to the
end of argv by default.  Instead, it returns -1 as soon as the first
non-option is found, even if there are other options listed afterwards.  By
moving non-options to the end of argv, getopt_long() can parse all
specified options and return -1 when only non-options remain.  The
implementations I reviewed all reorder argv unless the POSIXLY_CORRECT
environment variable is set or the "optstring" argument begins with '+'.

The best reasons I can think of to keep the current behavior are 1)
reordering involves writing to the original argv array, which could be
risky (as noted by Tom [4]) and 2) any systems with a getopt_long()
implementation that doesn't reorder non-options could begin failing tests
that take advantage of this behavior.  However, this quirk in the in-tree
getopt_long() is periodically missed by hackers, the only systems I'm aware
of that use it are Windows and AIX, and every other implementation of
getopt_long() I saw reorders non-options by default.  Furthermore, C99
omits const decorations in main()'s signature, so modifying argv might not
be too scary.

Thus, I propose introducing this non-option reordering behavior but
allowing it to be disabled via the POSIXLY_CORRECT environment variable.
The attached patch is my first attempt at implementing this proposal.  I
don't think we need to bother with handling '+' at the beginning of
"optstring" since it seems unlikely to be used in PostgreSQL, but it would
probably be easy enough to add if folks want it.

I briefly looked at getopt() and concluded that we should probably retain
its POSIX-compliant behavior for non-options, as reordering support seems
much less universal than with getopt_long().  AFAICT all client utilities
use getopt_long(), anyway.

Thoughts?

[0] 
https://postgr.es/m/20220525.110752.305692011781436338.horikyota.ntt%40gmail.com
[1] https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=869aa40
[2] https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=ffd3980
[3] https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=d9ddc50
[4] https://postgr.es/m/20539.1237314382%40sss.pgh.pa.us

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 68c866da205592a370279d6b2a180e0888b0587c Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Fri, 9 Jun 2023 15:51:58 -0700
Subject: [PATCH v1 1/1] Teach in-tree getopt_long() to move non-options to the
 end of argv.

---
 src/bin/scripts/t/040_createuser.pl | 10 ++++---
 src/port/getopt_long.c              | 43 ++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/bin/scripts/t/040_createuser.pl b/src/bin/scripts/t/040_createuser.pl
index 9ca282181d..ba40ab11a3 100644
--- a/src/bin/scripts/t/040_createuser.pl
+++ b/src/bin/scripts/t/040_createuser.pl
@@ -42,9 +42,9 @@ $node->issues_sql_like(
 	'add a role as a member with admin option of the newly created role');
 $node->issues_sql_like(
 	[
-		'createuser', '-m',
-		'regress_user3', '-m',
-		'regress user #4', 'REGRESS_USER5'
+		'createuser', 'REGRESS_USER5',
+		'-m', 'regress_user3',
+		'-m', 'regress user #4'
 	],
 	qr/statement: CREATE ROLE "REGRESS_USER5" NOSUPERUSER NOCREATEDB NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS ROLE regress_user3,"regress user #4";/,
 	'add a role as a member of the newly created role');
@@ -73,11 +73,13 @@ $node->issues_sql_like(
 	qr/statement: CREATE ROLE regress_user11 NOSUPERUSER NOCREATEDB NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS IN ROLE regress_user1;/,
 	'--role');
 $node->issues_sql_like(
-	[ 'createuser', '--member-of', 'regress_user1', 'regress_user12' ],
+	[ 'createuser', 'regress_user12', '--member-of', 'regress_user1' ],
 	qr/statement: CREATE ROLE regress_user12 NOSUPERUSER NOCREATEDB NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS IN ROLE regress_user1;/,
 	'--member-of');
 
 $node->command_fails([ 'createuser', 'regress_user1' ],
 	'fails if role already exists');
+$node->command_fails([ 'createuser', 'regress_user1', '-m', 'regress_user2', 'regress_user3' ],
+	'fails for too many non-options');
 
 done_testing();
diff --git a/src/port/getopt_long.c b/src/port/getopt_long.c
index c989276988..94e76769a3 100644
--- a/src/port/getopt_long.c
+++ b/src/port/getopt_long.c
@@ -50,8 +50,8 @@
  * This implementation does not use optreset.  Instead, we guarantee that
  * it can be restarted on a new argv array after a previous call returned -1,
  * if the caller resets optind to 1 before the first call of the new series.
- * (Internally, this means we must be sure to reset "place" to EMSG before
- * returning -1.)
+ * (Internally, this means we must be sure to reset "place" to EMSG and
+ * nonopt_start to -1 before returning -1.)
  */
 int
 getopt_long(int argc, char *const argv[],
@@ -60,21 +60,54 @@ getopt_long(int argc, char *const argv[],
 {
 	static char *place = EMSG;	/* option letter processing */
 	char	   *oli;			/* option letter list index */
+	static int	nonopt_start = -1;
+	static bool posix = false;
+	static bool posix_init = false;
+
+	if (!posix_init)
+	{
+		posix = (getenv("POSIXLY_CORRECT") != NULL);
+		posix_init = true;
+	}
 
 	if (!*place)
 	{							/* update scanning pointer */
 		if (optind >= argc)
 		{
 			place = EMSG;
+			nonopt_start = -1;
 			return -1;
 		}
 
+retry:
 		place = argv[optind];
 
 		if (place[0] != '-')
 		{
-			place = EMSG;
-			return -1;
+			char	  **args = (char **) argv;
+
+			/*
+			 * If POSIXLY_CORRECT is set, we must return -1 as soon as we see
+			 * a non-option. Else, move the non-options to the end and return
+			 * -1 when only non-options remain.
+			 */
+			if (posix || optind == nonopt_start)
+			{
+				place = EMSG;
+				nonopt_start = -1;
+				return -1;
+			}
+
+			for (int i = optind; i < argc - 1; i++)
+				args[i] = args[i + 1];
+			args[argc - 1] = place;
+
+			if (nonopt_start == -1)
+				nonopt_start = argc - 1;
+			else
+				nonopt_start--;
+
+			goto retry;
 		}
 
 		place++;
@@ -83,6 +116,7 @@ getopt_long(int argc, char *const argv[],
 		{
 			/* treat "-" as not being an option */
 			place = EMSG;
+			nonopt_start = -1;
 			return -1;
 		}
 
@@ -91,6 +125,7 @@ getopt_long(int argc, char *const argv[],
 			/* found "--", treat it as end of options */
 			++optind;
 			place = EMSG;
+			nonopt_start = -1;
 			return -1;
 		}
 
-- 
2.25.1

Reply via email to