On 09/09/2024 19:30, Pádraig Brady wrote:
On 06/09/2024 15:06, Bruno Haible wrote:
Hi,
POSIX:2024 specifies that printf(1) should support numbered conversion
specifications:
https://pubs.opengroup.org/onlinepubs/9799919799/utilities/printf.html
https://austingroupbugs.net/view.php?id=1592
Could this support please be added to GNU coreutils? As of coreutils 9.5,
I still get:
$ /usr/bin/printf 'abc%2$sdef%1$sxxx\n' 1 2
abc/usr/bin/printf: %2$: invalid conversion specification
This makes sense to implement.
I see that ksh and FreeBSD, at least, already support this.
I'll have a look at doing this.
I'll apply the attached sometime tomorrow.
Marking this as done.
cheers,
Pádraig
From 97e55c7ace9e9e46a32faa0d592870983a14367b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Wed, 11 Sep 2024 16:07:48 +0100
Subject: [PATCH] printf: add indexed argument support
* src/printf.c (print_formatted): Add support for %i$ indexed args.
* tests/printf/printf-indexed.sh: Add a new file of test cases.
* tests/local.mk: Reference the new test file.
* doc/coreutils.texi (printf invocation): Mention how mixed
processing of indexed and sequential references is supported,
unlike the printf(3) library function.
* NEWS: Mention the new feature.
These are specified in POSIX:2024.
Addresses https://bugs.gnu.org/73068
---
NEWS | 4 +
doc/coreutils.texi | 6 ++
src/printf.c | 179 ++++++++++++++++++++-------------
tests/local.mk | 1 +
tests/printf/printf-indexed.sh | 93 +++++++++++++++++
5 files changed, 215 insertions(+), 68 deletions(-)
create mode 100755 tests/printf/printf-indexed.sh
diff --git a/NEWS b/NEWS
index e1d3f82d1..6094de8d2 100644
--- a/NEWS
+++ b/NEWS
@@ -40,6 +40,10 @@ GNU coreutils NEWS -*- outline -*-
ls now supports the --sort=name option,
to explicitly select the default operation of sorting by file name.
+ printf now supports indexed arguments, using the POSIX:2024 specified
+ %i$ format, where 'i' is an integer referencing a particular argument,
+ thus allowing repetition or reordering of printf arguments.
+
** Improvements
'head -c NUM', 'head -n NUM', 'nl -l NUM', 'nproc --ignore NUM',
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 58b425779..9fe953587 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -13429,6 +13429,12 @@ Missing @var{argument}s are treated as null strings or as zeros,
depending on whether the context expects a string or a number. For
example, the command @samp{printf %sx%d} prints @samp{x0}.
+@item
+Indexed arguments referenced with @samp{%i$} formats can be
+mixed with standard sequential argument references,
+in which case both index types are independent.
+For example, the command @samp{printf '%1$s%s' A} prints @samp{AA}.
+
@item
@kindex \c
An additional escape, @samp{\c}, causes @command{printf} to produce no
diff --git a/src/printf.c b/src/printf.c
index de3507925..b1c7dd561 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -291,15 +291,13 @@ print_esc_string (char const *str)
}
/* Evaluate a printf conversion specification. START is the start of
- the directive, LENGTH is its length, and CONVERSION specifies the
- type of conversion. LENGTH does not include any length modifier or
- the conversion specifier itself. FIELD_WIDTH and PRECISION are the
- field width and precision for '*' values, if HAVE_FIELD_WIDTH and
- HAVE_PRECISION are true, respectively. ARGUMENT is the argument to
- be formatted. */
+ the directive, and CONVERSION specifies the type of conversion.
+ FIELD_WIDTH and PRECISION are the field width and precision for '*'
+ values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
+ ARGUMENT is the argument to be formatted. */
static void
-print_direc (char const *start, size_t length, char conversion,
+print_direc (char const *start, char conversion,
bool have_field_width, int field_width,
bool have_precision, int precision,
char const *argument)
@@ -333,6 +331,7 @@ print_direc (char const *start, size_t length, char conversion,
break;
}
+ size_t length = strlen (start);
p = xmalloc (length + length_modifier_len + 2);
q = mempcpy (p, start, length);
q = mempcpy (q, length_modifier, length_modifier_len);
@@ -448,50 +447,92 @@ print_direc (char const *start, size_t length, char conversion,
static int
print_formatted (char const *format, int argc, char **argv)
{
- int save_argc = argc; /* Preserve original value. */
+
+/* Set curr_arg from indexed %i$ or otherwise next in sequence.
+ POS can be 0,1,2,3 corresponding to
+ [%][width][.precision][conversion] respectively. */
+
+#define GET_CURR_ARG(POS) \
+do { \
+ char *arge; \
+ intmax_t arg = POS==3 ? 0 : strtoimax (f, &arge, 10); \
+ if (0 < arg && arg <= INT_MAX && *arge == '$') \
+ /* Process indexed %i$ format. */ \
+ /* Note '$' comes before any flags. */ \
+ { \
+ SET_CURR_ARG (arg - 1); \
+ f = arge + 1; \
+ if (POS == 0) \
+ direc_arg = arg - 1; \
+ } \
+ else \
+ /* Reset to sequential processing. */ \
+ { \
+ if (POS == 0) \
+ direc_arg = -1; \
+ else if (POS < 3 || direc_arg == -1) \
+ SET_CURR_ARG (++curr_s_arg); \
+ else \
+ SET_CURR_ARG (direc_arg); \
+ } \
+} while (0) \
+
+#define SET_CURR_ARG(ARG) \
+do { \
+ curr_arg = ARG; \
+ end_arg = MAX (curr_arg, end_arg); \
+} while (0) \
+
+ int curr_arg = -1; /* Current offset. */
+ int curr_s_arg = -1; /* Current sequential offset. */
+ int end_arg = -1; /* End arg processed. */
+ int direc_arg = -1; /* Arg for main directive. */
char const *f; /* Pointer into 'format'. */
char const *direc_start; /* Start of % directive. */
- size_t direc_length; /* Length of % directive. */
+ char *direc; /* Generated % directive. */
+ char *pdirec; /* Pointer to current end of directive. */
bool have_field_width; /* True if FIELD_WIDTH is valid. */
int field_width = 0; /* Arg to first '*'. */
bool have_precision; /* True if PRECISION is valid. */
int precision = 0; /* Arg to second '*'. */
char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
+ direc = xmalloc (strlen (format) + 1);
+
for (f = format; *f; ++f)
{
switch (*f)
{
case '%':
- direc_start = f++;
- direc_length = 1;
+ direc_start = f;
+ pdirec = direc;
+ *pdirec++ = *f++;
have_field_width = have_precision = false;
if (*f == '%')
{
putchar ('%');
break;
}
+
+ GET_CURR_ARG (0);
+
if (*f == 'b')
{
/* FIXME: Field width and precision are not supported
for %b, even though POSIX requires it. */
- if (argc > 0)
- {
- print_esc_string (*argv);
- ++argv;
- --argc;
- }
+ GET_CURR_ARG (3);
+ if (curr_arg < argc)
+ print_esc_string (argv[curr_arg]);
break;
}
if (*f == 'q')
{
- if (argc > 0)
+ GET_CURR_ARG (3);
+ if (curr_arg < argc)
{
- fputs (quotearg_style (shell_escape_quoting_style, *argv),
- stdout);
- ++argv;
- --argc;
+ fputs (quotearg_style (shell_escape_quoting_style,
+ argv[curr_arg]), stdout);
}
break;
}
@@ -501,43 +542,46 @@ print_formatted (char const *format, int argc, char **argv)
ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
- for (;; f++, direc_length++)
- switch (*f)
- {
+ for (;; f++)
+ {
+ switch (*f)
+ {
#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
- case 'I':
+ case 'I':
#endif
- case '\'':
- ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
- ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
- break;
- case '-': case '+': case ' ':
- break;
- case '#':
- ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
- break;
- case '0':
- ok['c'] = ok['s'] = 0;
- break;
- default:
- goto no_more_flag_characters;
- }
+ case '\'':
+ ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
+ ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
+ break;
+ case '-': case '+': case ' ':
+ break;
+ case '#':
+ ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
+ break;
+ case '0':
+ ok['c'] = ok['s'] = 0;
+ break;
+ default:
+ goto no_more_flag_characters;
+ }
+ *pdirec++ = *f;
+ }
no_more_flag_characters:
if (*f == '*')
{
- ++f;
- ++direc_length;
- if (argc > 0)
+ *pdirec++ = *f++;
+
+ GET_CURR_ARG (1);
+
+ if (curr_arg < argc)
{
- intmax_t width = vstrtoimax (*argv);
+ intmax_t width = vstrtoimax (argv[curr_arg]);
if (INT_MIN <= width && width <= INT_MAX)
field_width = width;
else
error (EXIT_FAILURE, 0, _("invalid field width: %s"),
- quote (*argv));
- ++argv;
- --argc;
+ quote (argv[curr_arg]));
}
else
field_width = 0;
@@ -545,22 +589,20 @@ print_formatted (char const *format, int argc, char **argv)
}
else
while (ISDIGIT (*f))
- {
- ++f;
- ++direc_length;
- }
+ *pdirec++ = *f++;
if (*f == '.')
{
- ++f;
- ++direc_length;
+ *pdirec++ = *f++;
ok['c'] = 0;
if (*f == '*')
{
- ++f;
- ++direc_length;
- if (argc > 0)
+ *pdirec++ = *f++;
+
+ GET_CURR_ARG (2);
+
+ if (curr_arg < argc)
{
- intmax_t prec = vstrtoimax (*argv);
+ intmax_t prec = vstrtoimax (argv[curr_arg]);
if (prec < 0)
{
/* A negative precision is taken as if the
@@ -570,11 +612,9 @@ print_formatted (char const *format, int argc, char **argv)
}
else if (INT_MAX < prec)
error (EXIT_FAILURE, 0, _("invalid precision: %s"),
- quote (*argv));
+ quote (argv[curr_arg]));
else
precision = prec;
- ++argv;
- --argc;
}
else
precision = 0;
@@ -582,12 +622,11 @@ print_formatted (char const *format, int argc, char **argv)
}
else
while (ISDIGIT (*f))
- {
- ++f;
- ++direc_length;
- }
+ *pdirec++ = *f++;
}
+ *pdirec++ = '\0';
+
while (*f == 'l' || *f == 'L' || *f == 'h'
|| *f == 'j' || *f == 't' || *f == 'z')
++f;
@@ -601,10 +640,13 @@ print_formatted (char const *format, int argc, char **argv)
speclen, direc_start);
}
- print_direc (direc_start, direc_length, *f,
+ GET_CURR_ARG (3);
+
+ print_direc (direc, *f,
have_field_width, field_width,
have_precision, precision,
- (argc <= 0 ? "" : (argc--, *argv++)));
+ (argc <= curr_arg ? "" : argv[curr_arg]));
+
break;
case '\\':
@@ -616,7 +658,8 @@ print_formatted (char const *format, int argc, char **argv)
}
}
- return save_argc - argc;
+ free (direc);
+ return MIN (argc, end_arg + 1);
}
int
diff --git a/tests/local.mk b/tests/local.mk
index fdbf36946..f72353862 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -353,6 +353,7 @@ all_tests = \
tests/printf/printf.sh \
tests/printf/printf-cov.pl \
tests/printf/printf-hex.sh \
+ tests/printf/printf-indexed.sh \
tests/printf/printf-mb.sh \
tests/printf/printf-surprise.sh \
tests/printf/printf-quote.sh \
diff --git a/tests/printf/printf-indexed.sh b/tests/printf/printf-indexed.sh
new file mode 100755
index 000000000..1c3a6c380
--- /dev/null
+++ b/tests/printf/printf-indexed.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# tests for printf %i$ indexed format processing
+
+# Copyright (C) 2024 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+getlimits_  # presumably defines limit variables such as INT_OFLOW used below; confirm in init.sh
+
+prog='env printf'  # invoke printf through env so a shell builtin is not used
+
+printf_check() {  # usage: printf_check EXPECTED FORMAT [ARG...]
+ cat <<EOF > exp || framework_failure_
+$1
+EOF
+
+ shift  # drop EXPECTED; the remaining words are passed to $prog
+
+ $prog "$@" > out || fail=1  # a nonzero exit status marks the test as failed
+ compare exp out || fail=1  # exp holds EXPECTED followed by a newline
+}
+
+printf_check_err() {  # usage: printf_check_err EXPECTED_STDERR FORMAT [ARG...]
+ cat <<EOF > exp || framework_failure_
+$1
+EOF
+
+ shift  # drop EXPECTED_STDERR; the remaining words are passed to $prog
+
+ returns_1 $prog "$@" 2> out || fail=1  # stderr is captured; returns_1 presumably requires exit status 1
+ compare exp out || fail=1  # exp holds EXPECTED_STDERR followed by a newline
+}
+
+NL="
+"
+
+# Reordering: the second argument is output before the first
+printf_check '21' '%2$s%1$s\n' 1 2
+
+# Repetition: each pass over the format prints one argument twice
+printf_check "11${NL}22" '%1$s%1$s\n' 1 2
+
+# Multiple uses of format
+printf_check "A C B${NL}D " '%s %3$s %s\n' A B C D
+printf_check " 4${NL}1" '%1$*d\n' 4 1
+
+# Mixed indexed and sequential main arg
+printf_check "A B A" '%s %s %1$s\n' A B
+printf_check ' 0 1 ' '%100$*d %s %s %s\n' 4 1
+
+# indexed arg, width, and precision
+printf_check ' 01' '%1$*2$.*3$d\n' 1 3 2
+# indexed arg, sequential width, and precision
+printf_check ' 01' '%3$*.*d\n' 3 2 1
+# indexed arg, width, and sequential precision
+printf_check ' 01' '%3$*2$.*d\n' 2 3 1
+# indexed arg, precision, and sequential width
+printf_check ' 01' '%3$*.*2$d\n' 3 2 1
+# Indexed arg, width
+printf_check ' 1' '%2$*1$d\n' 4 1
+# Indexed arg, and sequential width
+printf_check ' 1' '%2$*d\n' 4 1
+
+# Flags come after $ (0 is not a flag here):
+printf_check ' 1' '%01$4d\n' 1
+# Flags come after $ (0 is a flag here):
+printf_check '0001' '%1$0*2$d\n' 1 4
+# Flags come after $ (-2 not taken as a valid index here):
+printf_check_err 'printf: %-2$: invalid conversion specification' \
+ '%-2$s %1$s\n' A B
+
+# Ensure only base 10 numbers are accepted
+printf_check_err "printf: 'A': expected a numeric value" \
+ '%0x2$s %1$s\n' A B
+# Verify int limits (avoiding comparisons with argc etc.)
+printf_check_err "printf: %${INT_OFLOW}$: invalid conversion specification" \
+ "%${INT_OFLOW}$d\n" 1
+
+Exit $fail
--
2.46.0