On 09/09/2024 19:30, Pádraig Brady wrote:
On 06/09/2024 15:06, Bruno Haible wrote:
Hi,

POSIX:2024 specifies that printf(1) should support numbered conversion
specifications:
https://pubs.opengroup.org/onlinepubs/9799919799/utilities/printf.html
https://austingroupbugs.net/view.php?id=1592

Could this support please be added to GNU coreutils? As of coreutils 9.5,
I still get:

    $ /usr/bin/printf 'abc%2$sdef%1$sxxx\n' 1 2
    abc/usr/bin/printf: %2$: invalid conversion specification


This makes sense to implement.
I see that ksh and FreeBSD, at least, already support it.
I'll have a look at doing this.

I'll apply the attached sometime tomorrow.

Marking this as done.

cheers,
Pádraig
From 97e55c7ace9e9e46a32faa0d592870983a14367b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Wed, 11 Sep 2024 16:07:48 +0100
Subject: [PATCH] printf: add indexed argument support

* src/printf.c (print_formatted): Add support for %i$ indexed args.
* tests/printf/printf-indexed.sh: Add a new file of test cases.
* tests/local.mk: Reference the new test file.
* doc/coreutils.texi (printf invocation): Mention that mixed
processing of indexed and sequential references is supported,
unlike with the printf(3) library function.
* NEWS: Mention the new feature.
These are specified in POSIX:2024.
Addresses https://bugs.gnu.org/73068
---
 NEWS                           |   4 +
 doc/coreutils.texi             |   6 ++
 src/printf.c                   | 179 ++++++++++++++++++++-------------
 tests/local.mk                 |   1 +
 tests/printf/printf-indexed.sh |  93 +++++++++++++++++
 5 files changed, 215 insertions(+), 68 deletions(-)
 create mode 100755 tests/printf/printf-indexed.sh

diff --git a/NEWS b/NEWS
index e1d3f82d1..6094de8d2 100644
--- a/NEWS
+++ b/NEWS
@@ -40,6 +40,10 @@ GNU coreutils NEWS                                    -*- outline -*-
   ls now supports the --sort=name option,
   to explicitly select the default operation of sorting by file name.
 
+  printf now supports indexed arguments, using the POSIX:2024 specified
+  %i$ format, where 'i' is an integer referencing a particular argument,
+  thus allowing repetition or reordering of printf arguments.
+
 ** Improvements
 
   'head -c NUM', 'head -n NUM', 'nl -l NUM', 'nproc --ignore NUM',
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 58b425779..9fe953587 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -13429,6 +13429,12 @@ Missing @var{argument}s are treated as null strings or as zeros,
 depending on whether the context expects a string or a number.  For
 example, the command @samp{printf %sx%d} prints @samp{x0}.
 
+@item
+Indexed arguments referenced with @samp{%i$} formats can be
+mixed with standard sequential argument references,
+in which case the two index types are independent.
+For example, the command @samp{printf '%1$s%s' A} prints @samp{AA}.
+
 @item
 @kindex \c
 An additional escape, @samp{\c}, causes @command{printf} to produce no
diff --git a/src/printf.c b/src/printf.c
index de3507925..b1c7dd561 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -291,15 +291,13 @@ print_esc_string (char const *str)
 }
 
 /* Evaluate a printf conversion specification.  START is the start of
-   the directive, LENGTH is its length, and CONVERSION specifies the
-   type of conversion.  LENGTH does not include any length modifier or
-   the conversion specifier itself.  FIELD_WIDTH and PRECISION are the
-   field width and precision for '*' values, if HAVE_FIELD_WIDTH and
-   HAVE_PRECISION are true, respectively.  ARGUMENT is the argument to
-   be formatted.  */
+   the directive, and CONVERSION specifies the type of conversion.
+   FIELD_WIDTH and PRECISION are the field width and precision for '*'
+   values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
+   ARGUMENT is the argument to be formatted.  */
 
 static void
-print_direc (char const *start, size_t length, char conversion,
+print_direc (char const *start, char conversion,
              bool have_field_width, int field_width,
              bool have_precision, int precision,
              char const *argument)
@@ -333,6 +331,7 @@ print_direc (char const *start, size_t length, char conversion,
         break;
       }
 
+    size_t length = strlen (start);
     p = xmalloc (length + length_modifier_len + 2);
     q = mempcpy (p, start, length);
     q = mempcpy (q, length_modifier, length_modifier_len);
@@ -448,50 +447,92 @@ print_direc (char const *start, size_t length, char conversion,
 static int
 print_formatted (char const *format, int argc, char **argv)
 {
-  int save_argc = argc;		/* Preserve original value.  */
+
+/* Set curr_arg from indexed %i$ or otherwise next in sequence.
+   POS can be 0,1,2,3 corresponding to
+   [%][width][.precision][conversion] respectively.  */
+
+#define GET_CURR_ARG(POS)				\
+do {							\
+  char *arge;						\
+  intmax_t arg = POS==3 ? 0 : strtoimax (f, &arge, 10);	\
+  if (0 < arg && arg <= INT_MAX && *arge == '$')	\
+    /* Process indexed %i$ format.  */			\
+    /* Note '$' comes before any flags.  */		\
+    {							\
+      SET_CURR_ARG (arg - 1);				\
+      f = arge + 1;					\
+      if (POS == 0)					\
+        direc_arg = arg - 1;				\
+    }							\
+  else							\
+    /* Reset to sequential processing.  */		\
+    {							\
+      if (POS == 0)					\
+        direc_arg = -1;					\
+      else if (POS < 3 || direc_arg == -1)		\
+        SET_CURR_ARG (++curr_s_arg);			\
+      else						\
+        SET_CURR_ARG (direc_arg);			\
+    }							\
+} while (0)						\
+
+#define SET_CURR_ARG(ARG)				\
+do {							\
+  curr_arg = ARG;					\
+  end_arg = MAX (curr_arg, end_arg);			\
+} while (0)						\
+
+  int curr_arg = -1;		/* Current offset.  */
+  int curr_s_arg = -1;		/* Current sequential offset.  */
+  int end_arg = -1;		/* End arg processed.  */
+  int direc_arg = -1;		/* Arg for main directive.  */
   char const *f;		/* Pointer into 'format'.  */
   char const *direc_start;	/* Start of % directive.  */
-  size_t direc_length;		/* Length of % directive.  */
+  char *direc;			/* Generated % directive.  */
+  char *pdirec;			/* Pointer to current end of directive.  */
   bool have_field_width;	/* True if FIELD_WIDTH is valid.  */
   int field_width = 0;		/* Arg to first '*'.  */
   bool have_precision;		/* True if PRECISION is valid.  */
   int precision = 0;		/* Arg to second '*'.  */
   char ok[UCHAR_MAX + 1];	/* ok['x'] is true if %x is allowed.  */
 
+  direc = xmalloc (strlen (format) + 1);
+
   for (f = format; *f; ++f)
     {
       switch (*f)
         {
         case '%':
-          direc_start = f++;
-          direc_length = 1;
+          direc_start = f;
+          pdirec = direc;
+          *pdirec++ = *f++;
           have_field_width = have_precision = false;
           if (*f == '%')
             {
               putchar ('%');
               break;
             }
+
+          GET_CURR_ARG (0);
+
           if (*f == 'b')
             {
               /* FIXME: Field width and precision are not supported
                  for %b, even though POSIX requires it.  */
-              if (argc > 0)
-                {
-                  print_esc_string (*argv);
-                  ++argv;
-                  --argc;
-                }
+              GET_CURR_ARG (3);
+              if (curr_arg < argc)
+                print_esc_string (argv[curr_arg]);
               break;
             }
 
           if (*f == 'q')
             {
-              if (argc > 0)
+              GET_CURR_ARG (3);
+              if (curr_arg < argc)
                 {
-                  fputs (quotearg_style (shell_escape_quoting_style, *argv),
-                         stdout);
-                  ++argv;
-                  --argc;
+                  fputs (quotearg_style (shell_escape_quoting_style,
+                                         argv[curr_arg]), stdout);
                 }
               break;
             }
@@ -501,43 +542,46 @@ print_formatted (char const *format, int argc, char **argv)
             ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
             ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
 
-          for (;; f++, direc_length++)
-            switch (*f)
-              {
+          for (;; f++)
+            {
+              switch (*f)
+                {
 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
-              case 'I':
+                case 'I':
 #endif
-              case '\'':
-                ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
-                  ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
-                break;
-              case '-': case '+': case ' ':
-                break;
-              case '#':
-                ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
-                break;
-              case '0':
-                ok['c'] = ok['s'] = 0;
-                break;
-              default:
-                goto no_more_flag_characters;
-              }
+                case '\'':
+                  ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
+                    ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
+                  break;
+                case '-': case '+': case ' ':
+                  break;
+                case '#':
+                  ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
+                  break;
+                case '0':
+                  ok['c'] = ok['s'] = 0;
+                  break;
+                default:
+                  goto no_more_flag_characters;
+                }
+              *pdirec++ = *f;
+            }
         no_more_flag_characters:
 
           if (*f == '*')
             {
-              ++f;
-              ++direc_length;
-              if (argc > 0)
+              *pdirec++ = *f++;
+
+              GET_CURR_ARG (1);
+
+              if (curr_arg < argc)
                 {
-                  intmax_t width = vstrtoimax (*argv);
+                  intmax_t width = vstrtoimax (argv[curr_arg]);
                   if (INT_MIN <= width && width <= INT_MAX)
                     field_width = width;
                   else
                     error (EXIT_FAILURE, 0, _("invalid field width: %s"),
-                           quote (*argv));
-                  ++argv;
-                  --argc;
+                           quote (argv[curr_arg]));
                 }
               else
                 field_width = 0;
@@ -545,22 +589,20 @@ print_formatted (char const *format, int argc, char **argv)
             }
           else
             while (ISDIGIT (*f))
-              {
-                ++f;
-                ++direc_length;
-              }
+              *pdirec++ = *f++;
           if (*f == '.')
             {
-              ++f;
-              ++direc_length;
+              *pdirec++ = *f++;
               ok['c'] = 0;
               if (*f == '*')
                 {
-                  ++f;
-                  ++direc_length;
-                  if (argc > 0)
+                  *pdirec++ = *f++;
+
+                  GET_CURR_ARG (2);
+
+                  if (curr_arg < argc)
                     {
-                      intmax_t prec = vstrtoimax (*argv);
+                      intmax_t prec = vstrtoimax (argv[curr_arg]);
                       if (prec < 0)
                         {
                           /* A negative precision is taken as if the
@@ -570,11 +612,9 @@ print_formatted (char const *format, int argc, char **argv)
                         }
                       else if (INT_MAX < prec)
                         error (EXIT_FAILURE, 0, _("invalid precision: %s"),
-                               quote (*argv));
+                               quote (argv[curr_arg]));
                       else
                         precision = prec;
-                      ++argv;
-                      --argc;
                     }
                   else
                     precision = 0;
@@ -582,12 +622,11 @@ print_formatted (char const *format, int argc, char **argv)
                 }
               else
                 while (ISDIGIT (*f))
-                  {
-                    ++f;
-                    ++direc_length;
-                  }
+                  *pdirec++ = *f++;
             }
 
+          *pdirec++ = '\0';
+
           while (*f == 'l' || *f == 'L' || *f == 'h'
                  || *f == 'j' || *f == 't' || *f == 'z')
             ++f;
@@ -601,10 +640,13 @@ print_formatted (char const *format, int argc, char **argv)
                      speclen, direc_start);
           }
 
-          print_direc (direc_start, direc_length, *f,
+          GET_CURR_ARG (3);
+
+          print_direc (direc, *f,
                        have_field_width, field_width,
                        have_precision, precision,
-                       (argc <= 0 ? "" : (argc--, *argv++)));
+                       (argc <= curr_arg ? "" : argv[curr_arg]));
+
           break;
 
         case '\\':
@@ -616,7 +658,8 @@ print_formatted (char const *format, int argc, char **argv)
         }
     }
 
-  return save_argc - argc;
+  free (direc);
+  return MIN (argc, end_arg + 1);
 }
 
 int
diff --git a/tests/local.mk b/tests/local.mk
index fdbf36946..f72353862 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -353,6 +353,7 @@ all_tests =					\
   tests/printf/printf.sh			\
   tests/printf/printf-cov.pl			\
   tests/printf/printf-hex.sh			\
+  tests/printf/printf-indexed.sh		\
   tests/printf/printf-mb.sh			\
   tests/printf/printf-surprise.sh		\
   tests/printf/printf-quote.sh			\
diff --git a/tests/printf/printf-indexed.sh b/tests/printf/printf-indexed.sh
new file mode 100755
index 000000000..1c3a6c380
--- /dev/null
+++ b/tests/printf/printf-indexed.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# tests for printf %i$ indexed format processing
+
+# Copyright (C) 2024 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+getlimits_
+
+prog='env printf'
+
+printf_check() {
+  cat <<EOF > exp || framework_failure_
+$1
+EOF
+
+  shift
+
+  $prog "$@" > out || fail=1
+  compare exp out || fail=1
+}
+
+printf_check_err() {
+  cat <<EOF > exp || framework_failure_
+$1
+EOF
+
+  shift
+
+  returns_1 $prog "$@" 2> out || fail=1
+  compare exp out || fail=1
+}
+
+NL="
+"
+
+# Reordering
+printf_check '21' '%2$s%1$s\n' 1 2
+
+# Repetition
+printf_check "11${NL}22" '%1$s%1$s\n' 1 2
+
+# Multiple uses of format
+printf_check "A C B${NL}D  " '%s %3$s %s\n' A B C D
+printf_check "   4${NL}1" '%1$*d\n' 4 1
+
+# Mixed indexed and sequential main arg
+printf_check "A B A" '%s %s %1$s\n' A B
+printf_check '   0 1  ' '%100$*d %s %s %s\n' 4 1
+
+# indexed arg, width, and precision
+printf_check ' 01' '%1$*2$.*3$d\n' 1 3 2
+# indexed arg, sequential width, and precision
+printf_check ' 01' '%3$*.*d\n' 3 2 1
+# indexed arg, width, and sequential precision
+printf_check ' 01' '%3$*2$.*d\n' 2 3 1
+# indexed arg, precision, and sequential width
+printf_check ' 01' '%3$*.*2$d\n' 3 2 1
+# Indexed arg, width
+printf_check '   1' '%2$*1$d\n' 4 1
+# Indexed arg, and sequential width
+printf_check '   1' '%2$*d\n' 4 1
+
+# Flags come after $ (0 is not a flag here):
+printf_check '   1' '%01$4d\n' 1
+# Flags come after $ (0 is a flag here):
+printf_check '0001' '%1$0*2$d\n' 1 4
+# Flags come after $ (-2 not taken as a valid index here):
+printf_check_err 'printf: %-2$: invalid conversion specification' \
+                 '%-2$s %1$s\n' A B
+
+# Ensure only base 10 numbers are accepted
+printf_check_err "printf: 'A': expected a numeric value" \
+                 '%0x2$s %1$s\n' A B
+# Verify int limits (avoiding comparisons with argc etc.)
+printf_check_err "printf: %${INT_OFLOW}$: invalid conversion specification" \
+                 "%${INT_OFLOW}$d\n" 1
+
+Exit $fail
-- 
2.46.0

Reply via email to