The branch main has been updated by imp:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7b9c912c41f484b0fe75b30fbac465cc984e56b2

commit 7b9c912c41f484b0fe75b30fbac465cc984e56b2
Author:     Warner Losh <i...@freebsd.org>
AuthorDate: 2025-07-25 16:30:06 +0000
Commit:     Warner Losh <i...@freebsd.org>
CommitDate: 2025-07-25 16:34:37 +0000

    find: Add GNU find's -printf
    
    Implements most of gnu find's -printf predicate. However, the '#', '-',
    '.' and size format modifiers are unimplemented, as are %P, %H, %F, %y,
    and %Y formats. Follows what I think it should do based on the info
    page, I've not looked at the gnu find code.
    
    Sponsored by:           Netflix
    Discussed with:         des, jilles
    Reviewed by:            paua...@gundo.com (man)
    Differential Revision: https://reviews.freebsd.org/D38138
---
 usr.bin/find/Makefile   |   2 +-
 usr.bin/find/extern.h   |   4 +
 usr.bin/find/find.1     |  84 +++++++++++++
 usr.bin/find/find.h     |   2 +
 usr.bin/find/function.c |  27 +++++
 usr.bin/find/option.c   |   2 +-
 usr.bin/find/printf.c   | 307 ++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 426 insertions(+), 2 deletions(-)

diff --git a/usr.bin/find/Makefile b/usr.bin/find/Makefile
index 904c08620833..48b164133bb0 100644
--- a/usr.bin/find/Makefile
+++ b/usr.bin/find/Makefile
@@ -3,7 +3,7 @@
 PACKAGE=       runtime
 
 PROG=  find
-SRCS=  find.c function.c ls.c main.c misc.c operator.c option.c \
+SRCS=  find.c function.c ls.c main.c misc.c operator.c option.c printf.c \
        getdate.y
 YFLAGS=
 CFLAGS.clang+= -Werror=undef
diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h
index feb2e0202056..6760ac24fb04 100644
--- a/usr.bin/find/extern.h
+++ b/usr.bin/find/extern.h
@@ -44,6 +44,8 @@ void   printlong(char *, char *, struct stat *);
 int     queryuser(char **);
 OPTION *lookup_option(const char *);
 void    finish_execplus(void);
+void    do_printf(PLAN *plan, FTSENT *entry, FILE *fout);
+
 
 creat_f        c_Xmin;
 creat_f        c_Xtime;
@@ -68,6 +70,7 @@ creat_f       c_nogroup;
 creat_f        c_nouser;
 creat_f        c_perm;
 creat_f        c_print;
+creat_f        c_printf;
 creat_f        c_regex;
 creat_f        c_samefile;
 creat_f        c_simple;
@@ -106,6 +109,7 @@ exec_f      f_path;
 exec_f f_perm;
 exec_f f_print;
 exec_f f_print0;
+exec_f f_printf;
 exec_f f_prune;
 exec_f f_quit;
 exec_f f_readable;
diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1
index 8c2d8624a82a..1217d9151168 100644
--- a/usr.bin/find/find.1
+++ b/usr.bin/find/find.1
@@ -821,6 +821,17 @@ It prints the pathname of the current file to standard 
output, followed by an
 ASCII
 .Dv NUL
 character (character code 0).
+.It Ic -printf Ar fmt
+This primary always evaluates to true.
+It prints information about the file, interpreting
+.Sq \e
+and
+.Sq %
+escape sequences as described in the PRINTF FORMATS section.
+Unlike
+.Ic -print ,
+.Ic -printf
+does not add a newline automatically.
 .It Ic -prune
 This primary always evaluates to true.
 It causes
@@ -993,6 +1004,79 @@ All operands and primaries must be separate arguments to
 Primaries which themselves take arguments expect each argument
 to be a separate argument to
 .Nm .
+.Sh PRINTF FORMATS
+The following
+.Sq \e
+escapes are recognized:
+.Bl -tag -width Ds -offset indent -compact
+.It Cm \ea
+Write a <bell> character.
+.It Cm \eb
+Write a <backspace> character.
+.It Cm \ec
+Writes no characters, but terminates the string and flushes the output so far
+after each match.
+.It Cm \ef
+Write a <form-feed> character.
+.It Cm \en
+Write a <new-line> character.
+.It Cm \er
+Write a <carriage return> character.
+.It Cm \et
+Write a <tab> character.
+.It Cm \ev
+Write a <vertical tab> character.
+.It Cm \e\'
+Write a <single quote> character.
+.It Cm \e\e
+Write a backslash character.
+.It Cm \e Ns Ar num
+Write a byte whose
+value is the 1-, 2-, or 3-digit
+octal number
+.Ar num .
+Multibyte characters can be constructed using multiple
+.Cm \e Ns Ar num
+sequences.
+.El
+.Pp
+Each format specification is introduced by the percent character
+(``%'').
+The remainder of the format specification includes,
+in the following order:
+.Bl -tag -width Ds
+.It "Zero or more of the following flags:"
+.Bl -tag -width Ds
+.It Cm #
+A `#' character, which has no effect on almost all formats.
+It is not yet implemented.
+.It Cm \&\-
+A minus sign `\-' which specifies
+.Em left adjustment
+of the output in the indicated field.
+It is not yet implemented.
+.It "Field Width:"
+An optional digit string specifying a
+.Em field width ;
+if the output string has fewer bytes than the field width it will
+be blank-padded on the left (or right, if the left-adjustment indicator
+has been given) to make up the field width (note that a leading zero
+is a flag, but an embedded zero is part of a field width).
+It is not yet implemented.
+.It Precision:
+An optional period,
+.Sq Cm \&.\& ,
+followed by an optional digit string giving a
+.Em precision
+which specifies the maximum number of bytes to be printed
+from a string; if the digit string is missing, the precision is treated
+as zero.
+It is not yet implemented.
+.It Format:
+One or two characters, described below, which indicate the information to
+display.
+XXX need to write this.
+.El
+.El
 .Sh ENVIRONMENT
 The
 .Ev LANG , LC_ALL , LC_COLLATE , LC_CTYPE , LC_MESSAGES
diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h
index 1664eeb9a93f..2ddb70fd7bcc 100644
--- a/usr.bin/find/find.h
+++ b/usr.bin/find/find.h
@@ -97,6 +97,8 @@ typedef       struct _plandata *creat_f(struct _option *, 
char ***);
 #define        F_TIME2_B       0x00080000      /* one of -newer?B */
 #endif
 #define F_LINK         0x00100000      /* lname or ilname */
+/* Notes about execution */
+#define F_HAS_WARNED   0x10000000      /* Has warned about possibly bad input */
 
 /* node definition */
 typedef struct _plandata {
diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c
index ef610903cc00..21dfab8fe408 100644
--- a/usr.bin/find/function.c
+++ b/usr.bin/find/function.c
@@ -1388,6 +1388,33 @@ f_print0(PLAN *plan __unused, FTSENT *entry)
 
 /* c_print0 is the same as c_print */
 
+/*
+ * -printf functions --
+ *
+ *     Always true.  Hands the plan and the current entry to
+ *     do_printf(), which writes the formatted result to standard
+ *     output.
+ */
+int
+f_printf(PLAN *plan, FTSENT *entry)
+{
+       do_printf(plan, entry, stdout);
+       return (1);
+}
+
+/*
+ * Create the plan node for -printf.  Marks find as producing output,
+ * clears FTS_NOSTAT from the fts options, and stores the argument
+ * returned by nextarg() in c_data, where do_printf() reads the format.
+ */
+PLAN *
+c_printf(OPTION *option, char ***argvp)
+{
+       PLAN *node;
+
+       ftsoptions &= ~FTS_NOSTAT;
+       isoutput = 1;
+
+       node = palloc(option);
+       node->c_data = nextarg(option, argvp);
+       return (node);
+}
+
 /*
  * -prune functions --
  *
diff --git a/usr.bin/find/option.c b/usr.bin/find/option.c
index 268803343a8d..79fa581e79f5 100644
--- a/usr.bin/find/option.c
+++ b/usr.bin/find/option.c
@@ -148,7 +148,7 @@ static OPTION const options[] = {
        { "-perm",      c_perm,         f_perm,         0 },
        { "-print",     c_print,        f_print,        0 },
        { "-print0",    c_print,        f_print0,       0 },
-// -printf
+       { "-printf",    c_printf,       f_printf,       0 },
        { "-prune",     c_simple,       f_prune,        0 },
        { "-quit",      c_simple,       f_quit,         0 },
        { "-readable",  c_simple,       f_readable,     0 },
diff --git a/usr.bin/find/printf.c b/usr.bin/find/printf.c
new file mode 100644
index 000000000000..643f04b9cef5
--- /dev/null
+++ b/usr.bin/find/printf.c
@@ -0,0 +1,307 @@
+/*-
+ * Copyright (c) 2023, Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fts.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "find.h"
+
+/*
+ * Escape lookup table indexed by (letter - 'a') for 'a' through 'v'.
+ * Holds the control character for \a \b \f \n \r \t \v, and the letter
+ * itself where the escape has no special meaning.  Declared as an array
+ * so that the table, not just the characters, is read-only.
+ */
+static const char esc[] = "\a\bcde\fghijklm\nopq\rs\tu\v";
+
+/* True when c is one of the octal digits '0' through '7'. */
+static inline bool
+isoct(char c)
+{
+       return ('0' <= c && c <= '7');
+}
+
+/*
+ * True when the character c, following a backslash, has a special
+ * meaning: c lies in 'a' through 'v' and the esc table maps it to
+ * something other than itself.
+ */
+static inline bool
+isesc(char c)
+{
+       if (c < 'a' || c > 'v')
+               return (false);
+       return (esc[c - 'a'] != c);
+}
+
+/*
+ * Expand the backslash escapes in str and return the result as a newly
+ * allocated NUL-terminated string that the caller must free().
+ *
+ * *flush is set to true when a \c escape cut the string short and to
+ * false otherwise.  *warned is read to suppress, and set to record, the
+ * one-time warning about an unrecognized escape.
+ */
+static const char *
+escape(const char *str, bool *flush, bool *warned)
+{
+       char c;
+       int value;
+       char *tmpstr;
+       size_t tmplen;
+       FILE *fp;
+
+       /* Accumulate the expanded string in a growable memory stream. */
+       fp = open_memstream(&tmpstr, &tmplen);
+       if (fp == NULL)
+               err(1, "open_memstream");
+
+       *flush = false;
+       for (c = *str++; c; c = *str++) {
+               if (c != '\\') {
+                       putc(c, fp);
+                       continue;
+               }
+               c = *str++;
+
+               /*
+                * User error \ at end of string
+                */
+               if (c == '\0') {
+                       putc('\\', fp);
+                       break;
+               }
+
+               /*
+                * \c terminates output now and is supposed to flush the output
+                * too...
+                */
+               if (c == 'c') {
+                       *flush = true;
+                       break;
+               }
+
+               /*
+                * Is it octal? If so, decode up to 3 octal characters.  The
+                * loop reads one character too many, hence the str-- below.
+                */
+               if (isoct(c)) {
+                       value = 0;
+                       for (int i = 3; i-- > 0 && isoct(c);
+                            c = *str++) {
+                               value <<= 3;
+                               value += c - '0';
+                       }
+                       str--;
+                       putc((char)value, fp);
+                       continue;
+               }
+
+               /*
+                * It's an ANSI X3.159-1989 escape, use the mini-escape lookup
+                * table to translate.
+                */
+               if (isesc(c)) {
+                       putc(esc[c - 'a'], fp);
+                       continue;
+               }
+
+               /*
+                * \\ and \' are documented escapes that stand for themselves,
+                * so they must not trigger the warning below.
+                */
+               if (c == '\\' || c == '\'') {
+                       putc(c, fp);
+                       continue;
+               }
+
+               /*
+                * Otherwise, it's self inserting. gnu find specifically says
+                * not to rely on this behavior though. gnu find will issue
+                * a warning here, while printf(1) won't.  warnx() rather than
+                * warn(): errno has nothing to do with this diagnostic.
+                */
+               if (!*warned) {
+                       warnx("Unknown character %c after \\.", c);
+                       *warned = true;
+               }
+               putc(c, fp);
+       }
+       /* Closing the stream finalizes tmpstr; on failure it is unusable. */
+       if (fclose(fp) != 0)
+               err(1, "open_memstream");
+
+       return (tmpstr);
+}
+
+/*
+ * Write t to fp in ctime(3) format without the trailing newline (gnu
+ * find's info page is silent on whether the newline belongs there).
+ * Nothing is written if the time cannot be converted.
+ */
+static void
+fp_ctime(FILE *fp, time_t t)
+{
+       char s[26];
+
+       /* A failed conversion leaves s unset; don't print garbage. */
+       if (ctime_r(&t, s) == NULL)
+               return;
+       s[strcspn(s, "\n")] = '\0';
+       fputs(s, fp);
+}
+
+/*
+ * Write t to fp formatted by the single strftime(3) conversion character
+ * mod, or as seconds since the epoch when mod is '@'.
+ *
+ * Assumes all times are displayed in UTC rather than local time, gnu find
+ * info page silent on the issue.
+ *
+ * Also assumes that gnu find doesn't support multiple character escape
+ * sequences, which its info page is also silent on.
+ */
+static void
+fp_strftime(FILE *fp, time_t t, char mod)
+{
+       struct tm tm;
+       char buffer[128];
+       char fmt[3] = "% ";
+
+       /*
+        * Seconds since the epoch, handled here rather than via strftime.
+        * Used in Linux kernel build, so we kinda have to support it here.
+        * Printed signed so that times before 1970 come out negative.
+        */
+       if (mod == '@') {
+               fprintf(fp, "%jd", (intmax_t)t);
+               return;
+       }
+
+       if (gmtime_r(&t, &tm) == NULL)
+               err(1, "gmtime_r");
+       fmt[1] = mod;
+       /* Can't really happen ??? */
+       if (strftime(buffer, sizeof(buffer), fmt, &tm) == 0)
+               errx(1, "Format bad or data too long for buffer");
+       fputs(buffer, fp);
+}
+
+/*
+ * Consume and return the strftime(3) conversion character that has to
+ * follow %A, %B, %C or %T.  Exits with an error if the format ends first,
+ * so the caller never advances past the terminating NUL.
+ */
+static char
+time_modifier(const char **fmtp, char conv, const char *all)
+{
+       char mod;
+
+       mod = **fmtp;
+       if (mod == '\0')
+               errx(1, "Missing time format after %%%c: '%s'", conv, all);
+       (*fmtp)++;
+       return (mod);
+}
+
+/*
+ * Expand the -printf format held in plan->c_data for the file described
+ * by entry and write the result to fout.
+ *
+ * Backslash escapes are expanded first by escape(); each '%' directive
+ * is then replaced with the matching piece of file information.  The
+ * output is assembled in a memory stream and handed to fout in one
+ * piece; fout is flushed when the format contained \c.  Unimplemented
+ * and unknown directives terminate the program via errx().
+ */
+void
+do_printf(PLAN *plan, FTSENT *entry, FILE *fout)
+{
+       const char *fmt, *all, *slash;
+       struct stat *sb;
+       FILE *fp;
+       char *out;
+       size_t outlen;
+       bool flush, warned;
+       char c;
+
+       fp = open_memstream(&out, &outlen);
+       if (fp == NULL)
+               err(1, "open_memstream");
+       /* Carry the "already warned" state across calls via the plan. */
+       warned = (plan->flags & F_HAS_WARNED) != 0;
+       all = fmt = escape(plan->c_data, &flush, &warned);
+       if (warned)
+               plan->flags |= F_HAS_WARNED;
+       sb = entry->fts_statp;
+       for (c = *fmt++; c; c = *fmt++) {
+               if (c != '%') {
+                       putc(c, fp);
+                       continue;
+               }
+               c = *fmt++;
+               /* Style(9) deviation: case order same as gnu find info doc */
+               switch (c) {
+               case '\0': /* lone % at the end; fmt is already past the NUL */
+                       errx(1, "Incomplete format at end of '%s'", all);
+               case '%':
+                       putc(c, fp);
+                       break;
+               case 'p': /* Path to file */
+                       fputs(entry->fts_path, fp);
+                       break;
+               case 'f': /* filename w/o dirs */
+                       fputs(entry->fts_name, fp);
+                       break;
+               case 'h':
+                       /*
+                        * Everything before the last '/' of the path, or
+                        * '.' if the path contains no '/'.
+                        */
+                       slash = strrchr(entry->fts_path, '/');
+                       if (slash == NULL)
+                               putc('.', fp);
+                       else
+                               fwrite(entry->fts_path, 1,
+                                   (size_t)(slash - entry->fts_path), fp);
+                       break;
+               case 'P': /* file with command line arg rm'd -- HOW? fts_parent? */
+                       errx(1, "%%%c is unimplemented", c);
+               case 'H': /* Command line arg -- HOW? */
+                       errx(1, "%%%c is unimplemented", c);
+               case 'g': /* gid human readable */
+                       fputs(group_from_gid(sb->st_gid, 0), fp);
+                       break;
+               case 'G': /* gid numeric */
+                       fprintf(fp, "%ju", (uintmax_t)sb->st_gid);
+                       break;
+               case 'u': /* uid human readable */
+                       fputs(user_from_uid(sb->st_uid, 0), fp);
+                       break;
+               case 'U': /* uid numeric */
+                       fprintf(fp, "%ju", (uintmax_t)sb->st_uid);
+                       break;
+               case 'm': /* mode in octal */
+                       fprintf(fp, "%o", (unsigned int)(sb->st_mode & 07777));
+                       break;
+               case 'M': { /* Mode in ls-standard form */
+                       char mode[12];
+                       strmode(sb->st_mode, mode);
+                       fputs(mode, fp);
+                       break;
+               }
+               case 'k': /* kbytes used by file */
+                       fprintf(fp, "%jd", (intmax_t)(sb->st_blocks / 2));
+                       break;
+               case 'b': /* blocks used by file */
+                       fprintf(fp, "%jd", (intmax_t)sb->st_blocks);
+                       break;
+               case 's': /* size in bytes of file */
+                       fprintf(fp, "%jd", (intmax_t)sb->st_size);
+                       break;
+               case 'S': /* sparseness of file */
+                       fprintf(fp, "%3.1f",
+                           (float)sb->st_blocks * 512 / (float)sb->st_size);
+                       break;
+               case 'd': /* Depth in tree */
+                       fprintf(fp, "%jd", (intmax_t)entry->fts_level);
+                       break;
+               case 'D': /* device number */
+                       fprintf(fp, "%ju", (uintmax_t)sb->st_dev);
+                       break;
+               case 'F': /* Filesystem type */
+                       errx(1, "%%%c is unimplemented", c);
+               case 'l': { /* object of symbolic link, empty otherwise */
+                       char target[PATH_MAX];
+                       ssize_t len;
+
+                       if (!S_ISLNK(sb->st_mode))
+                               break;
+                       /* readlink() does not NUL-terminate; use the length. */
+                       len = readlink(entry->fts_accpath, target,
+                           sizeof(target));
+                       if (len > 0)
+                               fwrite(target, 1, (size_t)len, fp);
+                       break;
+               }
+               case 'i': /* inode # */
+                       fprintf(fp, "%ju", (uintmax_t)sb->st_ino);
+                       break;
+               case 'n': /* number of hard links */
+                       fprintf(fp, "%ju", (uintmax_t)sb->st_nlink);
+                       break;
+               case 'y': /* -type of file, incl 'l' */
+                       errx(1, "%%%c is unimplemented", c);
+               case 'Y': /* -type of file, following 'l' types L loop ? error */
+                       errx(1, "%%%c is unimplemented", c);
+               case 'a': /* access time ctime */
+                       fp_ctime(fp, sb->st_atime);
+                       break;
+               case 'A': /* access time with next char strftime format */
+                       fp_strftime(fp, sb->st_atime,
+                           time_modifier(&fmt, c, all));
+                       break;
+               case 'B': { /* birth time with next char strftime format */
+                       char mod = time_modifier(&fmt, c, all);
+
+                       /* blank on systems that don't support it */
+#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
+                       if (sb->st_birthtime != 0)
+                               fp_strftime(fp, sb->st_birthtime, mod);
+#else
+                       (void)mod;
+#endif
+                       break;
+               }
+               case 'c': /* status change time ctime */
+                       fp_ctime(fp, sb->st_ctime);
+                       break;
+               case 'C': /* status change time with next char strftime format */
+                       fp_strftime(fp, sb->st_ctime,
+                           time_modifier(&fmt, c, all));
+                       break;
+               case 't': /* modification change time ctime */
+                       fp_ctime(fp, sb->st_mtime);
+                       break;
+               case 'T': /* modification time with next char strftime format */
+                       fp_strftime(fp, sb->st_mtime,
+                           time_modifier(&fmt, c, all));
+                       break;
+               case 'Z': /* empty string for compat SELinux context string */
+                       break;
+               /* Modifier parsing here, but also need to modify above somehow */
+               case '#': case '-': case '0': case '1': case '2': case '3':
+               case '4': case '5': case '6': case '7': case '8': case '9':
+               case '.':
+                       errx(1, "Format modifier %c not yet supported: '%s'",
+                           c, all);
+               /* Any FreeBSD-specific modifications here -- none yet */
+               default:
+                       errx(1, "Unknown format %c '%s'", c, all);
+               }
+       }
+       /* The buffer is only guaranteed current once the stream is closed. */
+       if (fclose(fp) != 0)
+               err(1, "open_memstream");
+       fputs(out, fout);
+       if (flush)
+               fflush(fout);
+       /* fmt has been advanced by the loop; all is the start of the buffer. */
+       free(__DECONST(char *, all));
+       free(out);
+}

Reply via email to