The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=7b9c912c41f484b0fe75b30fbac465cc984e56b2
commit 7b9c912c41f484b0fe75b30fbac465cc984e56b2 Author: Warner Losh <i...@freebsd.org> AuthorDate: 2025-07-25 16:30:06 +0000 Commit: Warner Losh <i...@freebsd.org> CommitDate: 2025-07-25 16:34:37 +0000 find: Add GNU find's -printf Implements most of gnu find's -printf predicate. However, the '#', '-', '.' and size format modifiers are unimplemented, as are %P, %H, %F, %y, and %Y formats. Follows what I think it should do based on the info page, I've not looked at the gnu find code. Sponsored by: Netflix Discussed with: des, jilles Reviewed by: paua...@gundo.com (man) Differential Revision: https://reviews.freebsd.org/D38138 --- usr.bin/find/Makefile | 2 +- usr.bin/find/extern.h | 4 + usr.bin/find/find.1 | 84 +++++++++++++ usr.bin/find/find.h | 2 + usr.bin/find/function.c | 27 +++++ usr.bin/find/option.c | 2 +- usr.bin/find/printf.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 426 insertions(+), 2 deletions(-) diff --git a/usr.bin/find/Makefile b/usr.bin/find/Makefile index 904c08620833..48b164133bb0 100644 --- a/usr.bin/find/Makefile +++ b/usr.bin/find/Makefile @@ -3,7 +3,7 @@ PACKAGE= runtime PROG= find -SRCS= find.c function.c ls.c main.c misc.c operator.c option.c \ +SRCS= find.c function.c ls.c main.c misc.c operator.c option.c printf.c \ getdate.y YFLAGS= CFLAGS.clang+= -Werror=undef diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h index feb2e0202056..6760ac24fb04 100644 --- a/usr.bin/find/extern.h +++ b/usr.bin/find/extern.h @@ -44,6 +44,8 @@ void printlong(char *, char *, struct stat *); int queryuser(char **); OPTION *lookup_option(const char *); void finish_execplus(void); +void do_printf(PLAN *plan, FTSENT *entry, FILE *fout); + creat_f c_Xmin; creat_f c_Xtime; @@ -68,6 +70,7 @@ creat_f c_nogroup; creat_f c_nouser; creat_f c_perm; creat_f c_print; +creat_f c_printf; creat_f c_regex; creat_f c_samefile; creat_f c_simple; @@ -106,6 +109,7 @@ exec_f f_path; exec_f f_perm; exec_f f_print; exec_f f_print0; +exec_f f_printf; 
exec_f f_prune; exec_f f_quit; exec_f f_readable; diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 index 8c2d8624a82a..1217d9151168 100644 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -821,6 +821,17 @@ It prints the pathname of the current file to standard output, followed by an ASCII .Dv NUL character (character code 0). +.It Ic -printf Ar fmt +This primary always evaluates to true. +It prints information about the file, interpreting +.Sq \e +and +.Sq % +escape sequences as described in the PRINTF FORMATS section. +Unlike +.Ic -print , +.Ic -printf +does not add a newline automatically. .It Ic -prune This primary always evaluates to true. It causes @@ -993,6 +1004,79 @@ All operands and primaries must be separate arguments to Primaries which themselves take arguments expect each argument to be a separate argument to .Nm . +.Sh PRINTF FORMATS +The following +.Sq \e +escapes are recognized: +.Bl -tag -width Ds -offset indent -compact +.It Cm \ea +Write a <bell> character. +.It Cm \eb +Write a <backspace> character. +.It Cm \ec +Writes no characters, but terminates the string and flushes the output so far +after each match. +.It Cm \ef +Write a <form-feed> character. +.It Cm \en +Write a <new-line> character. +.It Cm \er +Write a <carriage return> character. +.It Cm \et +Write a <tab> character. +.It Cm \ev +Write a <vertical tab> character. +.It Cm \e\' +Write a <single quote> character. +.It Cm \e\e +Write a backslash character. +.It Cm \e Ns Ar num +Write a byte whose +value is the 1-, 2-, or 3-digit +octal number +.Ar num . +Multibyte characters can be constructed using multiple +.Cm \e Ns Ar num +sequences. +.El +.Pp +Each format specification is introduced by the percent character +(``%''). +The remainder of the format specification includes, +in the following order: +.Bl -tag -width Ds +.It "Zero or more of the following flags:" +.Bl -tag -width Ds +.It Cm # +A `#' character has no effect on almost all formats. +It is not yet implemented. 
+.It Cm \&\- +A minus sign `\-' which specifies +.Em left adjustment +of the output in the indicated field. +It is not yet implemented. +.El +.It "Field Width:" +An optional digit string specifying a +.Em field width ; +if the output string has fewer bytes than the field width it will +be blank-padded on the left (or right, if the left-adjustment indicator +has been given) to make up the field width (note that a leading zero +is a flag, but an embedded zero is part of a field width). +It is not yet implemented. +.It Precision: +An optional period, +.Sq Cm \&.\& , +followed by an optional digit string giving a +.Em precision +which specifies the maximum number of bytes to be printed +from a string; if the digit string is missing, the precision is treated +as zero. +It is not yet implemented. +.It Format: +One or two characters, described below, which indicate the information to display. +XXX need to write this. +.El .Sh ENVIRONMENT The .Ev LANG , LC_ALL , LC_COLLATE , LC_CTYPE , LC_MESSAGES diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h index 1664eeb9a93f..2ddb70fd7bcc 100644 --- a/usr.bin/find/find.h +++ b/usr.bin/find/find.h @@ -97,6 +97,8 @@ typedef struct _plandata *creat_f(struct _option *, char ***); #define F_TIME2_B 0x00080000 /* one of -newer?B */ #endif #define F_LINK 0x00100000 /* lname or ilname */ +/* Notes about execution */ +#define F_HAS_WARNED 0x10000000 /* Has issued a warning for maybe bad input */ /* node definition */ typedef struct _plandata { diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c index ef610903cc00..21dfab8fe408 100644 --- a/usr.bin/find/function.c +++ b/usr.bin/find/function.c @@ -1388,6 +1388,33 @@ f_print0(PLAN *plan __unused, FTSENT *entry) /* c_print0 is the same as c_print */ +/* + * -printf functions -- + * + * Always true. Causes information as specified in the + * argument to be written to standard output. 
+ */
+int
+f_printf(PLAN *plan, FTSENT *entry)
+{
+	do_printf(plan, entry, stdout);
+	return (1);
+}
+
+PLAN *
+c_printf(OPTION *option, char ***argvp)
+{
+	PLAN *new;
+
+	isoutput = 1;
+	ftsoptions &= ~FTS_NOSTAT;
+
+	new = palloc(option);
+	new->c_data = nextarg(option, argvp);
+
+	return (new);
+}
+
 /*
  * -prune functions --
  *
diff --git a/usr.bin/find/option.c b/usr.bin/find/option.c
index 268803343a8d..79fa581e79f5 100644
--- a/usr.bin/find/option.c
+++ b/usr.bin/find/option.c
@@ -148,7 +148,7 @@ static OPTION const options[] = {
 	{ "-perm",	c_perm,		f_perm,		0 },
 	{ "-print",	c_print,	f_print,	0 },
 	{ "-print0",	c_print,	f_print0,	0 },
-// -printf
+	{ "-printf",	c_printf,	f_printf,	0 },
 	{ "-prune",	c_simple,	f_prune,	0 },
 	{ "-quit",	c_simple,	f_quit,		0 },
 	{ "-readable",	c_simple,	f_readable,	0 },
diff --git a/usr.bin/find/printf.c b/usr.bin/find/printf.c
new file mode 100644
index 000000000000..643f04b9cef5
--- /dev/null
+++ b/usr.bin/find/printf.c
@@ -0,0 +1,390 @@
+/*-
+ * Copyright (c) 2023, Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fts.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "find.h"
+
+/*
+ * Escape lookup table, indexed by (c - 'a') for 'a' <= c <= 'v'.  An entry
+ * that differs from its own index letter is the control character that \X
+ * expands to; an entry equal to its index letter marks a letter with no
+ * special meaning.
+ */
+static const char *esc = "\a\bcde\fghijklm\nopq\rs\tu\v";
+
+/* True if c is an octal digit. */
+static inline bool
+isoct(char c)
+{
+	return (c >= '0' && c <= '7');
+}
+
+/* True if a backslash followed by c names a control character in esc. */
+static inline bool
+isesc(char c)
+{
+	return (c >= 'a' && c <= 'v' && esc[c - 'a'] != c);
+}
+
+/*
+ * Expand the backslash escapes in str.
+ *
+ * Returns a newly allocated, NUL-terminated copy of str with the escapes
+ * replaced; the caller must free() it.  *flush is set to true if a \c escape
+ * ended the string early and to false otherwise.  *warned suppresses the
+ * unknown-escape warning when true on entry and is set to true once the
+ * warning has been issued.
+ *
+ * An escape that expands to a NUL byte (such as \0) is stored in the result,
+ * but callers that treat the result as a C string will stop reading there.
+ */
+static const char *
+escape(const char *str, bool *flush, bool *warned)
+{
+	char c;
+	int value;
+	char *tmpstr;
+	size_t tmplen;
+	FILE *fp;
+
+	fp = open_memstream(&tmpstr, &tmplen);
+	if (fp == NULL)
+		err(1, "open_memstream");
+
+	*flush = false;
+	for (c = *str++; c; c = *str++) {
+		if (c != '\\') {
+			putc(c, fp);
+			continue;
+		}
+		c = *str++;
+
+		/*
+		 * User error \ at end of string
+		 */
+		if (c == '\0') {
+			putc('\\', fp);
+			break;
+		}
+
+		/*
+		 * \c terminates output now and is supposed to flush the output
+		 * too...
+		 */
+		if (c == 'c') {
+			*flush = true;
+			break;
+		}
+
+		/*
+		 * Is it octal? If so, decode up to 3 octal characters.
+		 */
+		if (isoct(c)) {
+			value = 0;
+			for (int i = 3; i-- > 0 && isoct(c);
+			     c = *str++) {
+				value <<= 3;
+				value += c - '0';
+			}
+			/* Step back onto the first character not consumed. */
+			str--;
+			putc((char)value, fp);
+			continue;
+		}
+
+		/*
+		 * It's an ANSI X3.159-1989 escape, use the mini-escape lookup
+		 * table to translate.
+		 */
+		if (isesc(c)) {
+			putc(esc[c - 'a'], fp);
+			continue;
+		}
+
+		/*
+		 * Otherwise, it's self inserting. gnu find specifically says
+		 * not to rely on this behavior though. gnu find will issue
+		 * a warning here, while printf(1) won't.
+		 */
+		if (!*warned) {
+			/* warnx(), not warn(): errno is meaningless here. */
+			warnx("Unknown character %c after \\.", c);
+			*warned = true;
+		}
+		putc(c, fp);
+	}
+	/* tmpstr is only guaranteed to be complete once fp is closed. */
+	if (fclose(fp) != 0)
+		err(1, "open_memstream");
+
+	return (tmpstr);
+}
+
+/*
+ * Write t to fp in ctime(3) format, without the trailing newline.
+ */
+static void
+fp_ctime(FILE *fp, time_t t)
+{
+	char s[26];
+
+	if (ctime_r(&t, s) == NULL)
+		return;
+	s[24] = '\0'; /* kill newline, though gnu find info silent on issue */
+	fputs(s, fp);
+}
+
+/*
+ * Write t to fp using the single strftime(3) conversion character mod, or as
+ * seconds since the epoch if mod is '@'.
+ *
+ * Times are shown in the local time zone, the same as fp_ctime() does.
+ *
+ * Also assumes that gnu find doesn't support multiple character escape
+ * sequences, which its info page is silent on.
+ */
+static void
+fp_strftime(FILE *fp, time_t t, char mod)
+{
+	struct tm tm;
+	char buffer[128];
+	char fmt[3] = "% ";
+
+	/*
+	 * Seconds since the epoch is not a strftime(3) conversion we can rely
+	 * on, so handle it here.  Used in Linux kernel build, so we kinda
+	 * have to support it.
+	 */
+	if (mod == '@') {
+		fprintf(fp, "%jd", (intmax_t)t);
+		return;
+	}
+
+	if (localtime_r(&t, &tm) == NULL)
+		return;
+	fmt[1] = mod;
+	if (strftime(buffer, sizeof(buffer), fmt, &tm) == 0)
+		errx(1, "Format bad or data too long for buffer"); /* Can't really happen ??? */
+	fputs(buffer, fp);
+}
+
+/*
+ * Fetch the conversion character that follows a %A, %B, %C or %T directive
+ * and advance *fmtp past it.  A format that ends right after the directive
+ * is a user error; exit rather than step over the terminating NUL.
+ */
+static char
+time_modifier(const char **fmtp, char directive, const char *all)
+{
+	char mod;
+
+	mod = **fmtp;
+	if (mod == '\0')
+		errx(1, "Missing time format after %%%c: '%s'", directive, all);
+	(*fmtp)++;
+	return (mod);
+}
+
+/*
+ * Implement -printf for one file.
+ *
+ * plan->c_data holds the format string.  Its backslash escapes are expanded,
+ * its % directives are filled in from entry and entry's stat data, and the
+ * result is written to fout in one piece.  If the format contained \c, fout
+ * is flushed afterwards.  F_HAS_WARNED is kept in plan->flags so that the
+ * unknown-escape warning is issued only once per plan.
+ *
+ * Unimplemented or unknown directives terminate the program via errx().
+ */
+void
+do_printf(PLAN *plan, FTSENT *entry, FILE *fout)
+{
+	const char *fmt, *path, *pend, *all;
+	char c;
+	FILE *fp;
+	bool flush, warned;
+	struct stat *sb;
+	char *tmp;
+	size_t tmplen;
+
+	fp = open_memstream(&tmp, &tmplen);
+	if (fp == NULL)
+		err(1, "open_memstream");
+	warned = (plan->flags & F_HAS_WARNED) != 0;
+	all = fmt = escape(plan->c_data, &flush, &warned);
+	if (warned)
+		plan->flags |= F_HAS_WARNED;
+	sb = entry->fts_statp;
+	for (c = *fmt++; c; c = *fmt++) {
+		if (c != '%') {
+			putc(c, fp);
+			continue;
+		}
+		c = *fmt++;
+		/* A lone % at the end: don't read past the terminator. */
+		if (c == '\0')
+			errx(1, "Missing format character after %%: '%s'", all);
+		/* Style(9) deviation: case order same as gnu find info doc */
+		switch (c) {
+		case '%':
+			putc(c, fp);
+			break;
+		case 'p': /* Path to file */
+			fputs(entry->fts_path, fp);
+			break;
+		case 'f': /* filename w/o dirs */
+			fputs(entry->fts_name, fp);
+			break;
+		case 'h':
+			/*
+			 * Leading directories of the path (everything before
+			 * the last '/'), or '.' if the path contains no '/'.
+			 */
+			path = entry->fts_path;
+			pend = strrchr(path, '/');
+			if (pend == NULL)
+				putc('.', fp);
+			else
+				fwrite(path, 1, (size_t)(pend - path), fp);
+			break;
+		case 'P': /* file with command line arg rm'd -- HOW? fts_parent? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'H': /* Command line arg -- HOW? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'g': /* gid human readable */
+			fputs(group_from_gid(sb->st_gid, 0), fp);
+			break;
+		case 'G': /* gid numeric */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_gid);
+			break;
+		case 'u': /* uid human readable */
+			fputs(user_from_uid(sb->st_uid, 0), fp);
+			break;
+		case 'U': /* uid numeric */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_uid);
+			break;
+		case 'm': /* mode in octal */
+			fprintf(fp, "%o", (unsigned int)(sb->st_mode & 07777));
+			break;
+		case 'M': { /* Mode in ls-standard form */
+			char mode[12];
+			strmode(sb->st_mode, mode);
+			fputs(mode, fp);
+			break;
+		}
+		case 'k': /* kbytes used by file, rounded up */
+			fprintf(fp, "%jd", (intmax_t)((sb->st_blocks + 1) / 2));
+			break;
+		case 'b': /* blocks used by file */
+			fprintf(fp, "%jd", (intmax_t)sb->st_blocks);
+			break;
+		case 's': /* size in bytes of file */
+			fprintf(fp, "%jd", (intmax_t)sb->st_size);
+			break;
+		case 'S': /* sparseness of file */
+			/* Avoid 0/0 for an empty file that uses no blocks. */
+			if (sb->st_size == 0 && sb->st_blocks == 0)
+				fprintf(fp, "%3.1f", 1.0);
+			else
+				fprintf(fp, "%3.1f",
+				    (double)sb->st_blocks * 512 / (double)sb->st_size);
+			break;
+		case 'd': /* Depth in tree */
+			fprintf(fp, "%ld", (long)entry->fts_level);
+			break;
+		case 'D': /* device number */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_dev);
+			break;
+		case 'F': /* Filesystem type */
+			errx(1, "%%%c is unimplemented", c);
+		case 'l': /* object of symbolic link, empty if not a symlink */
+			if (S_ISLNK(sb->st_mode)) {
+				char target[PATH_MAX];
+				ssize_t len;
+
+				len = readlink(entry->fts_accpath, target,
+				    sizeof(target));
+				if (len > 0)
+					fwrite(target, 1, (size_t)len, fp);
+			}
+			break;
+		case 'i': /* inode # */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_ino);
+			break;
+		case 'n': /* number of hard links */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_nlink);
+			break;
+		case 'y': /* -type of file, incl 'l' */
+			errx(1, "%%%c is unimplemented", c);
+		case 'Y': /* -type of file, following 'l' types L loop ? error */
+			errx(1, "%%%c is unimplemented", c);
+		case 'a': /* access time ctime */
+			fp_ctime(fp, sb->st_atime);
+			break;
+		case 'A': /* access time with next char strftime format */
+			fp_strftime(fp, sb->st_atime,
+			    time_modifier(&fmt, c, all));
+			break;
+		case 'B': { /* birth time with next char strftime format */
+			char mod = time_modifier(&fmt, c, all);
+
+#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
+			if (sb->st_birthtime != 0)
+				fp_strftime(fp, sb->st_birthtime, mod);
+#else
+			(void)mod; /* blank on systems that don't support it */
+#endif
+			break;
+		}
+		case 'c': /* status change time ctime */
+			fp_ctime(fp, sb->st_ctime);
+			break;
+		case 'C': /* status change time with next char strftime format */
+			fp_strftime(fp, sb->st_ctime,
+			    time_modifier(&fmt, c, all));
+			break;
+		case 't': /* modification change time ctime */
+			fp_ctime(fp, sb->st_mtime);
+			break;
+		case 'T': /* modification time with next char strftime format */
+			fp_strftime(fp, sb->st_mtime,
+			    time_modifier(&fmt, c, all));
+			break;
+		case 'Z': /* empty string for compat SELinux context string */
+			break;
+		/* Modifier parsing here, but also need to modify above somehow */
+		case '#': case '-': case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9': case '.':
+			errx(1, "Format modifier %c not yet supported: '%s'", c, all);
+		/* Any FreeBSD-specific modifications here -- none yet */
+		default:
+			errx(1, "Unknown format %c '%s'", c, all);
+		}
+	}
+	/* tmp and tmplen are only guaranteed valid once fp is closed. */
+	if (fclose(fp) != 0)
+		err(1, "open_memstream");
+	fwrite(tmp, 1, tmplen, fout);
+	if (flush)
+		fflush(fout);
+	free(__DECONST(char *, all));
+	free(tmp);
+}