This is my first attempt at making find(1)'s -exec primary accept a terminating '+', as required by POSIX. The code is shamelessly "guttenberged" (i.e. copied) from NetBSD, with some minor modifications.
Index: extern.h =================================================================== RCS file: /cvs/src/usr.bin/find/extern.h,v retrieving revision 1.16 diff -u -r1.16 extern.h --- extern.h 21 Apr 2011 01:14:21 -0000 1.16 +++ extern.h 26 Jun 2011 19:07:30 -0000 @@ -37,10 +37,12 @@ PLAN *find_create(char ***); int find_execute(PLAN *, char **); PLAN *find_formplan(char **); +int find_traverse(PLAN *, int (*)(PLAN *, void *), void *); PLAN *not_squish(PLAN *); OPTION *option(char *); PLAN *or_squish(PLAN *); PLAN *paren_squish(PLAN *); +int plan_cleanup(PLAN *, void *); struct stat; void printlong(char *, char *, struct stat *); int queryuser(char **); Index: find.1 =================================================================== RCS file: /cvs/src/usr.bin/find/find.1,v retrieving revision 1.82 diff -u -r1.82 find.1 --- find.1 13 Feb 2011 12:35:02 -0000 1.82 +++ find.1 26 Jun 2011 19:07:30 -0000 @@ -189,28 +189,48 @@ .Op argument ... .No ; .Xc -True if the program named -.Ar utility -returns a zero value as its exit status. +.It Xo +.Ic -exec Ar utility +.Op argument ... +.No {} ++ +.Xc +Execute the specified +.Ar utility . Optional arguments may be passed to the utility. The expression must be terminated by a semicolon -.Pq Ql \&; . +.Pq Ql \&; +or a plus sign +.Pq Ql \&+ . +.Pp +If terminated by a semicolon, the +.Ar utility +is executed once per path. If the string .Qq {} appears anywhere in the utility name or the arguments it is replaced by the pathname of the current file. -.Ar utility -will be executed from the directory from which -.Nm -was executed. .Pp -Since +If terminated by a plus sign +.Pq Dq \&+ , +the pathnames for which the +primary is evaluated are aggregated into sets, and .Ar utility -is executed every time a match is made, -it is often more efficient to pipe the output of -.Nm -to +will be invoked once per set, similar to .Xr xargs 1 . 
+If any invocation exits with non-zero exit status, then +.Nm +will eventually do so as well, but this does not cause +.Nm +to exit early. +The string +.Dq {} +must appear, and must appear last. +Each set is limited to no more than 5,000 pathnames, +and is also limited such that the invocation of +.Ar utility +does not exceed +.Dv ARG_MAX . .It Xo .Ic -execdir Ar utility .Op argument ... Index: find.c =================================================================== RCS file: /cvs/src/usr.bin/find/find.c,v retrieving revision 1.15 diff -u -r1.15 find.c --- find.c 21 Apr 2011 01:14:21 -0000 1.15 +++ find.c 26 Jun 2011 19:07:30 -0000 @@ -148,7 +148,7 @@ char **paths) /* array of pathnames to traverse */ { sigset_t fullset, oset; - int rval; + int r, rval; PLAN *p; rval = 0; @@ -201,5 +201,45 @@ ; } (void)fts_close(tree); + + /* + * Cleanup any plans with leftover state. + * Keep the last non-zero return value. + */ + if ((r = find_traverse(plan, plan_cleanup, NULL)) != 0) + rval = r; return (rval); +} + +/* + * find_traverse -- + * traverse the plan tree and execute func() on all plans. This + * does not evaluate each plan's eval() function; it is intended + * for operations that must run on all plans, such as state + * cleanup. + * + * If any func() returns non-zero, then so will find_traverse(). 
+ */ +int +find_traverse(PLAN *plan, int (*func)(PLAN *, void *), void *arg) +{ + PLAN *p; + int r, rval; + + rval = 0; + for (p = plan; p; p = p->next) { + if ((r = func(p, arg)) != 0) + rval = r; + if (p->type == N_EXPR || p->type == N_OR) { + if (p->p_data[0]) + if ((r = find_traverse(p->p_data[0], + func, arg)) != 0) + rval = r; + if (p->p_data[1]) + if ((r = find_traverse(p->p_data[1], + func, arg)) != 0) + rval = r; + } + } + return rval; } Index: find.h =================================================================== RCS file: /cvs/src/usr.bin/find/find.h,v retrieving revision 1.14 diff -u -r1.14 find.h --- find.h 15 Sep 2004 18:43:25 -0000 1.14 +++ find.h 26 Jun 2011 19:07:30 -0000 @@ -54,6 +54,7 @@ #define F_LESSTHAN 2 #define F_GREATER 3 #define F_NEEDOK 1 /* exec ok */ +#define F_PLUSSET 2 /* -exec ... {} + */ #define F_MTFLAG 1 /* fstype */ #define F_MTTYPE 2 #define F_ATLEAST 1 /* perm */ @@ -77,6 +78,13 @@ char **_e_argv; /* argv array */ char **_e_orig; /* original strings */ int *_e_len; /* allocated length */ + char **_ep_bxp; /* ptr to 1st addt'l arg */ + char *_ep_p; /* current buffer pointer */ + char *_ep_bbp; /* begin buffer pointer */ + char *_ep_ebp; /* end buffer pointer */ + int _ep_maxargs; /* max #args */ + int _ep_narg; /* # addt'l args */ + int _ep_rval; /* return value */ } ex; char *_a_data[2]; /* array of char pointers */ char *_c_data; /* char pointer */ @@ -104,6 +112,15 @@ #define e_argv p_un.ex._e_argv #define e_orig p_un.ex._e_orig #define e_len p_un.ex._e_len +#define ep_p p_un.ex._ep_p +#define ep_bbp p_un.ex._ep_bbp +#define ep_ebp p_un.ex._ep_ebp +#define ep_bxp p_un.ex._ep_bxp +#define ep_cnt p_un.ex._ep_cnt +#define ep_maxargs p_un.ex._ep_maxargs +#define ep_nline p_un.ex._ep_nline +#define ep_narg p_un.ex._ep_narg +#define ep_rval p_un.ex._ep_rval typedef struct _option { char *name; /* option name */ Index: function.c =================================================================== RCS file: 
/cvs/src/usr.bin/find/function.c,v retrieving revision 1.36 diff -u -r1.36 function.c --- function.c 1 Dec 2010 01:20:29 -0000 1.36 +++ function.c 26 Jun 2011 19:07:30 -0000 @@ -46,6 +46,7 @@ #include <fts.h> #include <grp.h> #include <libgen.h> +#include <limits.h> #include <pwd.h> #include <stdio.h> #include <stdlib.h> @@ -71,6 +72,7 @@ static PLAN *palloc(enum ntype, int (*)(PLAN *, FTSENT *)); static long find_parsenum(PLAN *plan, char *option, char *vp, char *endch); +static void run_f_exec(PLAN *plan); static PLAN *palloc(enum ntype t, int (*f)(PLAN *, FTSENT *)); int f_amin(PLAN *, FTSENT *); @@ -339,38 +341,108 @@ /* * [-exec | -ok] utility [arg ... ] ; functions -- + * [-exec | -ok] utility [arg ... ] {} + functions -- * - * True if the executed utility returns a zero value as exit status. - * The end of the primary expression is delimited by a semicolon. If - * "{}" occurs anywhere, it gets replaced by the current pathname. - * The current directory for the execution of utility is the same as - * the current directory when the find utility was started. + * If the end of the primary expression is delimited by a + * semicolon: true if the executed utility returns a zero value + * as exit status. If "{}" occurs anywhere, it gets replaced by + * the current pathname. + * + * If the end of the primary expression is delimited by a plus + * sign: always true. Pathnames for which the primary is + * evaluated shall be aggregated into sets. The utility will be + * executed once per set, with "{}" replaced by the entire set of + * pathnames (as if xargs). "{}" must appear last. * - * The primary -ok is different in that it requests affirmation of the - * user before executing the utility. + * The current directory for the execution of utility is the same + * as the current directory when the find utility was started. + * + * The primary -ok is different in that it requests affirmation + * of the user before executing the utility. 
*/ int f_exec(PLAN *plan, FTSENT *entry) { - int cnt; + int cnt, l; pid_t pid; int status; - for (cnt = 0; plan->e_argv[cnt]; ++cnt) - if (plan->e_len[cnt]) - brace_subst(plan->e_orig[cnt], &plan->e_argv[cnt], - entry->fts_path, plan->e_len[cnt]); + if (plan->flags & F_PLUSSET) { + /* + * Confirm sufficient buffer space, then copy the path + * to the buffer. + */ + l = strlen(entry->fts_path); + if (plan->ep_p + l < plan->ep_ebp) { + plan->ep_bxp[plan->ep_narg++] = plan->ep_p; + strlcpy(plan->ep_p, entry->fts_path, l + 1); + plan->ep_p += l + 1; + + if (plan->ep_narg == plan->ep_maxargs) + run_f_exec(plan); + } else { + /* + * Without sufficient space to copy in the next + * argument, run the command to empty out the + * buffer before re-attepting the copy. + */ + run_f_exec(plan); + if (plan->ep_p + l < plan->ep_ebp) { + plan->ep_bxp[plan->ep_narg++] = plan->ep_p; + strlcpy(plan->ep_p, entry->fts_path, l + 1); + plan->ep_p += l + 1; + } else + errx(1, "insufficient space for argument"); + } + return (1); + } else { + for (cnt = 0; plan->e_argv[cnt]; ++cnt) + if (plan->e_len[cnt]) + brace_subst(plan->e_orig[cnt], + &plan->e_argv[cnt], + entry->fts_path, + plan->e_len[cnt]); + if (plan->flags & F_NEEDOK && !queryuser(plan->e_argv)) + return (0); - if (plan->flags == F_NEEDOK && !queryuser(plan->e_argv)) - return (0); + /* don't mix output of command with find output */ + fflush(stdout); + fflush(stderr); + + switch (pid = vfork()) { + case -1: + err(1, "fork"); + /* NOTREACHED */ + case 0: + if (fchdir(dotfd)) { + warn("chdir"); + _exit(1); + } + execvp(plan->e_argv[0], plan->e_argv); + warn("%s", plan->e_argv[0]); + _exit(1); + } + pid = waitpid(pid, &status, 0); + return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status)); + } +} - /* don't mix output of command with find output */ - fflush(stdout); - fflush(stderr); +static void +run_f_exec(PLAN *plan) +{ + pid_t pid; + int rval, status; + + /* Ensure arg list is null terminated. 
*/ + plan->ep_bxp[plan->ep_narg] = NULL; + + /* Don't mix output of command with find output. */ + fflush(stdout); + fflush(stderr); switch (pid = vfork()) { case -1: - err(1, "fork"); + err(1, "vfork"); /* NOTREACHED */ case 0: if (fchdir(dotfd)) { @@ -381,8 +453,26 @@ warn("%s", plan->e_argv[0]); _exit(1); } + + /* Clear out the argument list. */ + plan->ep_narg = 0; + plan->ep_bxp[plan->ep_narg] = NULL; + /* As well as the argument buffer. */ + plan->ep_p = plan->ep_bbp; + *plan->ep_p = '\0'; + pid = waitpid(pid, &status, 0); - return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status)); + if (WIFEXITED(status)) + rval = WEXITSTATUS(status); + else + rval = -1; + + /* + * If we have a non-zero exit status, preserve it so find(1) can + * later exit with it. + */ + if (rval) + plan->ep_rval = rval; } /* @@ -391,12 +481,16 @@ * on the command line, one with (possibly duplicated) pointers to the * argv array, and one with integer values that are lengths of the * strings, but also flags meaning that the string has to be massaged. + * + * If -exec ... {} +, use only the first array, but make it large + * enough to hold 5000 args (cf. src/usr.bin/xargs/xargs.c for a + * discussion), and then allocate ARG_MAX - 4K of space for args. */ PLAN * c_exec(char *unused, char ***argvp, int isok) { PLAN *new; /* node returned */ - int cnt; + int cnt, brace, lastbrace; char **argv, **ap, *p; /* make sure the current directory is readable */ @@ -407,36 +501,93 @@ new = palloc(N_EXEC, f_exec); if (isok) - new->flags = F_NEEDOK; + new->flags |= F_NEEDOK; - for (ap = argv = *argvp;; ++ap) { + /* + * Terminate if we encounter an arg exacty equal to ";", or an + * arg exacty equal to "+" following an arg exacty equal to + * "{}". + */ + for (ap = argv = *argvp, brace = 0;; ++ap) { if (!*ap) - errx(1, - "%s: no terminating \";\"", isok ? "-ok" : "-exec"); - if (**ap == ';') + errx(1, "%s: no terminating \";\" or \"+\"", + isok ? 
"-ok" : "-exec"); + lastbrace = brace; + brace = 0; + if (strcmp(*ap, "{}") == 0) + brace = 1; + if (strcmp(*ap, ";") == 0) break; + if (strcmp(*ap, "+") == 0 && lastbrace) { + new->flags |= F_PLUSSET; + break; + } } - cnt = ap - *argvp + 1; - new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *)); - new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *)); - new->e_len = (int *)emalloc((u_int)cnt * sizeof(int)); - for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) { - new->e_orig[cnt] = *argv; - for (p = *argv; *p; ++p) - if (p[0] == '{' && p[1] == '}') { - new->e_argv[cnt] = emalloc((u_int)MAXPATHLEN); - new->e_len[cnt] = MAXPATHLEN; - break; + /* + * POSIX says -ok ... {} + "need not be supported," and it does + * not make much sense anyway. + */ + if (new->flags & F_NEEDOK && new->flags & F_PLUSSET) + errx(1, "-ok: terminating \"+\" not permitted."); + + if (new->flags & F_PLUSSET) { + u_int c, bufsize; + + cnt = ap - *argvp - 1; /* units are words */ + new->ep_maxargs = 5000; + new->e_argv = (char **)emalloc((u_int)(cnt + new->ep_maxargs) + * sizeof(char **)); + + /* We start stuffing arguments after the user's last one. */ + new->ep_bxp = &new->e_argv[cnt]; + new->ep_narg = 0; + + /* + * Count up the space of the user's arguments, and + * subtract that from what we allocate. + */ + for (argv = *argvp, c = 0, cnt = 0; + argv < ap; + ++argv, ++cnt) { + c += strlen(*argv) + 1; + new->e_argv[cnt] = *argv; + } + bufsize = ARG_MAX - 4 * 1024 - c; + + + /* + * Allocate, and then initialize current, base, and + * end pointers. 
+ */ + new->ep_p = new->ep_bbp = malloc(bufsize + 1); + new->ep_ebp = new->ep_bbp + bufsize - 1; + new->ep_rval = 0; + } else { /* !F_PLUSSET */ + cnt = ap - *argvp + 1; + new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *)); + new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *)); + new->e_len = (int *)emalloc((u_int)cnt * sizeof(int)); + + for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) { + new->e_orig[cnt] = *argv; + for (p = *argv; *p; ++p) + if (p[0] == '{' && p[1] == '}') { + new->e_argv[cnt] = + emalloc((u_int)MAXPATHLEN); + new->e_len[cnt] = MAXPATHLEN; + break; + } + if (!*p) { + new->e_argv[cnt] = *argv; + new->e_len[cnt] = 0; } - if (!*p) { - new->e_argv[cnt] = *argv; - new->e_len[cnt] = 0; } - } - new->e_argv[cnt] = new->e_orig[cnt] = NULL; + new->e_orig[cnt] = NULL; + } + new->e_argv[cnt] = NULL; *argvp = argv + 1; return (new); } @@ -1440,6 +1591,27 @@ { return (palloc(N_OR, f_or)); } + + +/* + * plan_cleanup -- + * Check and see if the specified plan has any residual state, + * and if so, clean it up as appropriate. + * + * At the moment, only N_EXEC has state. Two kinds: 1) + * lists of files to feed to subprocesses 2) State on exit + * statusses of past subprocesses. + */ +/* ARGSUSED1 */ +int +plan_cleanup(PLAN *plan, void *arg) +{ + if (plan->type==N_EXEC && plan->ep_narg) + run_f_exec(plan); + + return plan->ep_rval; /* Passed save exit-status up chain */ +} + static PLAN * palloc(enum ntype t, int (*f)(PLAN *, FTSENT *))