On Sat, Sep 03, 2016 at 04:55:59PM +0100, Theo Buehler wrote:
> The -w flag to jot() allows the user to specify a format string that
> will be passed to printf after it was verified that it contains only one
> conversion specifier. There are some subtle differences what jot's
> getformat() accepts and what printf will do. Thus, I took vfprintf.c and
> carved out whatever is unneeded for jot itself. The result is a slightly
> less ugly function in a separate file.
Please see some comments below.
I really tried to understand all the corner cases in the getformat()
function, but couldn't wrap my head around it. I believe it would be
best to just axe the -w flag. Yes, there are probably scripts out there
using it, but I think carrying that burden around is not worth it. Every
possible implementation either is an adapted reimplementation of printf,
or whitewashing the string before passing it to printf(). I would like
to remind of the patch(1) ed script issue that resulted in shell
injection, just because the whitewash code and the actual parser where
not in sync. It's a losing game.
natano
>
> Index: Makefile
> ===================================================================
> RCS file: /var/cvs/src/usr.bin/jot/Makefile,v
> retrieving revision 1.5
> diff -u -p -r1.5 Makefile
> --- Makefile 10 Jan 2016 01:15:52 -0000 1.5
> +++ Makefile 3 Sep 2016 15:48:06 -0000
> @@ -1,6 +1,7 @@
> # $OpenBSD: Makefile,v 1.5 2016/01/10 01:15:52 tb Exp $
>
> PROG= jot
> +SRCS= getformat.c jot.c
> CFLAGS+= -Wall
> LDADD+= -lm
> DPADD+= ${LIBM}
> Index: getformat.c
> ===================================================================
> RCS file: getformat.c
> diff -N getformat.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ getformat.c 3 Sep 2016 15:55:21 -0000
> @@ -0,0 +1,188 @@
> +/* $OpenBSD$ */
> +/*-
> + * Copyright (c) 1990 The Regents of the University of California.
> + * All rights reserved.
> + *
> + * This code is derived from software contributed to Berkeley by
> + * Chris Torek.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of the University nor the names of its contributors
> + * may be used to endorse or promote products derived from this software
> + * without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +/* Based on src/lib/libc/stdio/vfprintf.c r1.77 */
> +
> +#include <err.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <wchar.h>
> +
> +extern int prec;
> +extern bool boring;
> +extern bool chardata;
> +extern bool infinity;
> +extern bool intdata;
> +extern bool longdata;
> +extern bool nosign;
> +extern bool randomize;
'boring', 'infinity' and 'randomize' are unused in getformat.c.
> +
> +int getformat(char *);
> +
> +/*
> + * Macros for converting digits to letters and vice versa
> + */
> +#define to_digit(c) ((c) - '0')
> +#define is_digit(c) ((unsigned)to_digit(c) <= 9)
I would prefer this to be less magic. How about something like this
instead?
#define is_digit(c) ((c) >= '0' && (c) <= '9')
This would also allow to remove the to_digit() macro.
> +
> +int
> +getformat(char *fmt)
> +{
> + int ch; /* character from fmt */
> + wchar_t wc;
> + mbstate_t ps;
> + size_t sz;
> + bool firsttime = true;
> +
> + sz = sizeof(fmt) - strlen(fmt) - 1;
The sizeof() doesn't do what you expect it to do here. 'fmt' is just a
pointer here, so the value returned is far to low. What we want is the
size of the original array instead.
$ jot -w 'xxx' 10
jot: -w word too long
> +
> + memset(&ps, 0, sizeof(ps));
> + /*
> + * Scan the format for conversions (`%' character).
> + */
> + for (;;) {
> + size_t len;
> +
> + while ((len = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) != 0) {
> + if (len == (size_t)-1 || len == (size_t)-2) {
> + err(1, "illegal or incomplete byte sequence "
> + "in format string");
> + }
> + if (wc == '%') {
> + if (*(fmt + 1) == '\0') {
> + /* no trailing '%' allowed */
> + if (sz <= 0)
> + errx(1, "-w word too long");
> + strlcat(fmt, "%", sizeof fmt);
> + return 0;
> + } else if (*(fmt + 1) != '%')
> + break;
> + fmt += 2; /* skip over "%%" */
> + } else
> + fmt += len;
> + }
> +
> + if (!firsttime) {
> + if (len == 0)
> + return 0;
> + return 1;
Previously the error message for 'jot -w %d%d 10' was "jot: too many
conversions", now it is "jot: illegal or unsupported format '%d%d'". I
think the previous error was more helpful.
> + }
> +
> + if (len == 0) {
> + if (chardata) {
> + if (strlcpy(fmt, "%c", sz) >= sz)
> + errx(1, "-w word too long");
> + intdata = true;
> + } else {
> + int n;
> + n = snprintf(fmt, sz, "%%.%df", prec);
> + if (n == -1 || n >= (int)sz)
> + errx(1, "-w word too long");
> + }
> + return 0;
> + }
> +
> + fmt++; /* skip over % */
> +rflag: ch = *fmt++;
> +reswitch: switch (ch) {
> + case ' ':
> + case '#':
> + case '-':
> + case '+':
> + goto rflag;
> + case '.':
> + if ((ch = *fmt++) == '*')
> + return 1;
> + while (is_digit(ch))
> + ch = *fmt++;
> + if (ch == '$')
What is this check supposed to do? There is no '$' case, so the default
will be invoked after 'got reswitch;'.
> + return 1;
> + goto reswitch;
> + case '0':
> + /*
> + * ``Note that 0 is taken as a flag, not as the
> + * beginning of a field width.''
> + * -- ANSI X3J11
> + */
> + goto rflag;
> + case '1': case '2': case '3': case '4':
> + case '5': case '6': case '7': case '8': case '9':
> + do {
> + ch = *fmt++;
> + } while (is_digit(ch));
> + if (ch == '$')
ditto.
> + return 1;
> + goto reswitch;
> + case 'l':
> + if (*fmt == 'l')
> + return 1;
> + longdata = true;
> + goto rflag;
> + case 'a':
> + case 'A':
> + case 'e':
> + case 'E':
> + case 'f':
> + case 'F':
> + case 'g':
> + case 'G':
> + break;
> + case 'c':
> + chardata = true;
> + break;
> + case 'D':
> + longdata = true;
> + /* FALLTHROUGH */
> + case 'd':
> + case 'i':
> + intdata = true;
> + break;
> + case 'O':
> + case 'U':
> + longdata = true;
> + /* FALLTHROUGH */
> + case 'o':
> + case 'u':
> + case 'X':
> + case 'x':
> + intdata = nosign = true;
> + break;
> + default:
> + return 1;
> + }
> +
> + firsttime = false;
> + }
> +}
> Index: jot.c
> ===================================================================
> RCS file: /var/cvs/src/usr.bin/jot/jot.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 jot.c
> --- jot.c 2 Sep 2016 14:23:09 -0000 1.36
> +++ jot.c 3 Sep 2016 15:48:06 -0000
> @@ -61,17 +61,18 @@ static double step = 1;
>
> static char format[BUFSIZ];
> static char sepstring[BUFSIZ] = "\n";
> -static int prec = -1;
> -static bool boring;
> -static bool chardata;
> -static bool finalnl = true;
> -static bool infinity;
> -static bool intdata;
> -static bool longdata;
> -static bool nosign;
> -static bool randomize;
'boring', 'infinity' and 'randomize' are not used in getformat.c, so
they can stay static.
>
> -static void getformat(void);
> +int prec;
> +bool boring;
> +bool chardata;
> +bool finalnl = true;
> +bool infinity;
> +bool intdata;
> +bool longdata;
> +bool nosign;
> +bool randomize;
> +
> +extern int getformat(char *);
> static int getprec(char *);
> static int putdata(double, bool);
> static void __dead usage(void);
> @@ -90,6 +91,10 @@ main(int argc, char *argv[])
> if (pledge("stdio", NULL) == -1)
> err(1, "pledge");
>
> + boring = chardata = infinity = intdata = longdata = nosign = randomize
> + = false;
> + prec = -1;
> +
> while ((ch = getopt(argc, argv, "b:cnp:rs:w:")) != -1) {
> switch (ch) {
> case 'b':
> @@ -176,7 +181,8 @@ main(int argc, char *argv[])
> argv[4]);
> }
>
> - getformat();
> + if (!boring && getformat(format))
> + errx(1, "illegal or unsupported format '%s'", format);
>
> if (!randomize) {
> /*
> @@ -350,110 +356,4 @@ getprec(char *s)
> if ((s = strchr(s, '.')) == NULL)
> return 0;
> return strspn(s + 1, "0123456789");
> -}
> -
> -static void
> -getformat(void)
> -{
> - char *p, *p2;
> - int dot, hash, space, sign, numbers = 0;
> - size_t sz;
> -
> - if (boring) /* no need to bother */
> - return;
> - for (p = format; *p != '\0'; p++) /* look for '%' */
> - if (*p == '%') {
> - if (*(p+1) != '%')
> - break;
> - p++; /* leave %% alone */
> - }
> - sz = sizeof(format) - strlen(format) - 1;
> - if (*p == '\0' && !chardata) {
> - int n;
> -
> - n = snprintf(p, sz, "%%.%df", prec);
> - if (n == -1 || n >= (int)sz)
> - errx(1, "-w word too long");
> - } else if (*p == '\0' && chardata) {
> - if (strlcpy(p, "%c", sz) >= sz)
> - errx(1, "-w word too long");
> - intdata = true;
> - } else if (*(p+1) == '\0') {
> - if (sz <= 0)
> - errx(1, "-w word too long");
> - /* cannot end in single '%' */
> - strlcat(format, "%", sizeof format);
> - } else {
> - /*
> - * Allow conversion format specifiers of the form
> - * %[#][ ][{+,-}][0-9]*[.[0-9]*]? where ? must be one of
> - * [l]{d,i,o,u,x} or {f,e,g,E,G,d,o,x,D,O,U,X,c,u}
> - */
> - p2 = p++;
> - dot = hash = space = sign = numbers = 0;
> - while (!isalpha((unsigned char)*p)) {
> - if (isdigit((unsigned char)*p)) {
> - numbers++;
> - p++;
> - } else if ((*p == '#' && !(numbers|dot|sign|space|
> - hash++)) ||
> - (*p == ' ' && !(numbers|dot|space++)) ||
> - ((*p == '+' || *p == '-') && !(numbers|dot|sign++))
> - || (*p == '.' && !(dot++)))
> - p++;
> - else
> - goto fmt_broken;
> - }
> - if (*p == 'l') {
> - longdata = true;
> - if (*++p == 'l') {
> - if (p[1] != '\0')
> - p++;
> - goto fmt_broken;
> - }
> - }
> - switch (*p) {
> - case 'o': case 'u': case 'x': case 'X':
> - intdata = nosign = true;
> - break;
> - case 'd': case 'i':
> - intdata = true;
> - break;
> - case 'D':
> - if (!longdata) {
> - intdata = true;
> - break;
> - }
> - case 'O': case 'U':
> - if (!longdata) {
> - intdata = nosign = true;
> - break;
> - }
> - case 'c':
> - if (!(intdata | longdata)) {
> - chardata = true;
> - break;
> - }
> - case 'h': case 'n': case 'p': case 'q': case 's': case 'L':
> - case '$': case '*':
> - goto fmt_broken;
> - case 'f': case 'e': case 'g': case 'E': case 'G':
> - if (!longdata)
> - break;
> - /* FALLTHROUGH */
> - default:
> -fmt_broken:
> - *++p = '\0';
> - errx(1, "illegal or unsupported format '%s'", p2);
> - }
> - while (*++p != '\0')
> - if (*p == '%' && *(p+1) != '\0' && *(p+1) != '%')
> - errx(1, "too many conversions");
> - else if (*p == '%' && *(p+1) == '%')
> - p++;
> - else if (*p == '%' && *(p+1) == '\0') {
> - strlcat(format, "%", sizeof format);
> - break;
> - }
> - }
> }
>