On Wed, Sep 07, 2016 at 12:00:17AM +0200, Martin Natano wrote:
> On Sat, Sep 03, 2016 at 04:55:59PM +0100, Theo Buehler wrote:
> > The -w flag to jot() allows the user to specify a format string that
> > will be passed to printf after it was verified that it contains only one
> > conversion specifier. There are some subtle differences what jot's
> > getformat() accepts and what printf will do. Thus, I took vfprintf.c and
> > carved out whatever is unneeded for jot itself. The result is a slightly
> > less ugly function in a separate file.
>
> Please see some comments below.
>
> I really tried to understand all the corner cases in the getformat()
> function, but couldn't wrap my head around it. I believe it would be
> best to just axe the -w flag. Yes, there are probably scripts out there
> using it, but I think carrying that burden around is not worth it. Every
> possible implementation either is an adapted reimplementation of printf,
> or whitewashing the string before passing it to printf(). I would like
> to remind of the patch(1) ed script issue that resulted in shell
> injection, just because the whitewash code and the actual parser where
> not in sync. It's a losing game.
Thanks for taking the time of looking into it. I would very much like to
ditch -w if there are no objections. It's just plain horrific and a
terrible idea to begin with.
I can understand the convenience of something like "jot -w '%02d' 10",
but everything more complicated seems to be better left to awk, perl or
whatever else your preferred scripting language is.
There is a similar printf reimplementation in usr.bin/printf/prinf.c
which is just as unpleasant, but mandated by POSIX.
Below is a diff addressing your comments.
> 'boring', 'infinity' and 'randomize' are unused in getformat.c.
yes.
> How about something like this instead?
>
> #define is_digit(c) ((c) >= '0' && (c) <= '9')
>
> This would also allow to remove the to_digit() macro.
I agree.
> > + sz = sizeof(fmt) - strlen(fmt) - 1;
>
> The sizeof() doesn't do what you expect it to do here.
that was really stupid, thanks.
> Previously the error message for 'jot -w %d%d 10' was "jot: too many
> conversions", now it is "jot: illegal or unsupported format '%d%d'". I
> think the previous error was more helpful.
restored previous error message
> > + if (ch == '$')
>
> What is this check supposed to do? There is no '$' case, so the default
> will be invoked after 'got reswitch;'.
incomplete purging of cases unneeded for jot
Index: Makefile
===================================================================
RCS file: /var/cvs/src/usr.bin/jot/Makefile,v
retrieving revision 1.5
diff -u -p -r1.5 Makefile
--- Makefile 10 Jan 2016 01:15:52 -0000 1.5
+++ Makefile 6 Sep 2016 22:35:48 -0000
@@ -1,6 +1,7 @@
# $OpenBSD: Makefile,v 1.5 2016/01/10 01:15:52 tb Exp $
PROG= jot
+SRCS= getformat.c jot.c
CFLAGS+= -Wall
LDADD+= -lm
DPADD+= ${LIBM}
Index: getformat.c
===================================================================
RCS file: getformat.c
diff -N getformat.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ getformat.c 6 Sep 2016 22:35:58 -0000
@@ -0,0 +1,180 @@
+/* $OpenBSD$ */
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Based on src/lib/libc/stdio/vfprintf.c r1.77 */
+
+#include <err.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+extern int prec;
+extern bool chardata;
+extern bool intdata;
+extern bool longdata;
+extern bool nosign;
+
+int getformat(char *);
+
+/*
+ * Macros for converting digits to letters and vice versa
+ */
+#define is_digit(c) ((c) >= '0' && (c) <= 9)
+
+int
+getformat(char *fmt)
+{
+ int ch; /* character from fmt */
+ wchar_t wc;
+ mbstate_t ps;
+ size_t sz;
+ bool firsttime = true;
+
+ sz = BUFSIZ - strlen(fmt) - 1;
+
+ memset(&ps, 0, sizeof(ps));
+ /*
+ * Scan the format for conversions (`%' character).
+ */
+ for (;;) {
+ size_t len;
+
+ while ((len = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) != 0) {
+ if (len == (size_t)-1 || len == (size_t)-2) {
+ err(1, "illegal or incomplete byte sequence "
+ "in format string");
+ }
+ if (wc == '%') {
+ if (*(fmt + 1) == '\0') {
+ /* no trailing '%' allowed */
+ if (sz <= 0)
+ errx(1, "-w word too long");
+ strlcat(fmt, "%", sizeof fmt);
+ return 0;
+ } else if (*(fmt + 1) != '%')
+ break;
+ fmt += 2; /* skip over "%%" */
+ } else
+ fmt += len;
+ }
+
+ if (!firsttime) {
+ if (len == 0)
+ return 0;
+ errx(1, "too many conversions");
+ }
+
+ if (len == 0) {
+ if (chardata) {
+ if (strlcpy(fmt, "%c", sz) >= sz)
+ errx(1, "-w word too long");
+ intdata = true;
+ } else {
+ int n;
+ n = snprintf(fmt, sz, "%%.%df", prec);
+ if (n == -1 || n >= (int)sz)
+ errx(1, "-w word too long");
+ }
+ return 0;
+ }
+
+ fmt++; /* skip over % */
+rflag: ch = *fmt++;
+reswitch: switch (ch) {
+ case ' ':
+ case '#':
+ case '-':
+ case '+':
+ goto rflag;
+ case '.':
+ if ((ch = *fmt++) == '*')
+ return 1;
+ while (is_digit(ch))
+ ch = *fmt++;
+ goto reswitch;
+ case '0':
+ /*
+ * ``Note that 0 is taken as a flag, not as the
+ * beginning of a field width.''
+ * -- ANSI X3J11
+ */
+ goto rflag;
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ do {
+ ch = *fmt++;
+ } while (is_digit(ch));
+ goto reswitch;
+ case 'l':
+ if (*fmt == 'l')
+ return 1;
+ longdata = true;
+ goto rflag;
+ case 'a':
+ case 'A':
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ break;
+ case 'c':
+ chardata = true;
+ break;
+ case 'D':
+ longdata = true;
+ /* FALLTHROUGH */
+ case 'd':
+ case 'i':
+ intdata = true;
+ break;
+ case 'O':
+ case 'U':
+ longdata = true;
+ /* FALLTHROUGH */
+ case 'o':
+ case 'u':
+ case 'X':
+ case 'x':
+ intdata = nosign = true;
+ break;
+ default:
+ return 1;
+ }
+
+ firsttime = false;
+ }
+}
Index: jot.c
===================================================================
RCS file: /var/cvs/src/usr.bin/jot/jot.c,v
retrieving revision 1.36
diff -u -p -r1.36 jot.c
--- jot.c 2 Sep 2016 14:23:09 -0000 1.36
+++ jot.c 6 Sep 2016 22:35:48 -0000
@@ -54,6 +54,12 @@
#define is_default(s) (strcmp((s), "-") == 0)
+int prec;
+bool chardata;
+bool intdata;
+bool longdata;
+bool nosign;
+
static long reps = 100;
static double begin = 1;
static double ender = 100;
@@ -61,17 +67,13 @@ static double step = 1;
static char format[BUFSIZ];
static char sepstring[BUFSIZ] = "\n";
-static int prec = -1;
+
static bool boring;
-static bool chardata;
static bool finalnl = true;
static bool infinity;
-static bool intdata;
-static bool longdata;
-static bool nosign;
static bool randomize;
-static void getformat(void);
+extern int getformat(char *);
static int getprec(char *);
static int putdata(double, bool);
static void __dead usage(void);
@@ -90,6 +92,9 @@ main(int argc, char *argv[])
if (pledge("stdio", NULL) == -1)
err(1, "pledge");
+ chardata = intdata = longdata = nosign = false;
+ prec = -1;
+
while ((ch = getopt(argc, argv, "b:cnp:rs:w:")) != -1) {
switch (ch) {
case 'b':
@@ -176,7 +181,8 @@ main(int argc, char *argv[])
argv[4]);
}
- getformat();
+ if (!boring && getformat(format))
+ errx(1, "illegal or unsupported format '%s'", format);
if (!randomize) {
/*
@@ -350,110 +356,4 @@ getprec(char *s)
if ((s = strchr(s, '.')) == NULL)
return 0;
return strspn(s + 1, "0123456789");
-}
-
-static void
-getformat(void)
-{
- char *p, *p2;
- int dot, hash, space, sign, numbers = 0;
- size_t sz;
-
- if (boring) /* no need to bother */
- return;
- for (p = format; *p != '\0'; p++) /* look for '%' */
- if (*p == '%') {
- if (*(p+1) != '%')
- break;
- p++; /* leave %% alone */
- }
- sz = sizeof(format) - strlen(format) - 1;
- if (*p == '\0' && !chardata) {
- int n;
-
- n = snprintf(p, sz, "%%.%df", prec);
- if (n == -1 || n >= (int)sz)
- errx(1, "-w word too long");
- } else if (*p == '\0' && chardata) {
- if (strlcpy(p, "%c", sz) >= sz)
- errx(1, "-w word too long");
- intdata = true;
- } else if (*(p+1) == '\0') {
- if (sz <= 0)
- errx(1, "-w word too long");
- /* cannot end in single '%' */
- strlcat(format, "%", sizeof format);
- } else {
- /*
- * Allow conversion format specifiers of the form
- * %[#][ ][{+,-}][0-9]*[.[0-9]*]? where ? must be one of
- * [l]{d,i,o,u,x} or {f,e,g,E,G,d,o,x,D,O,U,X,c,u}
- */
- p2 = p++;
- dot = hash = space = sign = numbers = 0;
- while (!isalpha((unsigned char)*p)) {
- if (isdigit((unsigned char)*p)) {
- numbers++;
- p++;
- } else if ((*p == '#' && !(numbers|dot|sign|space|
- hash++)) ||
- (*p == ' ' && !(numbers|dot|space++)) ||
- ((*p == '+' || *p == '-') && !(numbers|dot|sign++))
- || (*p == '.' && !(dot++)))
- p++;
- else
- goto fmt_broken;
- }
- if (*p == 'l') {
- longdata = true;
- if (*++p == 'l') {
- if (p[1] != '\0')
- p++;
- goto fmt_broken;
- }
- }
- switch (*p) {
- case 'o': case 'u': case 'x': case 'X':
- intdata = nosign = true;
- break;
- case 'd': case 'i':
- intdata = true;
- break;
- case 'D':
- if (!longdata) {
- intdata = true;
- break;
- }
- case 'O': case 'U':
- if (!longdata) {
- intdata = nosign = true;
- break;
- }
- case 'c':
- if (!(intdata | longdata)) {
- chardata = true;
- break;
- }
- case 'h': case 'n': case 'p': case 'q': case 's': case 'L':
- case '$': case '*':
- goto fmt_broken;
- case 'f': case 'e': case 'g': case 'E': case 'G':
- if (!longdata)
- break;
- /* FALLTHROUGH */
- default:
-fmt_broken:
- *++p = '\0';
- errx(1, "illegal or unsupported format '%s'", p2);
- }
- while (*++p != '\0')
- if (*p == '%' && *(p+1) != '\0' && *(p+1) != '%')
- errx(1, "too many conversions");
- else if (*p == '%' && *(p+1) == '%')
- p++;
- else if (*p == '%' && *(p+1) == '\0') {
- strlcat(format, "%", sizeof format);
- break;
- }
- }
}