I fixed a few bugs and tried to sbaseify the new expr. I've attached a
patch. Please let me know if I need to make any more changes.

Make sure to look at the differences in the Makefile before deciding
to apply. I'm not convinced I did it the best way, but it is working.
Besides the changes I made to add expr, I moved .POSIX to the top, as
specified by POSIX, and cleared the suffixes before appending ours.

I think writing a script or makefile for sbase-box may be worthwhile
as the recipe is getting ugly and it isn't built the same as the rest,
just one big call to the compiler. I've started to put some thought
into that but haven't come up with a great idea yet.

-emg
From babfcb8b019893124e55f2f2898606fb8ede67e3 Mon Sep 17 00:00:00 2001
From: Evan Gates <evan.ga...@gmail.com>
Date: Fri, 7 Nov 2014 17:59:55 -0800
Subject: [PATCH] replace expr.c with expr.y

---
 Makefile |  43 +++---
 expr.c   | 517 ---------------------------------------------------------------
 expr.y   | 167 +++++++++++++++++++++
 3 files changed, 192 insertions(+), 535 deletions(-)
 delete mode 100644 expr.c
 create mode 100644 expr.y

diff --git a/Makefile b/Makefile
index dd05f52..5122cb4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,8 @@
-include config.mk
-
 .POSIX:
-.SUFFIXES: .c .o
+.SUFFIXES:
+.SUFFIXES: .o .c .y
+
+include config.mk
 
 HDR = crypt.h fs.h text.h md5.h sha1.h sha256.h sha512.h util.h arg.h
 LIB = \
@@ -29,7 +30,7 @@ LIB = \
        util/strlcat.o   \
        util/strlcpy.o
 
-SRC = \
+CSRC = \
        basename.c \
        cal.c      \
        cat.c      \
@@ -51,7 +52,6 @@ SRC = \
        echo.c     \
        env.c      \
        expand.c   \
-       expr.c     \
        false.c    \
        fold.c     \
        grep.c     \
@@ -107,16 +107,16 @@ SRC = \
        xargs.c    \
        yes.c
 
-OBJ = $(SRC:.c=.o) $(LIB)
-BIN = $(SRC:.c=)
-MAN = $(SRC:.c=.1)
+YSRC = expr.y
 
-all: binlib
+SRC = $(CSRC)     $(YSRC)
+BIN = $(CSRC:.c=) $(YSRC:.y=)
+OBJ = $(BIN:=.o) $(LIB)
+MAN = $(BIN:=.1)
 
-binlib: util.a
-       $(MAKE) bin
+all: $(BIN)
 
-bin: $(BIN)
+$(BIN): util.a
 
 $(OBJ): util.h config.mk
 cat.o fold.o grep.o nl.o sort.o tail.o uniq.o: text.h
@@ -130,6 +130,12 @@ cp.o mv.o rm.o: fs.h
        @echo CC $<
        @$(CC) -c -o $@ $< $(CFLAGS)
 
+.y.o:
+       @echo YACC $<
+       @$(YACC) $(YFLAGS) $<
+       @$(CC) -c -o $@ y.tab.c $(CFLAGS)
+       @$(RM) y.tab.c
+
 util.a: $(LIB)
        @echo AR $@
        @$(AR) -r -c $@ $?
@@ -163,20 +169,21 @@ sbase-box: $(SRC) util.a
        @echo creating box binary
        @mkdir -p build
        @cp $(HDR) build
-       @for f in $(SRC); do sed "s/^main(/`basename $$f .c`_&/" < $$f > build/$$f; done
+       @for f in $(SRC); do sed "s/^main(/$${f%.?}_&/" $$f > build/$$f; done
        @echo '#include <libgen.h>'  > build/$@.c
        @echo '#include <stdio.h>'  >> build/$@.c
        @echo '#include <stdlib.h>' >> build/$@.c
        @echo '#include <string.h>' >> build/$@.c
        @echo '#include "util.h"'   >> build/$@.c
-       @for f in $(SRC); do echo "int `basename $$f .c`_main(int, char **);" >> build/$@.c; done
+       @for f in $(SRC); do echo "int $${f%.?}_main(int, char **);"; done >> build/$@.c
        @echo 'int main(int argc, char *argv[]) { char *s = basename(argv[0]); if(!strcmp(s,"sbase-box")) { argc--; argv++; s = basename(argv[0]); } if(0) ;' >> build/$@.c
-       @for f in $(SRC); do echo "else if(!strcmp(s, \"`basename $$f .c`\")) return `basename $$f .c`_main(argc, argv);" >> build/$@.c; done
+       @for f in $(SRC); do echo "else if(!strcmp(s, \"$${f%.?}\")) return $${f%.?}_main(argc, argv);"; done >> build/$@.c
        @echo 'else {' >> build/$@.c
-       @for f in $(SRC); do echo "printf(\"`basename $$f .c`\"); putchar(' ');" >> build/$@.c; done
+       @for f in $(SRC); do echo "printf(\"$${f%.?}\"); putchar(' ');"; done | sort >> build/$@.c
        @echo "putchar(0xa); }; return 0; }" >> build/$@.c
-       @echo LD $@
-       @$(LD) -o $@ build/*.c util.a $(CFLAGS) $(LDFLAGS)
+       @for f in $(YSRC); do yacc build/$$f && mv y.tab.c build/$${f%.?}.c; done
+       @echo CC $@
+       @$(CC) -o $@ build/*.c util.a $(CFLAGS) $(LDFLAGS)
        @rm -r build
 
 clean:
diff --git a/expr.c b/expr.c
deleted file mode 100644
index 9457d39..0000000
--- a/expr.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/*     $OpenBSD: src/bin/expr/expr.c,v 1.19 2013/11/21 15:54:45 deraadt Exp $  */
-/*     $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */
-
-/*
- * Written by J.T. Conklin <j...@netbsd.org>.
- * Public domain.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <locale.h>
-#include <ctype.h>
-#include <regex.h>
-#include <err.h>
-
-static struct val      *make_int(int);
-static struct val      *make_str(char *);
-static void            free_value(struct val *);
-static int             is_integer(struct val *, int *);
-static int             to_integer(struct val *);
-static void            to_string(struct val *);
-static int             is_zero_or_null(struct val *);
-static void            nexttoken(int);
-static void            error(void);
-static struct val      *eval6(void);
-static struct val      *eval5(void);
-static struct val      *eval4(void);
-static struct val      *eval3(void);
-static struct val      *eval2(void);
-static struct val      *eval1(void);
-static struct val      *eval0(void);
-
-enum token {
-       OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP,
-       NE, LE, GE, OPERAND, EOI
-};
-
-struct val {
-       enum {
-               integer,
-               string
-       } type;
-
-       union {
-               char           *s;
-               int             i;
-       } u;
-};
-
-static enum token      token;
-static struct val     *tokval;
-static char          **av;
-
-static struct val *
-make_int(int i)
-{
-       struct val     *vp;
-
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL) {
-               err(3, NULL);
-       }
-       vp->type = integer;
-       vp->u.i = i;
-       return vp;
-}
-
-static struct val *
-make_str(char *s)
-{
-       struct val     *vp;
-
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) {
-               err(3, NULL);
-       }
-       vp->type = string;
-       return vp;
-}
-
-static void
-free_value(struct val *vp)
-{
-       if (vp->type == string)
-               free(vp->u.s);
-       free(vp);
-}
-
-/* determine if vp is an integer; if so, return it's value in *r */
-static int
-is_integer(struct val *vp, int *r)
-{
-       char           *s;
-       int             neg;
-       int             i;
-
-       if (vp->type == integer) {
-               *r = vp->u.i;
-               return 1;
-       }
-
-       /*
-        * POSIX.2 defines an "integer" as an optional unary minus
-        * followed by digits.
-        */
-       s = vp->u.s;
-       i = 0;
-
-       neg = (*s == '-');
-       if (neg)
-               s++;
-
-       while (*s) {
-               if (!isdigit((unsigned char)*s))
-                       return 0;
-
-               i *= 10;
-               i += *s - '0';
-
-               s++;
-       }
-
-       if (neg)
-               i *= -1;
-
-       *r = i;
-       return 1;
-}
-
-/* coerce to vp to an integer */
-static int
-to_integer(struct val *vp)
-{
-       int             r;
-
-       if (vp->type == integer)
-               return 1;
-
-       if (is_integer(vp, &r)) {
-               free(vp->u.s);
-               vp->u.i = r;
-               vp->type = integer;
-               return 1;
-       }
-
-       return 0;
-}
-
-/* coerce to vp to an string */
-static void
-to_string(struct val *vp)
-{
-       char           *tmp;
-
-       if (vp->type == string)
-               return;
-
-       if (asprintf(&tmp, "%d", vp->u.i) == -1)
-               err(3, NULL);
-
-       vp->type = string;
-       vp->u.s = tmp;
-}
-
-static int
-is_zero_or_null(struct val *vp)
-{
-       if (vp->type == integer) {
-               return (vp->u.i == 0);
-       } else {
-               return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0));
-       }
-       /* NOTREACHED */
-}
-
-static void
-nexttoken(int pat)
-{
-       char           *p;
-
-       if ((p = *av) == NULL) {
-               token = EOI;
-               return;
-       }
-       av++;
-
-       if (pat == 0 && p[0] != '\0') {
-               if (p[1] == '\0') {
-                       const char     *x = "|&=<>+-*/%:()";
-                       char           *i;      /* index */
-
-                       if ((i = strchr(x, *p)) != NULL) {
-                               token = i - x;
-                               return;
-                       }
-               } else if (p[1] == '=' && p[2] == '\0') {
-                       switch (*p) {
-                       case '<':
-                               token = LE;
-                               return;
-                       case '>':
-                               token = GE;
-                               return;
-                       case '!':
-                               token = NE;
-                               return;
-                       }
-               }
-       }
-       tokval = make_str(p);
-       token = OPERAND;
-       return;
-}
-
-static void
-error(void)
-{
-       errx(2, "syntax error");
-       /* NOTREACHED */
-}
-
-static struct val *
-eval6(void)
-{
-       struct val     *v;
-
-       if (token == OPERAND) {
-               nexttoken(0);
-               return tokval;
-
-       } else if (token == RP) {
-               nexttoken(0);
-               v = eval0();
-
-               if (token != LP) {
-                       error();
-                       /* NOTREACHED */
-               }
-               nexttoken(0);
-               return v;
-       } else {
-               error();
-       }
-       /* NOTREACHED */
-       return NULL;
-}
-
-/* Parse and evaluate match (regex) expressions */
-static struct val *
-eval5(void)
-{
-       regex_t         rp;
-       regmatch_t      rm[2];
-       char            errbuf[256];
-       int             eval;
-       struct val     *l, *r;
-       struct val     *v;
-
-       l = eval6();
-       while (token == MATCH) {
-               nexttoken(1);
-               r = eval6();
-
-               /* coerce to both arguments to strings */
-               to_string(l);
-               to_string(r);
-
-               /* compile regular expression */
-               if ((eval = regcomp(&rp, r->u.s, 0)) != 0) {
-                       regerror(eval, &rp, errbuf, sizeof(errbuf));
-                       errx(2, "%s", errbuf);
-               }
-
-               /* compare string against pattern --  remember that patterns
-                  are anchored to the beginning of the line */
-               if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) {
-                       if (rm[1].rm_so >= 0) {
-                               *(l->u.s + rm[1].rm_eo) = '\0';
-                               v = make_str(l->u.s + rm[1].rm_so);
-
-                       } else {
-                               v = make_int((int)(rm[0].rm_eo - rm[0].rm_so));
-                       }
-               } else {
-                       if (rp.re_nsub == 0) {
-                               v = make_int(0);
-                       } else {
-                               v = make_str("");
-                       }
-               }
-
-               /* free arguments and pattern buffer */
-               free_value(l);
-               free_value(r);
-               regfree(&rp);
-
-               l = v;
-       }
-
-       return l;
-}
-
-/* Parse and evaluate multiplication and division expressions */
-static struct val *
-eval4(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-
-       l = eval5();
-       while ((op = token) == MUL || op == DIV || op == MOD) {
-               nexttoken(0);
-               r = eval5();
-
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
-
-               if (op == MUL) {
-                       l->u.i *= r->u.i;
-               } else {
-                       if (r->u.i == 0) {
-                               errx(2, "division by zero");
-                       }
-                       if (op == DIV) {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i /= r->u.i;
-                       } else {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i %= r->u.i;
-                               else
-                                       l->u.i = 0;
-                       }
-               }
-
-               free_value(r);
-       }
-
-       return l;
-}
-
-/* Parse and evaluate addition and subtraction expressions */
-static struct val *
-eval3(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-
-       l = eval4();
-       while ((op = token) == ADD || op == SUB) {
-               nexttoken(0);
-               r = eval4();
-
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
-
-               if (op == ADD) {
-                       l->u.i += r->u.i;
-               } else {
-                       l->u.i -= r->u.i;
-               }
-
-               free_value(r);
-       }
-
-       return l;
-}
-
-/* Parse and evaluate comparison expressions */
-static struct val *
-eval2(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-       int             v = 0, li, ri;
-
-       l = eval3();
-       while ((op = token) == EQ || op == NE || op == LT || op == GT ||
-           op == LE || op == GE) {
-               nexttoken(0);
-               r = eval3();
-
-               if (is_integer(l, &li) && is_integer(r, &ri)) {
-                       switch (op) {
-                       case GT:
-                               v = (li >  ri);
-                               break;
-                       case GE:
-                               v = (li >= ri);
-                               break;
-                       case LT:
-                               v = (li <  ri);
-                               break;
-                       case LE:
-                               v = (li <= ri);
-                               break;
-                       case EQ:
-                               v = (li == ri);
-                               break;
-                       case NE:
-                               v = (li != ri);
-                               break;
-                       default:
-                               break;
-                       }
-               } else {
-                       to_string(l);
-                       to_string(r);
-
-                       switch (op) {
-                       case GT:
-                               v = (strcoll(l->u.s, r->u.s) > 0);
-                               break;
-                       case GE:
-                               v = (strcoll(l->u.s, r->u.s) >= 0);
-                               break;
-                       case LT:
-                               v = (strcoll(l->u.s, r->u.s) < 0);
-                               break;
-                       case LE:
-                               v = (strcoll(l->u.s, r->u.s) <= 0);
-                               break;
-                       case EQ:
-                               v = (strcoll(l->u.s, r->u.s) == 0);
-                               break;
-                       case NE:
-                               v = (strcoll(l->u.s, r->u.s) != 0);
-                               break;
-                       default:
-                               break;
-                       }
-               }
-
-               free_value(l);
-               free_value(r);
-               l = make_int(v);
-       }
-
-       return l;
-}
-
-/* Parse and evaluate & expressions */
-static struct val *
-eval1(void)
-{
-       struct val     *l, *r;
-
-       l = eval2();
-       while (token == AND) {
-               nexttoken(0);
-               r = eval2();
-
-               if (is_zero_or_null(l) || is_zero_or_null(r)) {
-                       free_value(l);
-                       free_value(r);
-                       l = make_int(0);
-               } else {
-                       free_value(r);
-               }
-       }
-
-       return l;
-}
-
-/* Parse and evaluate | expressions */
-static struct val *
-eval0(void)
-{
-       struct val     *l, *r;
-
-       l = eval1();
-       while (token == OR) {
-               nexttoken(0);
-               r = eval1();
-
-               if (is_zero_or_null(l)) {
-                       free_value(l);
-                       l = r;
-               } else {
-                       free_value(r);
-               }
-       }
-
-       return l;
-}
-
-
-int
-main(int argc, char *argv[])
-{
-       struct val     *vp;
-
-       (void) setlocale(LC_ALL, "");
-
-       if (argc > 1 && !strcmp(argv[1], "--"))
-               argv++;
-
-       av = argv + 1;
-
-       nexttoken(0);
-       vp = eval0();
-
-       if (token != EOI) {
-               error();
-               /* NOTREACHED */
-       }
-
-       if (vp->type == integer)
-               printf("%d\n", vp->u.i);
-       else
-               printf("%s\n", vp->u.s);
-
-       exit(is_zero_or_null(vp));
-}
diff --git a/expr.y b/expr.y
new file mode 100644
index 0000000..7bd7665
--- /dev/null
+++ b/expr.y
@@ -0,0 +1,167 @@
+%{
+#include <inttypes.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "util.h"
+
+typedef struct {
+    char    *s;
+    intmax_t n;
+} Val;
+#define YYSTYPE Val
+
+static Val   match  (Val, Val);
+static void  num    (Val);
+static char *valstr (Val, char *);
+static int   valcmp (Val, Val);
+static int   yylex  (void);
+static void  yyerror(char*);
+
+static char **args;
+static int    intlen;
+
+static char *
+valstr(Val val, char *buf)
+{
+    char *p = val.s;
+    if(!p) sprintf(p = buf, "%"PRIdMAX, val.n);
+    return p;
+}
+
+static int
+valcmp(Val a, Val b)
+{
+    char b1[intlen], *p = valstr(a, b1);
+    char b2[intlen], *q = valstr(b, b2);
+
+    if(!a.s && !b.s)
+        return (a.n > b.n) - (a.n < b.n);
+    return strcmp(p, q);
+}
+
+static Val
+match(Val vstr, Val vregx)
+{
+    char b1[intlen], *str  = valstr(vstr , b1);
+    char b2[intlen], *regx = valstr(vregx, b2);
+
+    regex_t    re;
+    regmatch_t matches[2];
+    char       anchreg[strlen(regx) + 2];
+
+    sprintf(anchreg, "^%s", regx);
+
+    if(regcomp(&re, anchreg, 0))
+        enprintf(3, "regcomp failed");
+    if(regexec(&re, str, 2, matches, 0) == REG_NOMATCH)
+        return (Val){ (re.re_nsub ? "" : NULL), 0 };
+    if(re.re_nsub) {
+        intmax_t d;
+        char    *ret, *p;
+        regoff_t len = matches[1].rm_eo - matches[1].rm_so + 1;
+
+        if(!(ret = malloc(len)))
+            enprintf(3, "malloc failed");
+
+        strlcpy(ret, str + matches[1].rm_so, len);
+
+        d = strtoimax(ret, &p, 10);
+        if(*ret && !*p)
+            return (Val){ NULL, d };
+        return (Val){ ret, 0 };
+    }
+    return (Val){ NULL, matches[0].rm_eo - matches[0].rm_so };
+}
+
+static void
+num(Val v)
+{
+    if(v.s)
+        enprintf(2, "expected integer, got `%s'\n", v.s);
+}
+%}
+%token VAL GE LE NE
+
+%left '|'
+%left '&'
+%left '=' '>' GE '<' LE NE
+%left '+' '-'
+%left '*' '/' '%'
+%left ':'
+%%
+prog: expr { if($1.s) printf("%s\n"        , $1.s);
+             else     printf("%"PRIdMAX"\n", $1.n);
+             return !(($1.s && *$1.s) || $1.n); }
+    ;
+
+expr: VAL
+    | expr '|' expr { if     ( $1.s && *$1.s) $$ = (Val){ $1.s,    0 };
+                      else if(!$1.s &&  $1.n) $$ = (Val){ NULL, $1.n };
+                      else if( $3.s && *$3.s) $$ = (Val){ $3.s,    0 };
+                      else                    $$ = (Val){ NULL, $3.n }; }
+
+    | expr '&' expr { if((($1.s && *$1.s) || $1.n) &&
+                         (($3.s && *$3.s) || $3.n)) $$ = $1;
+                      else $$ = (Val){ NULL, 0 }; }
+
+    | expr '=' expr { $$ = (Val){ NULL, valcmp($1, $3) == 0 }; }
+    | expr '>' expr { $$ = (Val){ NULL, valcmp($1, $3) >  0 }; }
+    | expr GE  expr { $$ = (Val){ NULL, valcmp($1, $3) >= 0 }; }
+    | expr '<' expr { $$ = (Val){ NULL, valcmp($1, $3) <  0 }; }
+    | expr LE  expr { $$ = (Val){ NULL, valcmp($1, $3) <= 0 }; }
+    | expr NE  expr { $$ = (Val){ NULL, valcmp($1, $3) != 0 }; }
+
+    | expr '+' expr { num($1); num($3); $$ = (Val){ NULL, $1.n + $3.n }; }
+    | expr '-' expr { num($1); num($3); $$ = (Val){ NULL, $1.n - $3.n }; }
+    | expr '*' expr { num($1); num($3); $$ = (Val){ NULL, $1.n * $3.n }; }
+    | expr '/' expr { num($1); num($3); $$ = (Val){ NULL, $1.n / $3.n }; }
+    | expr '%' expr { num($1); num($3); $$ = (Val){ NULL, $1.n % $3.n }; }
+
+    | expr ':' expr { $$ = match($1, $3); }
+
+    | '(' expr ')'  { $$ = $2; }
+    ;
+%%
+int
+main(int argc, char *argv[])
+{
+    if(!(intlen = snprintf(NULL, 0, "%"PRIdMAX, INTMAX_MIN) + 1))
+        enprintf(3, "failed to get max digits\n");
+    args = argv + 1;
+    return yyparse();
+}
+
+static int
+yylex(void)
+{
+    intmax_t d;
+    char    *p, *q, *ops = "|&=><+-*/%():";
+
+    if(!(p = *args++))
+        return 0;
+
+    d = strtoimax(p, &q, 10);
+    if(*p && !*q) {
+        yylval = (Val){ NULL, d };
+        return VAL;
+    }
+
+    if(*p && !p[1] && strchr(ops, *p))
+        return *p;
+
+    if(strcmp(p, ">=") == 0) return GE;
+    if(strcmp(p, "<=") == 0) return LE;
+    if(strcmp(p, "!=") == 0) return NE;
+
+    yylval = (Val){ p, 0 };
+    return VAL;
+}
+
+static void
+yyerror(char *s)
+{
+    enprintf(2, "%s\n", s);
+}
-- 
2.1.3

Reply via email to