Author: gabor
Date: Wed Oct  5 09:56:43 2011
New Revision: 226035
URL: http://svn.freebsd.org/changeset/base/226035

Log:
  Update BSD grep to the latest development version.  It has some code
  backported that was written for the TRE integration project in Google
  Summer of Code 2011.  This is a temporary solution until the whole
  regex library is replaced, so that BSD grep development can continue
  and the backported code gets some review and testing.  This change only
  improves scalability slightly; there is no big performance boost yet,
  but several minor bugs have been found and fixed.
  
  Approved by:  delphij (mentor)
  Sponsored by: Google Summer of Code 2011
  MFC after:    1 week

Added:
  head/usr.bin/grep/regex/
  head/usr.bin/grep/regex/fastmatch.c   (contents, props changed)
  head/usr.bin/grep/regex/fastmatch.h   (contents, props changed)
  head/usr.bin/grep/regex/glue.h   (contents, props changed)
  head/usr.bin/grep/regex/hashtable.c   (contents, props changed)
  head/usr.bin/grep/regex/hashtable.h   (contents, props changed)
  head/usr.bin/grep/regex/tre-compile.c   (contents, props changed)
  head/usr.bin/grep/regex/tre-fastmatch.c   (contents, props changed)
  head/usr.bin/grep/regex/tre-fastmatch.h   (contents, props changed)
  head/usr.bin/grep/regex/xmalloc.c   (contents, props changed)
  head/usr.bin/grep/regex/xmalloc.h   (contents, props changed)
Deleted:
  head/usr.bin/grep/fastgrep.c
Modified:
  head/usr.bin/grep/Makefile
  head/usr.bin/grep/file.c
  head/usr.bin/grep/grep.c
  head/usr.bin/grep/grep.h
  head/usr.bin/grep/util.c

Modified: head/usr.bin/grep/Makefile
==============================================================================
--- head/usr.bin/grep/Makefile  Wed Oct  5 08:33:50 2011        (r226034)
+++ head/usr.bin/grep/Makefile  Wed Oct  5 09:56:43 2011        (r226035)
@@ -8,28 +8,52 @@
 PROG=  grep
 .else
 PROG=  bsdgrep
+CLEANFILES+= bsdgrep.1
+
+bsdgrep.1: grep.1
+       cp ${.ALLSRC} ${.TARGET}
 .endif
-SRCS=  fastgrep.c file.c grep.c queue.c util.c
+SRCS=  file.c grep.c queue.c util.c
+
+# Extra files backported from some regex improvements
+.PATH: ${.CURDIR}/regex
+SRCS+= fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c xmalloc.c
+CFLAGS+=-I${.CURDIR}/regex
 
 .if ${MK_BSD_GREP} == "yes"
 LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
        ${BINDIR}/grep ${BINDIR}/fgrep \
        ${BINDIR}/grep ${BINDIR}/zgrep \
        ${BINDIR}/grep ${BINDIR}/zegrep \
-       ${BINDIR}/grep ${BINDIR}/zfgrep
+       ${BINDIR}/grep ${BINDIR}/zfgrep \
+       ${BINDIR}/grep ${BINDIR}/bzgrep \
+       ${BINDIR}/grep ${BINDIR}/bzegrep \
+       ${BINDIR}/grep ${BINDIR}/bzfgrep \
+       ${BINDIR}/grep ${BINDIR}/xzgrep \
+       ${BINDIR}/grep ${BINDIR}/xzegrep \
+       ${BINDIR}/grep ${BINDIR}/xzfgrep \
+       ${BINDIR}/grep ${BINDIR}/lzgrep \
+       ${BINDIR}/grep ${BINDIR}/lzegrep \
+       ${BINDIR}/grep ${BINDIR}/lzfgrep
 
 MLINKS= grep.1 egrep.1 \
        grep.1 fgrep.1 \
        grep.1 zgrep.1 \
        grep.1 zegrep.1 \
-       grep.1 zfgrep.1
+       grep.1 zfgrep.1 \
+       grep.1 bzgrep.1 \
+       grep.1 bzegrep.1 \
+       grep.1 bzfgrep.1 \
+       grep.1 xzgrep.1 \
+       grep.1 xzegrep.1 \
+       grep.1 xzfgrep.1 \
+       grep.1 lzgrep.1 \
+       grep.1 lzegrep.1 \
+       grep.1 lzfgrep.1
 .endif
 
-bsdgrep.1: grep.1
-       cp ${.ALLSRC} ${.TARGET}
-
-LDADD= -lz -lbz2
-DPADD= ${LIBZ} ${LIBBZ2}
+LDADD= -lz -lbz2 -llzma
+DPADD= ${LIBZ} ${LIBBZ2} ${LIBLZMA}
 
 .if !defined(WITHOUT_GNU_COMPAT)
 CFLAGS+= -I/usr/include/gnu

Modified: head/usr.bin/grep/file.c
==============================================================================
--- head/usr.bin/grep/file.c    Wed Oct  5 08:33:50 2011        (r226034)
+++ head/usr.bin/grep/file.c    Wed Oct  5 09:56:43 2011        (r226035)
@@ -34,13 +34,15 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
-#include <sys/types.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 
 #include <bzlib.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <lzma.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -56,10 +58,12 @@ __FBSDID("$FreeBSD$");
 
 static gzFile gzbufdesc;
 static BZFILE* bzbufdesc;
+static lzma_stream lstrm = LZMA_STREAM_INIT;
 
-static unsigned char buffer[MAXBUFSIZ];
+static unsigned char *buffer;
 static unsigned char *bufpos;
 static size_t bufrem;
+static size_t fsiz;
 
 static unsigned char *lnbuf;
 static size_t lnbuflen;
@@ -70,6 +74,9 @@ grep_refill(struct file *f)
        ssize_t nr;
        int bzerr;
 
+       if (filebehave == FILE_MMAP)
+               return (0);
+
        bufpos = buffer;
        bufrem = 0;
 
@@ -101,6 +108,36 @@ grep_refill(struct file *f)
                        /* Make sure we exit with an error */
                        nr = -1;
                }
+       } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
+               lzma_action action = LZMA_RUN;
+               uint8_t in_buf[MAXBUFSIZ];
+               lzma_ret ret;
+
+               ret = (filebehave == FILE_XZ) ?
+                   lzma_stream_decoder(&lstrm, UINT64_MAX,
+                   LZMA_CONCATENATED) :
+                   lzma_alone_decoder(&lstrm, UINT64_MAX);
+
+               if (ret != LZMA_OK)
+                       return (-1);
+
+               lstrm.next_out = buffer;
+               lstrm.avail_out = MAXBUFSIZ;
+               lstrm.next_in = in_buf;
+               nr = read(f->fd, in_buf, MAXBUFSIZ);
+
+               if (nr < 0)
+                       return (-1);
+               else if (nr == 0)
+                       action = LZMA_FINISH;
+
+               lstrm.avail_in = nr;
+               ret = lzma_code(&lstrm, action);
+
+               if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+                       return (-1);
+               bufrem = MAXBUFSIZ - lstrm.avail_out;
+               return (0);
        } else
                nr = read(f->fd, buffer, MAXBUFSIZ);
 
@@ -186,56 +223,76 @@ error:
        return (NULL);
 }
 
-static inline struct file *
-grep_file_init(struct file *f)
+/*
+ * Opens a file for processing.
+ */
+struct file *
+grep_open(const char *path)
 {
+       struct file *f;
+
+       f = grep_malloc(sizeof *f);
+       memset(f, 0, sizeof *f);
+       if (path == NULL) {
+               /* Processing stdin implies --line-buffered. */
+               lbflag = true;
+               f->fd = STDIN_FILENO;
+       } else if ((f->fd = open(path, O_RDONLY)) == -1)
+               goto error1;
+
+       if (filebehave == FILE_MMAP) {
+               struct stat st;
+
+               if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
+                   (!S_ISREG(st.st_mode)))
+                       filebehave = FILE_STDIO;
+               else {
+                       int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
+#ifdef MAP_PREFAULT_READ
+                       flags |= MAP_PREFAULT_READ;
+#endif
+                       fsiz = st.st_size;
+                       buffer = mmap(NULL, fsiz, PROT_READ, flags,
+                            f->fd, (off_t)0);
+                       if (buffer == MAP_FAILED)
+                               filebehave = FILE_STDIO;
+                       else {
+                               bufrem = st.st_size;
+                               bufpos = buffer;
+                               madvise(buffer, st.st_size, MADV_SEQUENTIAL);
+                       }
+               }
+       }
+
+       if ((buffer == NULL) || (buffer == MAP_FAILED))
+               buffer = grep_malloc(MAXBUFSIZ);
 
        if (filebehave == FILE_GZIP &&
            (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
-               goto error;
+               goto error2;
 
        if (filebehave == FILE_BZIP &&
            (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
-               goto error;
+               goto error2;
 
        /* Fill read buffer, also catches errors early */
-       if (grep_refill(f) != 0)
-               goto error;
+       if (bufrem == 0 && grep_refill(f) != 0)
+               goto error2;
 
        /* Check for binary stuff, if necessary */
        if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
-               f->binary = true;
+       f->binary = true;
 
        return (f);
-error:
+
+error2:
        close(f->fd);
+error1:
        free(f);
        return (NULL);
 }
 
 /*
- * Opens a file for processing.
- */
-struct file *
-grep_open(const char *path)
-{
-       struct file *f;
-
-       f = grep_malloc(sizeof *f);
-       memset(f, 0, sizeof *f);
-       if (path == NULL) {
-               /* Processing stdin implies --line-buffered. */
-               lbflag = true;
-               f->fd = STDIN_FILENO;
-       } else if ((f->fd = open(path, O_RDONLY)) == -1) {
-               free(f);
-               return (NULL);
-       }
-
-       return (grep_file_init(f));
-}
-
-/*
  * Closes a file.
  */
 void
@@ -245,6 +302,10 @@ grep_close(struct file *f)
        close(f->fd);
 
        /* Reset read buffer and line buffer */
+       if (filebehave == FILE_MMAP) {
+               munmap(buffer, fsiz);
+               buffer = NULL;
+       }
        bufpos = buffer;
        bufrem = 0;
 

Modified: head/usr.bin/grep/grep.c
==============================================================================
--- head/usr.bin/grep/grep.c    Wed Oct  5 08:33:50 2011        (r226034)
+++ head/usr.bin/grep/grep.c    Wed Oct  5 09:56:43 2011        (r226035)
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <getopt.h>
 #include <limits.h>
 #include <libgen.h>
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <string.h>
 #include <unistd.h>
 
+#include "fastmatch.h"
 #include "grep.h"
 
 #ifndef WITHOUT_NLS
@@ -81,9 +83,9 @@ bool           matchall;
 
 /* Searching patterns */
 unsigned int    patterns, pattern_sz;
-char           **pattern;
+struct pat     *pattern;
 regex_t                *r_pattern;
-fastgrep_t     *fg_pattern;
+fastmatch_t    *fg_pattern;
 
 /* Filename exclusion/inclusion patterns */
 unsigned int    fpatterns, fpattern_sz;
@@ -104,7 +106,7 @@ bool         hflag;         /* -h: don't print filenam
 bool    iflag;         /* -i: ignore case */
 bool    lflag;         /* -l: only show names of files with matches */
 bool    mflag;         /* -m x: stop reading the files after x matches */
-unsigned long long mcount;     /* count for -m */
+long long mcount;      /* count for -m */
 bool    nflag;         /* -n: show line numbers in front of matching lines */
 bool    oflag;         /* -o: print only matching part */
 bool    qflag;         /* -q: quiet mode (don't output anything) */
@@ -164,7 +166,7 @@ usage(void)
        exit(2);
 }
 
-static const char      *optstr = 
"0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
+static const char      *optstr = 
"0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
 
 struct option long_options[] =
 {
@@ -200,6 +202,7 @@ struct option long_options[] =
        {"files-with-matches",  no_argument,            NULL, 'l'},
        {"files-without-match", no_argument,            NULL, 'L'},
        {"max-count",           required_argument,      NULL, 'm'},
+       {"lzma",                no_argument,            NULL, 'M'},
        {"line-number",         no_argument,            NULL, 'n'},
        {"only-matching",       no_argument,            NULL, 'o'},
        {"quiet",               no_argument,            NULL, 'q'},
@@ -212,6 +215,7 @@ struct option long_options[] =
        {"version",             no_argument,            NULL, 'V'},
        {"word-regexp",         no_argument,            NULL, 'w'},
        {"line-regexp",         no_argument,            NULL, 'x'},
+       {"xz",                  no_argument,            NULL, 'X'},
        {"decompress",          no_argument,            NULL, 'Z'},
        {NULL,                  no_argument,            NULL, 0}
 };
@@ -223,23 +227,35 @@ static void
 add_pattern(char *pat, size_t len)
 {
 
+       /* Do not add further patterns if we already match everything */
+       if (matchall)
+         return;
+
        /* Check if we can do a shortcut */
-       if (len == 0 || matchall) {
+       if (len == 0) {
                matchall = true;
+               for (unsigned int i = 0; i < patterns; i++) {
+                       free(pattern[i].pat);
+               }
+               pattern = grep_realloc(pattern, sizeof(struct pat));
+               pattern[0].pat = NULL;
+               pattern[0].len = 0;
+               patterns = 1;
                return;
        }
        /* Increase size if necessary */
        if (patterns == pattern_sz) {
                pattern_sz *= 2;
                pattern = grep_realloc(pattern, ++pattern_sz *
-                   sizeof(*pattern));
+                   sizeof(struct pat));
        }
        if (len > 0 && pat[len - 1] == '\n')
                --len;
        /* pat may not be NUL-terminated */
-       pattern[patterns] = grep_malloc(len + 1);
-       memcpy(pattern[patterns], pat, len);
-       pattern[patterns][len] = '\0';
+       pattern[patterns].pat = grep_malloc(len + 1);
+       memcpy(pattern[patterns].pat, pat, len);
+       pattern[patterns].len = len;
+       pattern[patterns].pat[len] = '\0';
        ++patterns;
 }
 
@@ -285,14 +301,19 @@ add_dpattern(const char *pat, int mode)
 static void
 read_patterns(const char *fn)
 {
+       struct stat st;
        FILE *f;
        char *line;
        size_t len;
 
        if ((f = fopen(fn, "r")) == NULL)
                err(2, "%s", fn);
-       while ((line = fgetln(f, &len)) != NULL)
-               add_pattern(line, *line == '\n' ? 0 : len);
+       if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
+               fclose(f);
+               return;
+       }
+        while ((line = fgetln(f, &len)) != NULL)
+               add_pattern(line, line[0] == '\n' ? 0 : len);
        if (ferror(f))
                err(2, "%s", fn);
        fclose(f);
@@ -311,7 +332,7 @@ int
 main(int argc, char *argv[])
 {
        char **aargv, **eargv, *eopts;
-       char *ep;
+       char *pn, *ep;
        unsigned long long l;
        unsigned int aargc, eargc, i;
        int c, lastc, needpattern, newarg, prevoptind;
@@ -325,30 +346,27 @@ main(int argc, char *argv[])
        /* Check what is the program name of the binary.  In this
           way we can have all the funcionalities in one binary
           without the need of scripting and using ugly hacks. */
-       switch (__progname[0]) {
+       pn = __progname;
+       if (pn[0] == 'b' && pn[1] == 'z') {
+               filebehave = FILE_BZIP;
+               pn += 2;
+       } else if (pn[0] == 'x' && pn[1] == 'z') {
+               filebehave = FILE_XZ;
+               pn += 2;
+       } else if (pn[0] == 'l' && pn[1] == 'z') {
+               filebehave = FILE_LZMA;
+               pn += 2;
+       } else if (pn[0] == 'z') {
+               filebehave = FILE_GZIP;
+               pn += 1;
+       }
+       switch (pn[0]) {
        case 'e':
                grepbehave = GREP_EXTENDED;
                break;
        case 'f':
                grepbehave = GREP_FIXED;
                break;
-       case 'g':
-               grepbehave = GREP_BASIC;
-               break;
-       case 'z':
-               filebehave = FILE_GZIP;
-               switch(__progname[1]) {
-               case 'e':
-                       grepbehave = GREP_EXTENDED;
-                       break;
-               case 'f':
-                       grepbehave = GREP_FIXED;
-                       break;
-               case 'g':
-                       grepbehave = GREP_BASIC;
-                       break;
-               }
-               break;
        }
 
        lastc = '\0';
@@ -503,8 +521,8 @@ main(int argc, char *argv[])
                case 'm':
                        mflag = true;
                        errno = 0;
-                       mcount = strtoull(optarg, &ep, 10);
-                       if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
+                       mcount = strtoll(optarg, &ep, 10);
+                       if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
                            ((errno == EINVAL) && (mcount == 0)))
                                err(2, NULL);
                        else if (ep[0] != '\0') {
@@ -512,6 +530,9 @@ main(int argc, char *argv[])
                                err(2, NULL);
                        }
                        break;
+               case 'M':
+                       filebehave = FILE_LZMA;
+                       break;
                case 'n':
                        nflag = true;
                        break;
@@ -544,7 +565,7 @@ main(int argc, char *argv[])
                        break;
                case 'u':
                case MMAP_OPT:
-                       /* noop, compatibility */
+                       filebehave = FILE_MMAP;
                        break;
                case 'V':
                        printf(getstr(9), __progname, VERSION);
@@ -560,6 +581,9 @@ main(int argc, char *argv[])
                        xflag = true;
                        cflags &= ~REG_NOSUB;
                        break;
+               case 'X':
+                       filebehave = FILE_XZ;
+                       break;
                case 'Z':
                        filebehave = FILE_GZIP;
                        break;
@@ -630,6 +654,10 @@ main(int argc, char *argv[])
        aargc -= optind;
        aargv += optind;
 
+       /* Empty pattern file matches nothing */
+       if (!needpattern && (patterns == 0))
+               exit(1);
+
        /* Fail if we don't have any pattern */
        if (aargc == 0 && needpattern)
                usage();
@@ -642,9 +670,12 @@ main(int argc, char *argv[])
        }
 
        switch (grepbehave) {
-       case GREP_FIXED:
        case GREP_BASIC:
                break;
+       case GREP_FIXED:
+               /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
+               cflags |= 0020;
+               break;
        case GREP_EXTENDED:
                cflags |= REG_EXTENDED;
                break;
@@ -655,24 +686,17 @@ main(int argc, char *argv[])
 
        fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
        r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
-/*
- * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
- * Optimizations should be done there.
- */
-               /* Check if cheating is allowed (always is for fgrep). */
-       if (grepbehave == GREP_FIXED) {
-               for (i = 0; i < patterns; ++i)
-                       fgrepcomp(&fg_pattern[i], pattern[i]);
-       } else {
-               for (i = 0; i < patterns; ++i) {
-                       if (fastcomp(&fg_pattern[i], pattern[i])) {
-                               /* Fall back to full regex library */
-                               c = regcomp(&r_pattern[i], pattern[i], cflags);
-                               if (c != 0) {
-                                       regerror(c, &r_pattern[i], re_error,
-                                           RE_ERROR_BUF);
-                                       errx(2, "%s", re_error);
-                               }
+
+       /* Check if cheating is allowed (always is for fgrep). */
+       for (i = 0; i < patterns; ++i) {
+               if (fastncomp(&fg_pattern[i], pattern[i].pat,
+                   pattern[i].len, cflags) != 0) {
+                       /* Fall back to full regex library */
+                       c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+                       if (c != 0) {
+                               regerror(c, &r_pattern[i], re_error,
+                                   RE_ERROR_BUF);
+                               errx(2, "%s", re_error);
                        }
                }
        }

Modified: head/usr.bin/grep/grep.h
==============================================================================
--- head/usr.bin/grep/grep.h    Wed Oct  5 08:33:50 2011        (r226034)
+++ head/usr.bin/grep/grep.h    Wed Oct  5 09:56:43 2011        (r226035)
@@ -36,6 +36,8 @@
 #include <stdio.h>
 #include <zlib.h>
 
+#include "fastmatch.h"
+
 #ifdef WITHOUT_NLS
 #define getstr(n)       errstr[n]
 #else
@@ -58,8 +60,11 @@ extern const char            *errstr[];
 #define BINFILE_TEXT   2
 
 #define FILE_STDIO     0
-#define FILE_GZIP      1
-#define FILE_BZIP      2
+#define FILE_MMAP      1
+#define FILE_GZIP      2
+#define FILE_BZIP      3
+#define FILE_XZ                4
+#define FILE_LZMA      5
 
 #define DIR_READ       0
 #define DIR_SKIP       1
@@ -90,22 +95,16 @@ struct str {
        int              line_no;
 };
 
+struct pat {
+       char            *pat;
+       int              len;
+};
+
 struct epat {
        char            *pat;
        int              mode;
 };
 
-typedef struct {
-       size_t           len;
-       unsigned char   *pattern;
-       int              qsBc[UCHAR_MAX + 1];
-       /* flags */
-       bool             bol;
-       bool             eol;
-       bool             reversed;
-       bool             word;
-} fastgrep_t;
-
 /* Flags passed to regcomp() and regexec() */
 extern int      cflags, eflags;
 
@@ -114,7 +113,8 @@ extern bool  Eflag, Fflag, Gflag, Hflag,
                 bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
                 qflag, sflag, vflag, wflag, xflag;
 extern bool     dexclude, dinclude, fexclude, finclude, lbflag, nullflag;
-extern unsigned long long Aflag, Bflag, mcount;
+extern unsigned long long Aflag, Bflag;
+extern long long mcount;
 extern char    *label;
 extern const char *color;
 extern int      binbehave, devbehave, dirbehave, filebehave, grepbehave, 
linkbehave;
@@ -122,10 +122,10 @@ extern int         binbehave, devbehave, dirbeh
 extern bool     first, matchall, notfound, prev;
 extern int      tail;
 extern unsigned int dpatterns, fpatterns, patterns;
-extern char    **pattern;
+extern struct pat *pattern;
 extern struct epat *dpattern, *fpattern;
 extern regex_t *er_pattern, *r_pattern;
-extern fastgrep_t *fg_pattern;
+extern fastmatch_t *fg_pattern;
 
 /* For regex errors  */
 #define RE_ERROR_BUF   512
@@ -150,8 +150,3 @@ void         clearqueue(void);
 void            grep_close(struct file *f);
 struct file    *grep_open(const char *path);
 char           *grep_fgetln(struct file *f, size_t *len);
-
-/* fastgrep.c */
-int             fastcomp(fastgrep_t *, const char *);
-void            fgrepcomp(fastgrep_t *, const char *);
-int             grep_search(fastgrep_t *, const unsigned char *, size_t, 
regmatch_t *);

Added: head/usr.bin/grep/regex/fastmatch.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/grep/regex/fastmatch.c Wed Oct  5 09:56:43 2011        
(r226035)
@@ -0,0 +1,169 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <ga...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "glue.h"
+
+#include <errno.h>
+#include <fastmatch.h>
+#include <regex.h>
+#include <string.h>
+
+#include "tre-fastmatch.h"
+#include "xmalloc.h"
+
+int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+  tre_char_t *wregex;
+  size_t wlen;
+
+  if (n != 0)
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+       return ret;
+      else 
+       ret = tre_compile_literal(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
+  else
+    return tre_compile_literal(preg, NULL, 0, cflags);
+}
+
+int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+  tre_char_t *wregex;
+  size_t wlen;
+
+  if (n != 0)
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+       return ret;
+      else
+       ret = (cflags & REG_LITERAL)
+             ? tre_compile_literal(preg, wregex, wlen, cflags)
+             : tre_compile_fast(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
+  else
+    return tre_compile_literal(preg, NULL, 0, cflags);
+}
+
+
+int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  return tre_fixncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  return tre_fastncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return tre_compile_literal(preg, regex, n, cflags);
+}
+
+int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return (cflags & REG_LITERAL) ?
+    tre_compile_literal(preg, regex, n, cflags) :
+    tre_compile_fast(preg, regex, n, cflags);
+}
+
+int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  return tre_fixwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
+}
+
+int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  return tre_fastwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
+}
+
+void
+tre_fastfree(fastmatch_t *preg)
+{
+  tre_free_fast(preg);
+}
+
+int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+  if (eflags & REG_STARTEND)
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+                    type, nmatch, pmatch, eflags));
+  else
+    return tre_match_fast(preg, string, len, type, nmatch,
+      pmatch, eflags);
+}
+
+int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+            regmatch_t pmatch[], int eflags)
+{
+  return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+
+int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
+          size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_str_type_t type = STR_WIDE;
+
+  if (eflags & REG_STARTEND)
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+                    type, nmatch, pmatch, eflags));
+  else
+    return tre_match_fast(preg, string, len, type, nmatch,
+      pmatch, eflags);
+}
+
+int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+

Added: head/usr.bin/grep/regex/fastmatch.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/grep/regex/fastmatch.h Wed Oct  5 09:56:43 2011        
(r226035)
@@ -0,0 +1,108 @@
+/* $FreeBSD$ */
+
+#ifndef FASTMATCH_H
+#define FASTMATCH_H 1
+
+#include <limits.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <wchar.h>
+
+typedef struct {
+  size_t        wlen;
+  size_t        len;
+  wchar_t      *wpattern;
+  bool         *wescmap;
+  unsigned int  qsBc[UCHAR_MAX + 1];
+  unsigned int *bmGs;
+  char         *pattern;
+  bool         *escmap;
+  unsigned int  defBc;
+  void         *qsBc_table;
+  unsigned int *sbmGs;
+  const char   *re_endp;
+
+  /* flags */
+  bool          hasdot;
+  bool          bol;
+  bool          eol;
+  bool          word;
+  bool          icase;
+  bool          newline;
+  bool          nosub;
+  bool          matchall;
+  bool          reversed;
+} fastmatch_t;
+
+extern int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+extern int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+extern int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+  regmatch_t pmatch[], int eflags);
+
+extern void
+tre_fastfree(fastmatch_t *preg);
+
+extern int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags);
+
+/* Versions with a maximum length argument and therefore the capability to
+   handle null characters in the middle of the strings. */
+extern int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+extern int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int 
cflags);
+
+extern int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#define fixncomp       tre_fixncomp
+#define fastncomp      tre_fastncomp
+#define fixcomp                tre_fixcomp
+#define fastcomp       tre_fastcomp
+#define fixwncomp      tre_fixwncomp
+#define fastwncomp     tre_fastwncomp
+#define fixwcomp       tre_fixwcomp
+#define fastwcomp      tre_fastwcomp
+#define fastfree       tre_fastfree
+#define fastnexec      tre_fastnexec
+#define fastexec       tre_fastexec
+#define fastwnexec     tre_fastwnexec
+#define fastwexec      tre_fastwexec
+#define fixcomp                tre_fixcomp
+#define fastcomp       tre_fastcomp
+#define fastexec       tre_fastexec
+#define fastfree       tre_fastfree
+#define fixwcomp       tre_fixwcomp
+#define fastwcomp      tre_fastwcomp
+#define fastwexec      tre_fastwexec
+#define fixncomp       tre_fixncomp
+#define fastncomp      tre_fastncomp
+#define fastnexec      tre_fastnexec
+#define fixwncomp      tre_fixwncomp
+#define fastwncomp     tre_fastwncomp
+#define fastwnexec     tre_fastwnexec
+#endif         /* FASTMATCH_H */

Added: head/usr.bin/grep/regex/glue.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/grep/regex/glue.h      Wed Oct  5 09:56:43 2011        
(r226035)
@@ -0,0 +1,67 @@
+/* $FreeBSD$ */
+
+#ifndef GLUE_H
+#define GLUE_H
+
+#include <limits.h>
+#undef RE_DUP_MAX
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define TRE_WCHAR                      1
+#define TRE_MULTIBYTE                  1
+#define HAVE_MBSTATE_T                 1
+
+#define TRE_CHAR(n) L##n
+#define CHF "%lc"
+
+#define tre_char_t                     wchar_t
+#define tre_mbrtowc(pwc, s, n, ps)     (mbrtowc((pwc), (s), (n), (ps)))
+#define tre_strlen                     wcslen
+#define tre_isspace                    iswspace
+#define tre_isalnum                    iswalnum
+
+#define REG_OK                         0
+#define REG_LITERAL                    0020
+#define REG_WORD                       0100
+#define REG_GNU                                0400
+
+#define TRE_MB_CUR_MAX                 MB_CUR_MAX
+
+#ifndef _GREP_DEBUG
+#define DPRINT(msg)
+#else                  
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
+#endif
+
+#define MIN(a,b)                       ((a > b) ? (b) : (a))
+#define MAX(a,b)                       ((a > b) ? (a) : (b))
+
+typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
+
+#define CALL_WITH_OFFSET(fn)                                           \
+  do                                                                   \
+    {                                                                  \
+      size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);       \
+      size_t offset = pmatch[0].rm_so;                                 \
+      int ret;                                                         \
+                                                                       \
+      if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0)            \
+       return REG_NOMATCH;                                             \
+      ret = fn;                                                                
\
+      for (unsigned i = 0; (!(eflags & REG_NOSUB) && (i < nmatch)); i++)\
+       {                                                               \
+         pmatch[i].rm_so += offset;                                    \
+         pmatch[i].rm_eo += offset;                                    \
+       }                                                               \
+      return ret;                                                      \
+    } while (0 /*CONSTCOND*/)
+
+int
+tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
+    size_t *wn);
+
+void
+tre_free_pattern(tre_char_t *wregex);
+#endif

Added: head/usr.bin/grep/regex/hashtable.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/grep/regex/hashtable.c Wed Oct  5 09:56:43 2011        
(r226035)
@@ -0,0 +1,268 @@
+/*      $FreeBSD$       */
+
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <ga...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to