Hello Ralf, On Fri, May 01, 2009 at 09:50:23PM +0200, Ralf Wildenhues wrote:
> That NULTEST thing is cute but makes me cringe. If the second malloc > runs out of memory, then you return, but leak memory from the first > malloc. So in a tight memory situation, your code starts contributing > to the problem. Fixed. renamed macro to NOMEMTEST which jumps to err where I free all. Here is what I improved(benchmark attached) *abc* en_US.UTF-8 user: 2.58 user: 0.26 C user: 0.89 user: 0.25 just made functions static and __builtin_expect *[abc]bc* en_US.UTF-8 user: 4.04 user: 1.71 C user: 2.23 user: 0.85 hashing looks at first character instead last. We can skip mblen if they differ. I hope filenames with all leading chars same are rare. *ab[a-z]* en_US.UTF-8 user: 2.87 user: 1.77 C user: 0.85 user: 0.82 using speculative match. Slow because every third string is matched. casefold *abc* en_US.UTF-8 user: 4.26 user: 3.24 I convert multibyte chars to lowercase(as mb_casecmp does that too) Its faster because fnmatch first by mbstowcs computes size then second mbstowcs converts. I allocate large buffer and put 0 at end. I check if mbstowcs rewrote it and if so(unlike) I allocate correct size. C user: 1.00 user: 0.71 *ab[a-z]* en_US.UTF-8 user: 4.62 user: 5.07 C user: 1.06 user: 1.27 because I match every third string. diff --git a/MODULES.html.sh b/MODULES.html.sh index 06afa2d..03d308e 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -61,6 +61,7 @@ fenv float fmtmsg fnmatch +fnmatchcomp ftw glob grp @@ -418,6 +419,7 @@ fmodf fmodl fmtmsg fnmatch +fnmatchcomp fopen fork fpathconf diff --git a/lib/fnmatchcomp.c b/lib/fnmatchcomp.c new file mode 100644 index 0000000..bf24472 --- /dev/null +++ b/lib/fnmatchcomp.c @@ -0,0 +1,618 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +#include <stdint.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <nl_types.h> +#include <langinfo.h> +#include <locale.h> +#include <ctype.h> +#include <wchar.h> +#include <wctype.h> +#include <errno.h> +#define _GNU_SOURCE +#include <fnmatch.h> +#include "fnmatchcomp.h" + +#define UINT4 uint32_t +#define UINT2 uint16_t +#define CHARSPERINT (sizeof (UINT4)/sizeof(char)) +#define STREQ(a, b) (strcmp(a, b) == 0) +#define MAX_PATTERN_SIZE (sizeof (bracket_pattern)) +#define NOMEMTEST(exp) if (__builtin_expect(!(exp), 0)) {errno = ENOMEM; goto err; } +#define Calloc(x) calloc(1, x) +#define ALLOCALIMIT(n, lim) (__builtin_expect((n < lim), 1) ? alloca(n): malloc(n)) +#define ALLOCAFREE(ptr, n, lim) if (n > lim) free(ptr); +typedef struct +{ + char tp; + char chr; +} chr_pattern; +typedef struct +{ /* We seek chars following *. This provides major speedup. + For some encodings we could be wrong because leading char was part of multibyte character. Then after successfull match call fnmatch to eliminate false positives */ + char tp; + char chars[CHARSPERINT]; + short sizerest; + short charno; +} star_pattern; +typedef struct +{ + char tp; +} end_pattern; +typedef struct +{ /* * at end of pattern. automatic success */ + char tp; +} endstar_pattern; +#define BRACKET_INVMATCH 1 +#define BRACKET_SLASH 2 +#define BRACKET_MUSTCHECK 4 +typedef struct +{ + char tp; + char flags; + char *chars; /* multibyte characters separated by 0 */ + int sizerest; + uint32_t hash[(UCHAR_MAX / 32) + 1]; + /* We look at last char of multibyte character. We look at bit with this index in hash. On success we check if its one of chars. */ +} bracket_pattern; +typedef struct +{ /* if FNM_PERIOD was set this checks period after / */ + char tp; +} begindot_pattern; +typedef struct +{ /* if FNM_LEADING_DIR was set we go after / or 0 to endstar pattern. */ + char tp; +} slashend_pattern; + +typedef struct +{ /* we must call fnmatch */ + char tp; + char *ptr; + char *prefix; + int flags; +} fallback_pattern; +typedef struct +{ /* we fold string and then call case sensitive version */ + char tp; + void *states; +} fold_pattern; +typedef struct +{ /* we call our version and if succeed we check by fnmatch fallback */ + char tp; + void *states; + fallback_pattern *fallback; +} checkneeded_pattern; + + /* like star but we dont go beyond / */ +typedef star_pattern starslash_pattern; +enum PATTERNS +{ + PATTERN_chr, + PATTERN_star, + PATTERN_starslash, + PATTERN_end, + PATTERN_endstar, + PATTERN_bracket, + PATTERN_begindot, + PATTERN_slashend, + PATTERN_fallback, + PATTERN_fold, + PATTERN_checkneeded +}; +union _patterns +{ + chr_pattern chr; + star_pattern star; + starslash_pattern starslash; + end_pattern end; + endstar_pattern endstar; + bracket_pattern bracket; + begindot_pattern begindot; + fallback_pattern fallback; + fold_pattern fold; + checkneeded_pattern checkneeded; +}; +typedef union _patterns patterns; +static void freepatterns (patterns * p); + +struct states +{ + patterns *p; + struct states *next; +}; +struct _fnmatch_state +{ + struct states *states; /* list of states of undrelying NFA. */ + int flags; + int *refcount; + patterns *start; +}; + +int +fnmatch_fallbacked (fnmatch_state * s) +{ + return (s->states->p->fallback.tp == PATTERN_fallback); +} + +static int +ascii_compatible_encoding (const char *c) +{ /* name is misleading should return 0 if characters of encoding cant overlap. + 1 if encoding is stateless + 2 otherwise */ + + if (MB_CUR_MAX == 1 || STREQ (c, "UTF-8")) + return 0; + if (STREQ (c, "BIG5") || STREQ (c, "GB18030")) + return 1; + return 2; +} + +static void +strfold (const char *str, char *buf) +{ + if (MB_CUR_MAX == 1) + { + while (*str) + *buf++ = tolower (*str++); + *buf = 0; + } + else + { + int i; + int len = 2 * strlen (str) + 1; + wchar_t *towc = ALLOCALIMIT (sizeof (wint_t) * (len), 1000); + towc[len - 1] = L'\0'; + mbstowcs (towc, str, len); + if (towc[len - 1] != L'\0') + { + ALLOCAFREE (towc, sizeof (wint_t) * (len), 1000); + len = mbstowcs (NULL, str, 0) + 1; + towc = ALLOCALIMIT (sizeof (wint_t) * len, 1000); + mbstowcs (towc, str, len); + } + for (i = 0; towc[i] != L'\0'; i++) + { + towc[i] = towlower (towc[i]); + } + wcstombs (buf, towc, len); + ALLOCAFREE (towc, sizeof (wint_t) * (len), 1000); + } +} + +static fnmatch_state * +initfnmatchstate () +{ + fnmatch_state *st; + NOMEMTEST (st = (fnmatch_state *) Calloc (sizeof (fnmatch_state))); + NOMEMTEST (st->states = Calloc (sizeof (struct states))); + st->states->next = NULL; + NOMEMTEST (st->refcount = Calloc (sizeof (int))); + *st->refcount = 1; + return st; +err: + if (st) + { + free (st->states); + free (st->refcount); + } + free (st); + return NULL; +} + +static fnmatch_state * +fnmatch_fallback (const char *str, int flags) +{ + fnmatch_state *st; + NOMEMTEST (st = initfnmatchstate ()); + NOMEMTEST (st->start = st->states->p = Calloc (sizeof (fallback_pattern))); + st->states->p->fallback.tp = PATTERN_fallback; + NOMEMTEST (st->states->p->fallback.ptr = strdup (str)); + st->states->p->fallback.flags = flags; + NOMEMTEST (st->states->p->fallback.prefix = strdup ("")); + return st; +err: + fnmatch_free (st); + return NULL; +} + +static fnmatch_state * +fold_fallback (fnmatch_state * s) +{ + fnmatch_state *st; + NOMEMTEST (st = initfnmatchstate ()); + NOMEMTEST (s); + NOMEMTEST (st->start = st->states->p = Calloc (sizeof (fold_pattern))); + st->states->p->fold.tp = PATTERN_fold; + st->states->p->fold.states = (void *) s; + return st; +err: + fnmatch_free (s); + fnmatch_free (st); + return NULL; +} + +static fnmatch_state * +checkneeded_fallback (fnmatch_state * s, fnmatch_state * fb) +{ + fnmatch_state *st; + NOMEMTEST (st = initfnmatchstate ()); + NOMEMTEST (s); + NOMEMTEST (fb); + NOMEMTEST (st->start = st->states->p = + Calloc (sizeof (checkneeded_pattern))); + st->states->p->checkneeded.tp = PATTERN_checkneeded; + st->states->p->checkneeded.states = (void *) s; + st->states->p->checkneeded.fallback = (void *) fb; + return st; +err: + fnmatch_free (s); + fnmatch_free (fb); + fnmatch_free (st); + return NULL; +} + +static int +parse_bracket (const char *str, patterns * pat, int flags) +{ + char *chr, firstbyte; + int mlen; + const char *s = str; + int i; + pat->bracket.tp = PATTERN_bracket; + if (*s == '!' || (getenv ("POSIXLY_CORRECT") != NULL && *s == '^')) + { + pat->bracket.flags = BRACKET_INVMATCH; + s++; + } + if (flags & FNM_PATHNAME) + pat->bracket.flags |= BRACKET_SLASH; + if (!pat->bracket.chars) + { + if (!(pat->bracket.chars = Calloc (2 * strlen (s) + 2))) + return -3; + } + chr = pat->bracket.chars; + do + { + if (*s == 0 || *s == '-' || (*s == '[' && s[1] == ':')) + pat->bracket.flags |= BRACKET_MUSTCHECK | BRACKET_INVMATCH; + if (*s == '[' && (s[1] == '=' || s[1] == '.')) + return -1; /* could be multichar sequence */ + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return -3; + firstbyte = *s; /* this is bad hash for multibyte characters. + We assume that in pathnames is lot of ascii */ + pat->bracket.hash[firstbyte / 32] |= 1 << (firstbyte % 32); + memcpy (chr, s, mlen); + s += mlen; + chr += mlen + 1; + } + while (*s && *s != ']'); + *chr = 0; + if (pat->bracket.flags & BRACKET_MUSTCHECK) + { /* bracket is too complicated to process here + we replace it with ? and when we match entire pattern + we call fnmatch it wasn't false positive */ + *pat->bracket.chars = 0; + for (i = 0; i < (UCHAR_MAX / 32) + 1; i++) + pat->bracket.hash[i] = 0; + } + return s - str; +} + +#define NEXTPATTERN(type) pat->chr.tp = PATTERN_##type; pat = (patterns *)(((void *) pat)+sizeof (type##_pattern)); +#define FALLBACK freepatterns(ret); free(ret); +#define BRACTEST(x) if ((x) == -3) {goto err; } +fnmatch_state * +fnmatch_compile (const char *str, int flags) +{ + const char *s; + patterns *pat, *ret = NULL; + int i, pass; + int size = 0, patsize; + int checkneeded = 0; + char *codeset = nl_langinfo (CODESET); + char *buf; + if (ascii_compatible_encoding (codeset) == 1) + checkneeded = 1; + if (ascii_compatible_encoding (codeset) == 2) + { + return fnmatch_fallback (str, flags); + } + if (flags & FNM_CASEFOLD) + { + NOMEMTEST (buf = Calloc (2 * strlen (str) + 1)); + strfold (str, buf); + fnmatch_state *st = + fold_fallback (fnmatch_compile (buf, flags & (~FNM_CASEFOLD))); + free (buf); + NOMEMTEST (st); + return st; + } + + ret = (patterns *) Calloc ((strlen (str) + 3) * 2 * MAX_PATTERN_SIZE); + + for (pass = 0; pass < 2; pass++) + { + /* two passes in first we compute values we use in second to optimize + */ + patsize = size; + size = 0; /* patsize-size is minimal size of rest of string */ + /* it makes loops tighter and we dont need to check if we went outsize string. */ + pat = ret; + if (flags & FNM_PERIOD && *str != '.') + { + NEXTPATTERN (begindot)} + for (s = str; *s;) + { + if (flags & FNM_EXTMATCH && *(s + 1) == '(' && + (*s == '*' || *s == '+' || *s == '@' || *s == '!')) + { + FALLBACK; + return fnmatch_fallback (str, flags); + } + switch (*s) + { + case '*': + while (*(s + 1) == '?') + { + /* by specification [!]] matches anything except ] */ + BRACTEST (parse_bracket ("!", pat, flags)); + size++; + pat->bracket.sizerest = patsize - size; + NEXTPATTERN (bracket); + s++; + } + if (*(s + 1)) + { + if (pass) + { + /* we will find ocurence of next characters fast. */ + patterns *tmppat = pat; + pat->star.sizerest = patsize - size; + NEXTPATTERN (star); + for (i = 0; pat->chr.tp == PATTERN_chr && pat->chr.chr + && i < CHARSPERINT; i++) + { + tmppat->star.chars[i] = pat->chr.chr; + NEXTPATTERN (chr); + } + if (i == 3) + i = 2; + tmppat->star.charno = i; + pat = tmppat; + } + if (flags & FNM_PATHNAME) + { + NEXTPATTERN (starslash); + } + else + { + NEXTPATTERN (star); + } + } + else + { + NEXTPATTERN (endstar); + } + s++; + break; + case '?': + BRACTEST (parse_bracket ("!", pat, flags)); + size++; + pat->bracket.sizerest = patsize - size; + NEXTPATTERN (bracket); + s++; + break; + case '[': + { + int siz = parse_bracket (s + 1, pat, flags); + BRACTEST (siz); + if (siz < 0) + { + FALLBACK; + return fnmatch_fallback (str, flags); + } + size++; + s += siz + 2; + pat->bracket.sizerest = patsize - size; + if (pat->bracket.flags & BRACKET_MUSTCHECK) + { + checkneeded = 1; + } + NEXTPATTERN (bracket); + } + break; + default: + if (*s == '\\' && (!(flags & FNM_NOESCAPE))) + s++; + { + int nolp = (*s == '/' && (flags & FNM_PERIOD) + && (flags & FNM_PATHNAME) + && *(s + 1) != '.'), mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + goto err; + for (i = 0; i < mlen; i++, s++) + { + pat->chr.chr = *s; + size++; + NEXTPATTERN (chr); + } + if (nolp) + { + NEXTPATTERN (begindot)} + } + break; + } + } + } + if (flags & FNM_LEADING_DIR) + { + NEXTPATTERN (slashend); + NEXTPATTERN (endstar); + } + else + { + NEXTPATTERN (end); + } + + fnmatch_state *st; + NOMEMTEST (st = initfnmatchstate ()); + st->states->p = ret; + st->start = ret; + if (checkneeded) + st = checkneeded_fallback (st, fnmatch_fallback (str, flags)); + return st; +err: + freepatterns (ret); + return NULL; +} + +#undef NEXTPATTERN +#define NEXTPATTERN(type) p = (patterns *)(((void *) p)+sizeof (type##_pattern)); + +static inline int +fnmatch2_internal (const patterns * p, const char *str, int len) +{ +#include "fnmatchcomp_loop.h" +} + +int +fnmatch_exec (const fnmatch_state * p, const char *str) +{ + const struct states *s; + int len = strlen (str); + for (s = p->states; s; s = s->next) + { + int ret = fnmatch2_internal (s->p, str, len); + if (ret <= 0) + return ret; + } + return FNM_NOMATCH; +} + +#define FNMATCH_PREFIX +static int +fnmatch2_prefix_internal (patterns * p, const char *str, int len, + fnmatch_state * ret) +{ +#include "fnmatchcomp_loop.h" +} + +fnmatch_state * +fnmatch_prefix (const fnmatch_state * p, const char *str) +{ + struct states *s; + int len = strlen (str); + fnmatch_state *ret; + NOMEMTEST (ret = (fnmatch_state *) Calloc (sizeof (fnmatch_state))); + memcpy ((void *) ret, (void *) p, sizeof (fnmatch_state)); + ret->states = NULL; + (*ret->refcount)++; + for (s = p->states; s; s = s->next) + { + fnmatch2_prefix_internal (s->p, str, len, ret); + } + return ret; +err: + return NULL; +} + +static void +freepatterns (patterns * p) +{ + if (!p) + return; +#define SKIPPATTERN(type) case PATTERN_##type: NEXTPATTERN(type); break; + while (p->chr.tp != PATTERN_end && p->chr.tp != PATTERN_endstar) + { + switch (p->chr.tp) + { + case PATTERN_chr: + if (!p->chr.chr) + return; + NEXTPATTERN (chr); + break; + SKIPPATTERN (star); + SKIPPATTERN (starslash); + SKIPPATTERN (end); + SKIPPATTERN (endstar); + case PATTERN_bracket: + free (p->bracket.chars); + NEXTPATTERN (bracket); + break; + SKIPPATTERN (begindot); + SKIPPATTERN (slashend); + case PATTERN_fallback: + free (p->fallback.ptr); + return; + case PATTERN_fold: + return; + case PATTERN_checkneeded: + return; + } + } + +} + +void +fnmatch_free (fnmatch_state * s) +{ /* fnmatch_free must process incomplete fnmatch_state too */ + if (!s) + return; + struct states *st, *st2; + for (st = s->states; st;) + { + if (st->p) + { + switch (st->p->chr.tp) + { + case PATTERN_fallback: + free (st->p->fallback.prefix); + if (s->start != st->p) + free (st->p); + break; + case PATTERN_fold: + fnmatch_free ((fnmatch_state *) st->p->fold.states); + if (s->start != st->p) + free (st->p); + break; + case PATTERN_checkneeded: + fnmatch_free ((fnmatch_state *) st->p->checkneeded.states); + fnmatch_free ((fnmatch_state *) st->p->checkneeded.fallback); + if (s->start != st->p) + free (st->p); + break; + } + } + st2 = st->next; + free (st); + st = st2; + } + (*s->refcount)--; + if (!*s->refcount) + { + free (s->refcount); + freepatterns (s->start); + free (s->start); + } + free (s); +} diff --git a/lib/fnmatchcomp.h b/lib/fnmatchcomp.h new file mode 100644 index 0000000..931c920 --- /dev/null +++ b/lib/fnmatchcomp.h @@ -0,0 +1,41 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +struct _fnmatch_state; +typedef struct _fnmatch_state fnmatch_state; + + /* compile pattern */ +fnmatch_state *fnmatch_compile (const char *pattern, int flags); + + /* add prefix to matched strings. + suppose you want match foo againist /home/foo, /home/bar/foo, + /home/bar/bar + fnmatch_state* s = fnmatch_compile("foo", 0); + fnmatch_state* home = fnmatch_prefix(s, "/home"); + fnmatch_state* bar = fnmatch_prefix(home, "/bar"); + fnmatch_exec(home, "/foo"); + fnmatch_exec(bar, "/foo"); + fnmatch_exec(bar, "/bar"); + */ +fnmatch_state *fnmatch_prefix (const fnmatch_state * pattern, + const char *prefix); + +int fnmatch_exec (const fnmatch_state * pattern, const char *string); + +void fnmatch_free (fnmatch_state * pattern); + + /* For some encodings we decide just call fnmatch. In that case s you can gain some performance by checking fnmatch_fallbacked and if returns 1 call fnmatch yourself */ +int fnmatch_fallbacked (fnmatch_state * pattern); diff --git a/lib/fnmatchcomp_loop.h b/lib/fnmatchcomp_loop.h new file mode 100644 index 0000000..3bae0ec --- /dev/null +++ b/lib/fnmatchcomp_loop.h @@ -0,0 +1,230 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +const char *s = str, *chars; +int i, buflen; +char *buf; +while (1) + { +#ifndef FNMATCH_PREFIX + int sizerest, charno, retval; +#undef ADDSTATE +#define ADDSTATE +#else + patterns *pat, *pat2; + int _statepresent; + struct states *state, *stateiter; +#undef ADDSTATE +#define ADDSTATE _statepresent = 0; \ + for (stateiter = ret->states; stateiter; stateiter = stateiter->next) {\ + if (stateiter->p == p) _statepresent = 1; \ + } \ + if (!_statepresent) {\ + state = calloc(1, sizeof (struct states)); \ + state->next = ret->states; \ + ret->states = state; \ + state->p = p; \ + } + if (!*s) + { + ADDSTATE; + return 0; + } +#endif + switch (p->chr.tp) + { + case PATTERN_chr: + if (__builtin_expect ((*s != p->chr.chr), 1)) + return FNM_NOMATCH; + NEXTPATTERN (chr); + s++; + break; + case PATTERN_starslash: +#undef SLASHTEST +#define SLASHTEST if (str[i] == '/') return FNM_NOMATCH; +#include "fnmatchcomp_star.h" + break; + case PATTERN_star: +#undef SLASHTEST +#define SLASHTEST +#include "fnmatchcomp_star.h" + break; + + case PATTERN_end: + return (*s) ? FNM_NOMATCH : 0; + break; + case PATTERN_slashend: + if (*s == '/' || *s == 0) + { + NEXTPATTERN (slashend); + } + else + return FNM_NOMATCH; + break; + case PATTERN_endstar: + ADDSTATE; + return 0; + break; + case PATTERN_bracket: + { + int mat; + int mlen, mbuflen; + if (MB_CUR_MAX == 1) + { + mlen = 1; + mat = (p->bracket.hash[*s / 32] & (1 << (*s % 32))) ? 1 : 0; + } + else + { + mlen = 0; + if (!(p->bracket.hash[*s / 32] & (1 << (*s % 32)))) + { + mat = 0; + goto match; + } + mat = 1; + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return FNM_NOMATCH; + chars = p->bracket.chars; + while (*chars) + { + mbuflen = strlen (chars); + if (!strncmp (chars, s, mlen) && mlen == mbuflen) + goto match; + chars += mbuflen + 1; + } + mat = 0; + } + match: + if (mat ^ (p->bracket.flags & BRACKET_INVMATCH)) + { + if (!mlen) + { + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return FNM_NOMATCH; + } + if (p->bracket.flags & BRACKET_SLASH && *s == '/') + return FNM_NOMATCH; + s += mlen; + if (len - (str - s) < p->bracket.sizerest) + return FNM_NOMATCH; + } + else + { + return FNM_NOMATCH; + } + NEXTPATTERN (bracket); + } + break; + case PATTERN_begindot: + if (*s == '.') + return FNM_NOMATCH; + NEXTPATTERN (begindot); + break; + case PATTERN_fallback: + + buflen = strlen (p->fallback.prefix) + len + 1; + NOMEMTEST (buf = ALLOCALIMIT (buflen, 1000)); + strcpy (buf, p->fallback.prefix); + strcat (buf, s); +#ifndef FNMATCH_PREFIX + retval = fnmatch (p->fallback.ptr, buf, p->fallback.flags); + ALLOCAFREE (buf, buflen, 1000); + return retval; +#else + if (!(pat2 = malloc (sizeof (fallback_pattern)))) + { + ALLOCAFREE (buf, buflen, 1000); + NOMEMTEST (NULL); + } + memcpy (pat2, p, sizeof (fallback_pattern)); + if (!(pat2->fallback.prefix = strdup (buf))) + { + ALLOCAFREE (buf, buflen, 1000); + NOMEMTEST (NULL); + } + p = (patterns *) pat2; + ADDSTATE; + return FNM_NOMATCH; +#endif + ALLOCAFREE (buf, buflen, 1000); + + case PATTERN_fold: + buflen = 2 * len + 1; + NOMEMTEST (buf = (ALLOCALIMIT (buflen, 1000))); + strfold (str, buf); +#ifndef FNMATCH_PREFIX + retval = fnmatch_exec ((fnmatch_state *) p->fold.states, buf); + ALLOCAFREE (buf, 2 * len + 1, 1000); + return retval; +#else + if (!(pat2 = malloc (sizeof (fallback_pattern)))) + { + ALLOCAFREE (buf, buflen, 1000); + NOMEMTEST (NULL); + } + pat2->fold.tp = PATTERN_fold; + if (!(pat2->fold.states = + (void *) fold_fallback (fnmatch_prefix (p->fold.states, buf)))) + { + free (pat2); + ALLOCAFREE (buf, buflen, 1000); + NOMEMTEST (NULL); + } + p = (patterns *) pat2; + ADDSTATE; + ALLOCAFREE (buf, buflen, 1000); + return FNM_NOMATCH; +#endif + break; + case PATTERN_checkneeded: +#ifndef FNMATCH_PREFIX + { + int ret = fnmatch_exec ((fnmatch_state *) p->checkneeded.states, s); + if (ret > 0) + return FNM_NOMATCH; + if (ret < 0) + return ret; + return fnmatch_exec ((fnmatch_state *) p->checkneeded.fallback, s); + } +#else + NOMEMTEST (pat2 = malloc (sizeof (checkneeded_pattern))); + pat2->checkneeded.tp = PATTERN_checkneeded; + if (!(pat2->checkneeded.states = (void *) + fnmatch_prefix ((fnmatch_state *) p->checkneeded.states, s))) + { + free (pat2); + NOMEMTEST (NULL); + } + if (!(pat2->checkneeded.fallback = + (void *) fnmatch_prefix ((fnmatch_state *) p-> + checkneeded.fallback, s))) + { + fnmatch_free ((fnmatch_state *) pat2->checkneeded.states); + free (pat2); + NOMEMTEST (NULL); + } + ADDSTATE; + return FNM_NOMATCH; +#endif + break; + } + } + +err: +return -1; diff --git a/lib/fnmatchcomp_star.h b/lib/fnmatchcomp_star.h new file mode 100644 index 0000000..42a601d --- /dev/null +++ b/lib/fnmatchcomp_star.h @@ -0,0 +1,66 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +#ifndef FNMATCH_PREFIX +sizerest = p->star.sizerest; +chars = p->star.chars; +charno = p->star.charno; +NEXTPATTERN (star); +for (i = 0; i < charno; i++) + { + NEXTPATTERN (chr); + } + +switch (charno) /* we check charno character at once by converting to + coresponding integer type */ + { + case 0: + for (i = s - str; i < len - sizerest; i++) + { + if (!fnmatch2_internal (p, str + i, len - i)) + return 0; + SLASHTEST; + } + break; +#define CASEN(type, no) \ + case no:\ + for (i = s-str; i <= len-sizerest; i++) {\ + if (__builtin_expect((*((type*)(str+i)) == *(type *) chars), 0)\ + && !fnmatch2_internal(p, str+i+no, len-i-no)) return 0; \ + SLASHTEST \ + } \ + break; + CASEN (char, 1); + CASEN (UINT2, 2); + case 3: + CASEN (UINT4, 4); + } + +return FNM_NOMATCH; +#else +pat = p; +NEXTPATTERN (star); +for (i = s - str; i < len; i++) + { + fnmatch2_prefix_internal (p, str + i, len - i, ret); + SLASHTEST; + } + +p = pat; +ADDSTATE; +return FNM_NOMATCH; +#endif +break; diff --git a/modules/fnmatchcomp b/modules/fnmatchcomp new file mode 100644 index 0000000..c89b1e7 --- /dev/null +++ b/modules/fnmatchcomp @@ -0,0 +1,34 @@ +Description: + Allows you to compile fnmatch pattern. You can add to this pattern prefix + which will be added as prefix of any string you will match with this pattern + +Files: +lib/fnmatchcomp.c +lib/fnmatchcomp.h +lib/fnmatchcomp_loop.h +lib/fnmatchcomp_star.h + +Depends-on: +fnmatch +extensions +alloca +stdbool +wchar +wctype +memchr +memcmp +mbsrtowcs +mbsinit + +configure.ac: + +Makefile.am: + +Include: +<fnmatchcomp.h> + +License: +LGPLv2+ + +Maintainer: +Ondrej Bilka -- Incorrect time synchronization
compare() { echo $1 echo en_US.UTF-8 LANG=en_US.UTF-8 /usr/bin/time -f " user: %U" ./fnmatch $1 $2 /usr/bin/time -f " user: %U" ./fnmatch2 $1 $2 echo C LANG=C /usr/bin/time -f " user: %U" ./fnmatch $1 $2 /usr/bin/time -f " user: %U" ./fnmatch2 $1 $2 } compare *abc* 0 compare *[abc]bc* 0 compare *a[bcd]c* 0 compare *ab[bcd]* 0 compare *?abc* 0 compare *ab[a-z]* 0 echo "casefold" compare *abc* 16 compare *ab[a-z]* 16
all: gcc -Wall -DCOMPARE -O2 ftest.c fnmatchcomp.c -o fnmatch gcc -O2 ftest.c fnmatchcomp.c -o fnmatch2
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <nl_types.h> #include <langinfo.h> #include <locale.h> #define _GNU_SOURCE #include <fnmatch.h> #include "fnmatchcomp.h" int fnmatch2(const char *ptr,const char *str,int flags){ return fnmatch_exec(fnmatch_compile(ptr,flags),str); } int main(int argc, char *argv[]) {int i; setlocale(LC_ALL, ""); char *ptr = argv[1]; fnmatch_state *x; int flags = atoi(argv[2]); fnmatch_state *p = fnmatch_compile(ptr, flags); int cnt = 0; char *fooptr="*foo*"; char *s1 = strdup("abcda"), *s2 =strdup("bbidaasxas"), *s3 = strdup("nabcasdxabccx"); for (i = 0; i < 3000000; i++) { s1[1]++; if (s1[1] > 126||s1[1] < 0)s1[1] = 1; s2[1]++; if (s2[1] > 126||s2[1] < 0)s2[1] = 1; s2[1]++; if (s3[1] > 126||s3[1] < 0)s3[1] = 1; #ifdef COMPARE cnt += fnmatch(ptr, s1, flags)+fnmatch(ptr, s2, flags)+fnmatch(ptr, s3, flags); #else cnt += fnmatch_exec(p, s1)+fnmatch_exec(p, s2)+fnmatch_exec(p, s3); #endif } #define TESTPREF(st,pat,str,fullstr,flag) if (fnmatch(pat,fullstr,flag)!=fnmatch_exec(st,str)) { printf ("partial %s =~ %s failed flags %i \n",pat,fullstr,flag);} for (i=0;i<32;i++){ fnmatch_state* s = fnmatch_compile(fooptr, i); fnmatch_state* home = fnmatch_prefix(s, "/home"); fnmatch_state* bar = fnmatch_prefix(s, "/bar"); fnmatch_state* baz = fnmatch_prefix(bar,"/baz"); TESTPREF(home,fooptr,"/foo","/home/foo",i); TESTPREF(bar,fooptr,"/foo","/home/bar/foo",i); TESTPREF(bar,fooptr,"/bar","/home/bar/bar",i); TESTPREF(baz,fooptr,"/blabla","/home/bar/baz/blabla",i); fnmatch_free(home); fnmatch_free(bar); fnmatch_free(s); fnmatch_free(baz); } #define TESTP(pat,str,flag) x=fnmatch_compile(pat,flag); if (fnmatch(pat,str,flag)!=fnmatch_exec(x,str)) { printf ("%s =~ %s failed flags %i \n",pat,str,flag);} fnmatch_free(x); for(i=0;i<32;i++){ TESTP("*/aa*","asdasd/aab",i); TESTP("*/[ab]a*","as/asd/aab",i); TESTP("*ads*",".asdvdf",i); TESTP("*/?as","asdasd/.asasd",i); TESTP("*/[a-z]as","asdasd/.asasd",i); TESTP("*/[a-z]a*","asdasd/aab",i); } /*m4 test of fnmatch*/ char const *Apat = 'A' < '\\' ? "[A-\\\\]" : "[\\\\-A]"; char const *apat = 'a' < '\\' ? "[a-\\\\]" : "[\\\\-a]"; static char const A_1[] = { 'A' - 1, 0 }; static char const A01[] = { 'A' + 1, 0 }; static char const a_1[] = { 'a' - 1, 0 }; static char const a01[] = { 'a' + 1, 0 }; static char const bs_1[] = { '\\' - 1, 0 }; static char const bs01[] = { '\\' + 1, 0 }; #define y TESTP #define n TESTP n ("a*", "", 0) y ("a*", "abc", 0) n ("d*/*1", "d/s/1", FNM_PATHNAME) y ("a\\bc", "abc", 0) n ("a\\bc", "abc", FNM_NOESCAPE) y ("*x", ".x", 0) n ("*x", ".x", FNM_PERIOD) y (Apat, "\\", 0) y (Apat, "A", 0) y (apat, "\\", 0) y (apat, "a", 0) n (Apat, A_1, 0) n (apat, a_1, 0) y (Apat, A01, 0) y (apat, a01, 0) y (Apat, bs_1, 0) y (apat, bs_1, 0) n (Apat, bs01, 0) n (apat, bs01, 0) y ("xxXX", "xXxX", FNM_CASEFOLD) y ("a++(x|yy)b", "a+xyyyyxb", FNM_EXTMATCH) n ("d*/*1", "d/s/1", FNM_FILE_NAME) y ("*", "x", FNM_FILE_NAME | FNM_LEADING_DIR) y ("x*", "x/y/z", FNM_FILE_NAME | FNM_LEADING_DIR) y ("*c*", "c/x", FNM_FILE_NAME | FNM_LEADING_DIR); fnmatch_free(p); return (0); }