Hello I finished writing this patch. I added test(and fnmatchcomp.m4 which I forgot in previous). I didnt change much, prettied and fixed bugs discovered by tests. Now it should be stable. I hope this patch looks OK.
Moreover I made two independent changes first is preallocating buffer in fnmatch which should speed multibyte strings by third. Second is adding compiled fnmatch in glob diff --git a/lib/fnmatch.c b/lib/fnmatch.c index 48bc8b5..6daf8b0 100644 --- a/lib/fnmatch.c +++ b/lib/fnmatch.c @@ -275,6 +348,10 @@ int fnmatch (const char *pattern, const char *string, int flags) { # if HANDLE_MULTIBYTE +#define ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n): malloc(n)) +#define ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr); +#define NOMEMTEST(exp) if (__builtin_expect(!(exp), 0)) {errno = ENOMEM; goto err; } +#define BE __builtin_expect # define ALLOCA_LIMIT 2000 if (__builtin_expect (MB_CUR_MAX, 1) != 1) { @@ -285,7 +362,35 @@ fnmatch (const char *pattern, const char *string, int flags) wchar_t *wpattern; wchar_t *wstring; int res; + char *s = string, *p = pattern; + size_t plen = 2 * strlen (p) + 2, slen = 2 * strlen (s) + 2; + NOMEMTEST (wpattern = + ALLOCALIMIT (sizeof (wchar_t) * (plen + slen + 1))); + memset (&ps, '\0', sizeof (ps)); + patsize = mbsrtowcs (wpattern, &p, plen, &ps); + if (BE (patsize == plen, 0)) + { + ALLOCAFREE (wpattern); + goto toolarge; + } + if (BE (patsize < 0, 0)) + return -1; + wstring = wpattern + patsize; + assert (mbsinit (&ps)); + strsize = mbsrtowcs (wstring, &s, slen, &ps); + if (BE (strsize < 0, 0)) + return -1; + if (BE (strsize == slen, 0)) + { + ALLOCAFREE (wpattern); + goto toolarge; + } + res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1, + flags & FNM_PERIOD, flags); + ALLOCAFREE (wpattern); + return res; + toolarge: /* Calculate the size needed to convert the strings to wide characters. */ memset (&ps, '\0', sizeof (ps)); @@ -339,6 +445,8 @@ fnmatch (const char *pattern, const char *string, int flags) return internal_fnmatch (pattern, string, string + strlen (string), flags & FNM_PERIOD, flags); +err: + return -1; } # ifdef _LIBC diff --git a/lib/glob.c b/lib/glob.c index 40cc9b3..1b61f67 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -152,7 +152,7 @@ #endif /* _LIBC */ #include <fnmatch.h> - +#include <fnmatchcomp.h> #ifdef _SC_GETPW_R_SIZE_MAX # define GETPW_R_SIZE_MAX() sysconf (_SC_GETPW_R_SIZE_MAX) #else @@ -1315,7 +1311,7 @@ glob_in_dir (const char *pattern, const char *directory, int flags, int meta; int save; int result; - + fnmatch_state *fnm_pattern = NULL; init_names.next = NULL; init_names.count = INITIAL_COUNT; @@ -1368,6 +1362,7 @@ glob_in_dir (const char *pattern, const char *directory, int flags, | FNM_CASEFOLD #endif ); + fnm_pattern = fnmatch_compile (pattern, fnm_flags); flags |= GLOB_MAGCHAR; while (1) @@ -1415,7 +1411,7 @@ glob_in_dir (const char *pattern, const char *directory, int flags, name = d->d_name; - if (fnmatch (pattern, name, fnm_flags) == 0) + if (fnmatch_exec (fnm_pattern, name) == 0) { /* If the file we found is a symlink we have to make sure the target file exists. */ @@ -1544,5 +1538,6 @@ glob_in_dir (const char *pattern, const char *directory, int flags, __set_errno (save); } + fnmatch_free (fnm_pattern); return result; } diff --git a/MODULES.html.sh b/MODULES.html.sh index 06afa2d..03d308e 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -61,6 +61,7 @@ fenv float fmtmsg fnmatch +fnmatchcomp ftw glob grp @@ -418,6 +419,7 @@ fmodf fmodl fmtmsg fnmatch +fnmatchcomp fopen fork fpathconf diff --git a/lib/fnmatchcomp.c b/lib/fnmatchcomp.c new file mode 100644 index 0000000..b374fec --- /dev/null +++ b/lib/fnmatchcomp.c @@ -0,0 +1,632 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ +#ifndef _LIBC +# include <config.h> +#endif + + /* Enable GNU extensions in fnmatch.h. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +#include <fnmatch.h> +#include <stdint.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <nl_types.h> +#include <langinfo.h> +#include <locale.h> +#include <ctype.h> +#include <errno.h> +#include <fnmatchcomp.h> + +#if !defined __builtin_expect && __GNUC__ < 3 +# define __builtin_expect(expr, expected) (expr) +#endif + +#define WIDE_CHAR_SUPPORT \ + ((HAVE_MBLEN && HAVE_WCSTOMBS) || _LIBC) + + /* For platfor m which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if WIDE_CHAR_SUPPORT +# include <wctype.h> +# include <wchar.h> +#else +#define mblen(x, y) 1 +#define MB_CUR_MAX 1 +#endif + +#define UINT4 uint32_t +#define UINT2 uint16_t +#define CHARSPERINT (sizeof (UINT4)/sizeof(char)) +#define STREQ(a, b) (strcmp(a, b) == 0) +#define MAX_PATTERN_SIZE (sizeof (bracket_pattern)) +#define NOMEMTEST(exp) if (__builtin_expect(!(exp), 0)) {errno = ENOMEM; goto err; } +#define Calloc(x) calloc(1, x) +#define ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n): malloc(n)) +#define ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr); +typedef struct +{ + char tp; + char chr; +} chr_pattern; +typedef struct +{ /* We seek chars following *. This provides major speedup. + For some encodings we could be wrong because leading char was part of multibyte character. Then after successfull match call fnmatch to eliminate false positives */ + char tp; + char chars[CHARSPERINT]; + short sizerest; + short charno; +} star_pattern; +typedef struct +{ /*string must end here but cant continue*/ + char tp; +} end_pattern; +typedef struct +{ /* * at end of pattern. automatic success */ + char tp; +} endstar_pattern; +#define BRACKET_INVMATCH 1 +#define BRACKET_SLASH 2 +#define BRACKET_MUSTCHECK 4 +typedef struct +{ + char tp; + char flags; + char *chars; /* multibyte characters separated by 0 */ + int sizerest; + uint32_t hash[(UCHAR_MAX / 32) + 1]; + /* We look at first char of multibyte character. We dont have to call mblen if this fails. */ +} bracket_pattern; +typedef struct +{ /* if FNM_PERIOD was set this checks period after / If character is . it fails otherwise dont consume anything*/ + char tp; +} begindot_pattern; +typedef struct +{ /* if FNM_LEADING_DIR was set we go after / or 0 to endstar pattern which accept everything. */ + char tp; +} slashend_pattern; + +typedef struct +{ /* we must call fnmatch */ + char tp; + char *ptr; + char *prefix; + int flags; +} fallback_pattern; +typedef struct +{ /* we fold string and then call case sensitive version */ + char tp; + fnmatch_state *states; +} fold_pattern; +typedef struct +{ /* we call our version and if succeed we check by fnmatch fallback */ + char tp; + fnmatch_state *states; + fnmatch_state *fallback; +} checkneeded_pattern; + + /* like star but we dont go beyond / */ +typedef star_pattern starslash_pattern; +enum PATTERNS +{ + PATTERN_chr, + PATTERN_star, + PATTERN_starslash, + PATTERN_end, + PATTERN_endstar, + PATTERN_bracket, + PATTERN_begindot, + PATTERN_slashend, + PATTERN_fallback, + PATTERN_fold, + PATTERN_checkneeded +}; +union _patterns +{ + chr_pattern chr; + star_pattern star; + starslash_pattern starslash; + end_pattern end; + endstar_pattern endstar; + bracket_pattern bracket; + begindot_pattern begindot; + fallback_pattern fallback; + fold_pattern fold; + checkneeded_pattern checkneeded; +}; +typedef union _patterns patterns; +static void freepatterns (patterns * p); + +struct states +{ + patterns *p; + struct states *next; +}; +struct _fnmatch_state +{ + struct states *states; /* list of states of undrelying NFA. */ + int flags; + int *refcount; + patterns *start; +}; + +int +fnmatch_fallbacked (fnmatch_state * s) +{ + return (s->states->p->fallback.tp == PATTERN_fallback); +} + + +static void +strfold (const char *str, char *buf) +{ + if (MB_CUR_MAX == 1) + { + while (*str) + *buf++ = tolower (*str++); + *buf = 0; + } + else + { +#if WIDE_CHAR_SUPPORT + int i; + int len = 2 * strlen (str) + 1; + wchar_t *towc = ALLOCALIMIT (sizeof (wint_t) * (len)); + towc[len - 1] = L'\0'; + mbstowcs (towc, str, len); + if (towc[len - 1] != L'\0') + { + ALLOCAFREE (towc, sizeof (wint_t) * (len)); + len = mbstowcs (NULL, str, 0) + 1; + towc = ALLOCALIMIT (sizeof (wint_t) * len); + mbstowcs (towc, str, len); + } + for (i = 0; towc[i] != L'\0'; i++) + { + towc[i] = towlower (towc[i]); + } + wcstombs (buf, towc, len); + ALLOCAFREE (towc, sizeof (wint_t) * (len)); +#endif + } +} + +static fnmatch_state * +initfnmatchstate () +{ + fnmatch_state *st; + NOMEMTEST (st = (fnmatch_state *) Calloc (sizeof (fnmatch_state))); + NOMEMTEST (st->states = Calloc (sizeof (struct states))); + st->states->next = NULL; + NOMEMTEST (st->refcount = Calloc (sizeof (int))); + *st->refcount = 1; + return st; +err: + if (st) + { + free (st->states); + free (st->refcount); + } + free (st); + return NULL; +} + +#define FALLBACK_INIT(type) \ + fnmatch_state *st; \ + NOMEMTEST (st = initfnmatchstate ()); \ + NOMEMTEST (st->start = st->states->p = Calloc (sizeof (type##_pattern))); \ + st->states->p->fallback.tp = PATTERN_##type; + +static fnmatch_state * +fnmatch_fallback (const char *str, int flags) +{ + FALLBACK_INIT (fallback); + NOMEMTEST (st->states->p->fallback.ptr = strdup (str)); + st->states->p->fallback.flags = flags; + NOMEMTEST (st->states->p->fallback.prefix = strdup ("")); + return st; +err: + fnmatch_free (st); + return NULL; +} + +static fnmatch_state * +fold_fallback (fnmatch_state * s) +{ + FALLBACK_INIT (fold); + NOMEMTEST (s); + st->states->p->fold.states = s; + return st; +err: + fnmatch_free (s); + fnmatch_free (st); + return NULL; +} + +static fnmatch_state * +checkneeded_fallback (fnmatch_state * s, fnmatch_state * fb) +{ + FALLBACK_INIT (checkneeded); + NOMEMTEST (fb); + NOMEMTEST (s); + st->states->p->checkneeded.states = s; + st->states->p->checkneeded.fallback = fb; + return st; +err: + fnmatch_free (s); + fnmatch_free (fb); + fnmatch_free (st); + return NULL; +} + +static int +parse_bracket (const char *str, patterns * pat, int flags) +{ + unsigned char firstbyte; + char *chr; + int mlen; + const char *s = str; + int i; + pat->bracket.tp = PATTERN_bracket; + if (*s == '!' || (getenv ("POSIXLY_CORRECT") != NULL && *s == '^')) + { + pat->bracket.flags = BRACKET_INVMATCH; + s++; + } + if (flags & FNM_PATHNAME) + pat->bracket.flags |= BRACKET_SLASH; + if (!pat->bracket.chars) + { + if (!(pat->bracket.chars = Calloc (2 * strlen (s) + 2))) + return -3; + } + chr = pat->bracket.chars; + do + { + if (*s == 0 || *s == '-') + pat->bracket.flags |= BRACKET_MUSTCHECK | BRACKET_INVMATCH; + if (*s == '[' && (s[1] == '=' || s[1] == '.' || s[1] == ':')) + return -1; /* could be multichar sequence */ + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return -3; + /* We dont have call mblen often because first character of multibyte + is often not allowed at other places. */ + firstbyte = (unsigned char) *s; + pat->bracket.hash[firstbyte / 32] |= 1 << (firstbyte % 32); + memcpy (chr, s, mlen); + s += mlen; + chr += mlen + 1; + } + while (*s && *s != ']'); + *chr = 0; + if (pat->bracket.flags & BRACKET_MUSTCHECK) + { /* bracket is too complicated to process here + we replace it with ? and when we match entire pattern + we call fnmatch it wasn't false positive */ + *pat->bracket.chars = 0; + for (i = 0; i < (UCHAR_MAX / 32) + 1; i++) + pat->bracket.hash[i] = 0; + } + return s - str; +} + +#define NEXTPATTERN(type) pat->chr.tp = PATTERN_##type; pat = (patterns *)(((void *) pat)+sizeof (type##_pattern)); +#define FALLBACK freepatterns(ret); free(ret); +#define BRACTEST(x) if ((x) == -3) {goto err; } +fnmatch_state * +fnmatch_compile (const char *str, int flags) +{ + const char *s; + patterns *pat, *ret = NULL; + int i, pass; + int size = 0, patsize; + int checkneeded = 0; + char *codeset = nl_langinfo (CODESET); + char *buf; + if (MB_CUR_MAX == 1 || STREQ (codeset, "UTF-8")) + { + /* encoding can be handled as singlebyte */ + } + else if (mblen (NULL, 0) == 0) + { /* stateless */ + checkneeded = 1; + } + else + { + return fnmatch_fallback (str, flags); + } + if (flags & FNM_CASEFOLD) + { + NOMEMTEST (buf = Calloc (2 * strlen (str) + 1)); + strfold (str, buf); + fnmatch_state *st = + fold_fallback (fnmatch_compile (buf, flags & (~FNM_CASEFOLD))); + free (buf); + NOMEMTEST (st); + return st; + } + ret = (patterns *) Calloc ((strlen (str) + 3) * 2 * MAX_PATTERN_SIZE); + + for (pass = 0; pass < 2; pass++) + { + /* two passes in first we compute values we use in second to optimize + */ + patsize = size; + size = 0; /* patsize-size is minimal size of rest of string */ + /* it makes loops tighter and we dont need to check if we went outsize string. */ + pat = ret; + if (flags & FNM_PERIOD && *str != '.') + { + NEXTPATTERN (begindot)} + for (s = str; *s;) + { + if (flags & FNM_EXTMATCH && *(s + 1) == '(' && + (*s == '*' || *s == '+' || *s == '@' || *s == '!')) + { + FALLBACK; + return fnmatch_fallback (str, flags); + } + switch (*s) + { + case '*': + while (*(s + 1) == '?') + { + /* by specif ication [!]] matches anything except ] */ + BRACTEST (parse_bracket ("!", pat, flags)); + size++; + pat->bracket.sizerest = patsize - size; + NEXTPATTERN (bracket); + s++; + } + if (*(s + 1) || (flags & FNM_PATHNAME)) + { + if (pass) + { + /* we will find ocurence of next characters fast. */ + patterns *tmppat = pat; + pat->star.sizerest = patsize - size; + NEXTPATTERN (star); + for (i = 0; pat->chr.tp == PATTERN_chr && pat->chr.chr + && i < CHARSPERINT; i++) + { + tmppat->star.chars[i] = pat->chr.chr; + NEXTPATTERN (chr); + } + if (i == 3) + i = 2; + tmppat->star.charno = i; + pat = tmppat; + } + if (flags & FNM_PATHNAME) + { + NEXTPATTERN (starslash); + } + else + { + NEXTPATTERN (star); + } + } + else + { + NEXTPATTERN (endstar); + } + s++; + break; + case '?': + BRACTEST (parse_bracket ("!", pat, flags)); + size++; + pat->bracket.sizerest = patsize - size; + NEXTPATTERN (bracket); + s++; + break; + case '[': + { + int siz = parse_bracket (s + 1, pat, flags); + BRACTEST (siz); + if (siz < 0) + { + FALLBACK; + return fnmatch_fallback (str, flags); + } + size++; + s += siz + 2; + pat->bracket.sizerest = patsize - size; + if (pat->bracket.flags & BRACKET_MUSTCHECK) + { + checkneeded = 1; + } + NEXTPATTERN (bracket); + } + break; + default: + if (*s == '\\' && (!(flags & FNM_NOESCAPE))) + s++; + { + int nolp = (*s == '/' && (flags & FNM_PERIOD) + && (flags & FNM_PATHNAME) + && *(s + 1) != '.'), mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + goto err; + for (i = 0; i < mlen; i++, s++) + { + pat->chr.chr = *s; + size++; + NEXTPATTERN (chr); + } + if (nolp) + { + NEXTPATTERN (begindot)} + } + break; + } + } + } + if (flags & FNM_LEADING_DIR) + { + NEXTPATTERN (slashend); + NEXTPATTERN (endstar); + } + else + { + NEXTPATTERN (end); + } + + fnmatch_state *st; + NOMEMTEST (st = initfnmatchstate ()); + st->states->p = ret; + st->start = ret; + if (checkneeded) + st = checkneeded_fallback (st, fnmatch_fallback (str, flags)); + return st; +err: + freepatterns (ret); + return NULL; +} + +#undef NEXTPATTERN +#define NEXTPATTERN(type) p = (patterns *)(((void *) p)+sizeof (type##_pattern)); + +static inline int +fnmatch2_internal (const patterns * p, const char *str, int len) +{ +#include "fnmatchcomp_loop.h" +} + +int +fnmatch_exec (const fnmatch_state * p, const char *str) +{ + const struct states *s; + int len = strlen (str); + for (s = p->states; s; s = s->next) + { + int ret = fnmatch2_internal (s->p, str, len); + if (ret <= 0) + return ret; + } + return FNM_NOMATCH; +} + +#define FNMATCH_PREFIX +static int +fnmatch2_prefix_internal (patterns * p, const char *str, int len, + fnmatch_state * ret) +{ +#include "fnmatchcomp_loop.h" +} + +fnmatch_state * +fnmatch_prefix (const fnmatch_state * p, const char *str) +{ + struct states *s; + int len = strlen (str); + fnmatch_state *ret; + NOMEMTEST (ret = (fnmatch_state *) Calloc (sizeof (fnmatch_state))); + memcpy ((void *) ret, (void *) p, sizeof (fnmatch_state)); + ret->states = NULL; + (*ret->refcount)++; + for (s = p->states; s; s = s->next) + { + fnmatch2_prefix_internal (s->p, str, len, ret); + } + return ret; +err: + return NULL; +} + +static void +freepatterns (patterns * p) +{ + if (!p) + return; +#define SKIPPATTERN(type) case PATTERN_##type: NEXTPATTERN(type); break; + while (p->chr.tp != PATTERN_end && p->chr.tp != PATTERN_endstar) + { + switch (p->chr.tp) + { + case PATTERN_chr: + if (!p->chr.chr) + return; + NEXTPATTERN (chr); + break; + SKIPPATTERN (star); + SKIPPATTERN (starslash); + SKIPPATTERN (end); + SKIPPATTERN (endstar); + case PATTERN_bracket: + free (p->bracket.chars); + NEXTPATTERN (bracket); + break; + SKIPPATTERN (begindot); + SKIPPATTERN (slashend); + case PATTERN_fallback: + free (p->fallback.ptr); + return; + case PATTERN_fold: + return; + case PATTERN_checkneeded: + return; + } + } + +} + +void +fnmatch_free (fnmatch_state * s) +{ /* fnmatch_free must process incomplete fnmatch_state too */ + if (!s) + return; + struct states *st, *st2; + for (st = s->states; st;) + { + if (st->p) + { + switch (st->p->chr.tp) + { + case PATTERN_fallback: + free (st->p->fallback.prefix); + if (s->start != st->p) + free (st->p); + break; + case PATTERN_fold: + fnmatch_free (st->p->fold.states); + if (s->start != st->p) + free (st->p); + break; + case PATTERN_checkneeded: + fnmatch_free (st->p->checkneeded.states); + fnmatch_free (st->p->checkneeded.fallback); + if (s->start != st->p) + free (st->p); + break; + } + } + st2 = st->next; + free (st); + st = st2; + } + (*s->refcount)--; + if (!*s->refcount) + { + free (s->refcount); + freepatterns (s->start); + free (s->start); + } + free (s); +} diff --git a/lib/fnmatchcomp.h b/lib/fnmatchcomp.h new file mode 100644 index 0000000..356fbf8 --- /dev/null +++ b/lib/fnmatchcomp.h @@ -0,0 +1,41 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +struct _fnmatch_state; +typedef struct _fnmatch_state fnmatch_state; + + /* compile pattern */ +fnmatch_state *fnmatch_compile (const char *pattern, int flags); + + /* add prefix to matched strings. + suppose you want match foo againist /home/foo, /home/bar/foo, + /home/bar/bar + fnmatch_state* s = fnmatch_compile("foo", 0); + fnmatch_state* home = fnmatch_prefix(s, "/home"); + fnmatch_state* bar = fnmatch_prefix(home, "/bar"); + fnmatch_exec(home, "/foo"); + fnmatch_exec(bar, "/foo"); + fnmatch_exec(bar, "/bar"); + */ +fnmatch_state *fnmatch_prefix (const fnmatch_state * pattern, + const char *prefix); + +int fnmatch_exec (const fnmatch_state * pattern, const char *string); + +void fnmatch_free (fnmatch_state * pattern); + + /* For some encodings we decide just call fnmatch. In that case s you can gain some performance by checking fnmatch_fallbacked and if returns 1 call fnmatch yourself */ +int fnmatch_fallbacked (fnmatch_state * pattern); diff --git a/lib/fnmatchcomp_loop.h b/lib/fnmatchcomp_loop.h new file mode 100644 index 0000000..f722ffb --- /dev/null +++ b/lib/fnmatchcomp_loop.h @@ -0,0 +1,234 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +const char *s = str, *chars; +int i, buflen; +char *buf; +while (1) + { +#ifndef FNMATCH_PREFIX + int retval; + int sizerest, charno; +#undef ADDSTATE +#define ADDSTATE +#else + int statetwice; + patterns *pat, *pat2; + struct states *state; +#undef ADDSTATE +#define ADDSTATE \ + statetwice = 0; \ + for (state = ret->states; state; state = state->next)\ + if (state->p == p) statetwice = 1; \ + if (!statetwice) {\ + state = calloc(1, sizeof (struct states)); \ + state->next = ret->states; \ + ret->states = state; \ + state->p = p; \ + } + if (!*s) + { + ADDSTATE; + return 0; + } +#endif + switch (p->chr.tp) + { + case PATTERN_chr: + if (__builtin_expect ((*s != p->chr.chr), 1)) + return FNM_NOMATCH; + NEXTPATTERN (chr); + s++; + break; + case PATTERN_starslash: +#undef SLASHTEST +#define SLASHTEST if (str[i] == '/') return FNM_NOMATCH; +#include "fnmatchcomp_star.h" + break; + case PATTERN_star: +#undef SLASHTEST +#define SLASHTEST +#include "fnmatchcomp_star.h" + break; + + case PATTERN_end: + return (*s) ? FNM_NOMATCH : 0; + break; + case PATTERN_slashend: + if (*s == '/' || *s == 0) + { + NEXTPATTERN (slashend); + } + else + return FNM_NOMATCH; + break; + case PATTERN_endstar: + ADDSTATE; + return 0; + break; + case PATTERN_bracket: + { + int mat; + int mlen, mbuflen; + int firstbyte = (unsigned char) *s; + chars = p->bracket.chars; + if (MB_CUR_MAX == 1) + { + mlen = 1; + mat = (p->bracket.hash[firstbyte / 32] & + (1 << (firstbyte % 32))) ? 1 : 0; + } + else + { + mlen = 0; + mat = 1; + if (! + (p->bracket.hash[firstbyte / 32] & (1 << (firstbyte % 32)))) + { + mat = 0; + goto match; + } + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return FNM_NOMATCH; + + while (*chars) + { + mbuflen = strlen (chars); + if (!strncmp (chars, s, mlen) && mlen == mbuflen) + goto match; + chars += mbuflen + 1; + } + mat = 0; + } + match: + if (mat ^ (p->bracket.flags & BRACKET_INVMATCH)) + { + if (p->bracket.flags & BRACKET_SLASH && *s == '/') + return FNM_NOMATCH; + if (!mlen) + mlen = mblen (s, MB_CUR_MAX); + if (mlen < 0) + return FNM_NOMATCH; + + s += mlen; + if (len - (str - s) < p->bracket.sizerest) + return FNM_NOMATCH; + } + else + { + return FNM_NOMATCH; + } + NEXTPATTERN (bracket); + } + break; + case PATTERN_begindot: + if (*s == '.') + return FNM_NOMATCH; + NEXTPATTERN (begindot); + break; + case PATTERN_fallback: + + buflen = strlen (p->fallback.prefix) + len + 1; + NOMEMTEST (buf = ALLOCALIMIT (buflen)); + strcpy (buf, p->fallback.prefix); + strcat (buf, s); +#ifndef FNMATCH_PREFIX + retval = fnmatch (p->fallback.ptr, buf, p->fallback.flags); + ALLOCAFREE (buf, buflen); + return retval; +#else + if (!(pat2 = malloc (sizeof (fallback_pattern)))) + { + ALLOCAFREE (buf, buflen); + NOMEMTEST (NULL); + } + memcpy (pat2, p, sizeof (fallback_pattern)); + if (!(pat2->fallback.prefix = strdup (buf))) + { + ALLOCAFREE (buf, buflen); + NOMEMTEST (NULL); + } + p = (patterns *) pat2; + ADDSTATE; + return FNM_NOMATCH; +#endif + ALLOCAFREE (buf, buflen); + + case PATTERN_fold: + buflen = 2 * len + 1; + NOMEMTEST (buf = (ALLOCALIMIT (buflen))); + strfold (str, buf); +#ifndef FNMATCH_PREFIX + retval = fnmatch_exec (p->fold.states, buf); + ALLOCAFREE (buf, 2 * len + 1); + return retval; +#else + if (!(pat2 = malloc (sizeof (fallback_pattern)))) + { + ALLOCAFREE (buf, buflen); + NOMEMTEST (NULL); + } + pat2->fold.tp = PATTERN_fold; + if (!(pat2->fold.states = + (void *) fold_fallback (fnmatch_prefix (p->fold.states, buf)))) + { + free (pat2); + ALLOCAFREE (buf, buflen); + NOMEMTEST (NULL); + } + p = (patterns *) pat2; + ADDSTATE; + ALLOCAFREE (buf, buflen); + return FNM_NOMATCH; +#endif + break; + case PATTERN_checkneeded: +#ifndef FNMATCH_PREFIX + { + int ret = fnmatch_exec (p->checkneeded.states, s); + if (ret > 0) + return FNM_NOMATCH; + if (ret < 0) + return ret; + return fnmatch_exec (p->checkneeded.fallback, s); + } +#else + NOMEMTEST (pat2 = malloc (sizeof (checkneeded_pattern))); + pat2->checkneeded.tp = PATTERN_checkneeded; + if (!(pat2->checkneeded.states = + fnmatch_prefix (p->checkneeded.states, s))) + { + free (pat2); + NOMEMTEST (NULL); + } + if (!(pat2->checkneeded.fallback = + fnmatch_prefix (p->checkneeded.fallback, s))) + { + fnmatch_free (pat2->checkneeded.states); + free (pat2); + NOMEMTEST (NULL); + } + p = pat2; + ADDSTATE; + return FNM_NOMATCH; +#endif + break; + } + } + +err: +return -1; diff --git a/lib/fnmatchcomp_star.h b/lib/fnmatchcomp_star.h new file mode 100644 index 0000000..f542e3d --- /dev/null +++ b/lib/fnmatchcomp_star.h @@ -0,0 +1,65 @@ + /* Copyright (C) 2009 + Ondrej Bilka < nel...@seznam.cz > + + This program is free software: you can redistribute it and/or modif y + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see < http:// www.gnu.org/licenses/ > . */ + +#ifndef FNMATCH_PREFIX +sizerest = p->star.sizerest; +chars = p->star.chars; +charno = p->star.charno; +NEXTPATTERN (star); +for (i = 0; i < charno; i++) + { + NEXTPATTERN (chr); + } + +switch (charno) + { + case 0: + for (i = s - str; i <= len - sizerest; i++) + { + if (!fnmatch2_internal (p, str + i, len - i)) + return 0; + SLASHTEST; + } + break; +#define CASEN(type, no) \ + case no:\ + for (i = s-str; i <= len-sizerest; i++) {\ + if (__builtin_expect((*((type*)(str+i)) == *(type *) chars), 0)\ + && !fnmatch2_internal(p, str+i+no, len-i-no)) return 0; \ + SLASHTEST \ + } \ + break; + CASEN (char, 1); + CASEN (UINT2, 2); + case 3: + CASEN (UINT4, 4); + } + +return FNM_NOMATCH; +#else +pat = p; +NEXTPATTERN (star); +for (i = s - str; i < len; i++) + { + fnmatch2_prefix_internal (p, str + i, len - i, ret); + SLASHTEST; + } + +p = pat; +ADDSTATE; +return FNM_NOMATCH; +#endif +break; diff --git a/m4/fnmatchcomp.m4 b/m4/fnmatchcomp.m4 new file mode 100644 index 0000000..c2d3812 --- /dev/null +++ b/m4/fnmatchcomp.m4 @@ -0,0 +1,25 @@ +# fnmatchcomp.m4 serial 10 +dnl Copyright (C) 2005-2007 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + + +AC_DEFUN([gl_FNMATCHCOMP], +[ + gl_PREREQ_FNMATCHCOMP + + FNMATCHCOMP_H=fnmatchcomp.h + AC_LIBOBJ([fnmatchcomp]) + AC_SUBST([FNMATCHCOMP_H]) +]) + + + +# Prerequisites of lib/fnmatchcomp.*. +AC_DEFUN([gl_PREREQ_FNMATCHCOMP], +[ + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])dnl + AC_CHECK_FUNCS_ONCE([mblen wcstombs]) + AC_CHECK_HEADERS_ONCE([stdlib.h wctype.h]) +]) diff --git a/modules/fnmatchcomp b/modules/fnmatchcomp new file mode 100644 index 0000000..5aaf80a --- /dev/null +++ b/modules/fnmatchcomp @@ -0,0 +1,37 @@ +Description: + Allows you to compile fnmatch pattern. You can add to this pattern prefix + which will be added as prefix of any string you will match with this pattern + +Files: +lib/fnmatchcomp.c +lib/fnmatchcomp.h +lib/fnmatchcomp_loop.h +lib/fnmatchcomp_star.h +m4/fnmatchcomp.m4 + +Depends-on: +fnmatch +extensions +alloca +stdbool +wchar +wctype +memchr +memcmp +mbsrtowcs +mbsinit + +configure.ac: +gl_FNMATCHCOMP + +Makefile.am: +BUILT_SOURCES += $(FNMATCHCOMP_H) + +Include: +<fnmatchcomp.h> + +License: +LGPLv2+ + +Maintainer: +Ondrej Bilka diff --git a/modules/fnmatchcomp-tests b/modules/fnmatchcomp-tests new file mode 100644 index 0000000..2219093 --- /dev/null +++ b/modules/fnmatchcomp-tests @@ -0,0 +1,11 @@ +Files: +tests/test-fnmatchcomp.c + +Depends-on: +fnmatchcomp + +configure.ac: + +Makefile.am: +TESTS += test-fnmatchcomp +check_PROGRAMS += test-fnmatchcomp diff --git a/modules/glob b/modules/glob index b765be7..ac07549 100644 --- a/modules/glob +++ b/modules/glob @@ -22,6 +22,7 @@ strdup sys_stat unistd malloc-posix +fnmatchcomp configure.ac: gl_GLOB diff --git a/tests/test-fnmatchcomp.c b/tests/test-fnmatchcomp.c new file mode 100644 index 0000000..5579924 --- /dev/null +++ b/tests/test-fnmatchcomp.c @@ -0,0 +1,132 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <nl_types.h> +#include <langinfo.h> +#include <locale.h> +#define _GNU_SOURCE +#include <fnmatch.h> +#include <fnmatchcomp.h> +int +fnmatch2 (const char *ptr, const char *str, int flags) +{ + return fnmatch_exec (fnmatch_compile (ptr, flags), str); +} + +int +main (int argc, char *argv[]) +{ + int i; + fnmatch_state *x; + int l; + for (l = 0; l < 3; l++) + { + if (l == 0) + setlocale (LC_ALL, "C"); + if (l == 1) + setlocale (LC_ALL, "en_US.UTF-8"); + if (l == 2) + setlocale (LC_ALL, "zh_TW.BIG5"); + + int cnt = 0; + char *fooptr[20]; +#define TESTPREF(st, pat, str, fullstr, flag) if (fnmatch(pat, fullstr, flag) != fnmatch_exec(st, str)) { printf ("partial %s = ~ %s failed flags %i \n", pat, fullstr, flag); } + int j; + fooptr[0] = "*?abc*"; + + fooptr[1] = "*a[a-z]*"; + + fooptr[2] = "*a[[:alnum:], ]*"; + fooptr[3] = "*foo*"; + fooptr[4] = "*[xf]*"; + fooptr[5] = "*abcd*"; + fooptr[6] = "*a[[ = alnum = ], ]*"; + fooptr[7] = " /* o*"; + + for (j = 0; j < 8; j++) + { + + for (i = 0; i < 32; i++) + { + fnmatch_state *s = fnmatch_compile (fooptr[j], i); + fnmatch_state *home = fnmatch_prefix (s, "/home"); + fnmatch_state *bar = fnmatch_prefix (home, "/bar"); + fnmatch_state *baz = fnmatch_prefix (bar, "/baz"); + TESTPREF (home, fooptr[j], "/foo", "/home/foo", i); + TESTPREF (bar, fooptr[j], "/foo", "/home/bar/foo", i); + TESTPREF (bar, fooptr[j], "/bar", "/home/bar/bar", i); + TESTPREF (baz, fooptr[j], "/blabla", "/home/bar/baz/blabla", i); + fnmatch_free (home); + fnmatch_free (bar); + fnmatch_free (s); + fnmatch_free (baz); + } + } + int k; + for (k = 0; k < 3; k++) + { +#define TESTP(pat, str, flag) x = fnmatch_compile(pat, flag); if (fnmatch(pat, str, flag) != fnmatch_exec(x, str)) { printf ("%s = ~ %s failed flags %i \n", pat, str, flag); } fnmatch_free(x); + + for (i = 0; i < 32; i++) + { + TESTP (" */ aa*", "asdasd/aab", i); + TESTP (" */ [ab]a*", "as/asd/aab", i); + TESTP ("*ads*", ".asdvdf", i); + TESTP (" */ ?as", "asdasd/.asasd", i); + TESTP (" */ [a-z]as", "asdasd/.asasd", i); + TESTP (" */ [a-z]a*", "asdasd/aab", i); + } + /* m4 test of fnmatch */ + char const *Apat = 'A' < '\\' ? "[A-\\\\]" : "[\\\\-A]"; + char const *apat = 'a' < '\\' ? "[a-\\\\]" : "[\\\\-a]"; + static char const A_1[] = { 'A' - 1, 0 }; + static char const A01[] = { 'A' + 1, 0 }; + static char const a_1[] = { 'a' - 1, 0 }; + static char const a01[] = { 'a' + 1, 0 }; + static char const bs_1[] = { '\\' - 1, 0 }; + static char const bs01[] = { '\\' + 1, 0 }; +#define y TESTP +#define n TESTP + n ("a*", "", 0) + y ("a*", "abc", 0) + n ("d* /* 1", "d/s/1", FNM_PATHNAME) + y ("a\\bc", "abc", 0) + n ("a\\bc", "abc", FNM_NOESCAPE) + y ("*x", ".x", 0) + n ("*x", ".x", FNM_PERIOD) + y (Apat, "\\", 0) + y (Apat, "A", 0) + y (apat, "\\", 0) + y (apat, "a", 0) + n (Apat, A_1, 0) + n (apat, a_1, 0) + y (Apat, A01, 0) + y (apat, a01, 0) + y (Apat, bs_1, 0) + y (apat, bs_1, 0) + n (Apat, bs01, 0) + n (apat, bs01, 0) + y ("xxXX", "xXxX", FNM_CASEFOLD) + y ("a++(x|yy)b", "a+xyyyyxb", FNM_EXTMATCH) + n ("d* /* 1", "d/s/1", FNM_FILE_NAME) + y ("*", "x", FNM_FILE_NAME | FNM_LEADING_DIR) + y ("x*", "x/y/z", FNM_FILE_NAME | FNM_LEADING_DIR) + y ("*c*", "c/x", FNM_FILE_NAME | FNM_LEADING_DIR); + } + } + setlocale (LC_ALL, "zh_TW.BIG5"); + char tst[] = { 0xa5, 'a', 'b', 'c', 0 }; + char ptr1[] = { '*', 0xa5, 'a', '*', 0 }; + char ptr2[] = { '*', '[', 0xa5, 'b', ']', '*', 0 }; + for (i = 0; i < 32; i++) + { + TESTP ("*a*", tst, i); + TESTP ("*b*", tst, i); + TESTP ("*c*", tst, i); + TESTP (ptr1, tst, i); + TESTP (ptr2, tst, i); + } + + + return (0); +}