Changeset: 28c70933daa5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=28c70933daa5
Added Files:
    sql/test/Tests/hot_snapshot_lz4_lite.py
    sql/test/Tests/hot_snapshot_lz4_lite.stable.err
    sql/test/Tests/hot_snapshot_lz4_lite.stable.out
Modified Files:
    monetdb5/mal/Tests/tst201.malC
    monetdb5/modules/mal/pcre.c
    sql/test/Tests/All
    sql/test/Tests/hot_snapshot.py
Branch: default
Log Message:
merged with jun diffs (truncated from 741 to 300 lines): diff --git a/monetdb5/mal/Tests/tst201.malC b/monetdb5/mal/Tests/tst201.malC --- a/monetdb5/mal/Tests/tst201.malC +++ b/monetdb5/mal/Tests/tst201.malC @@ -61,14 +61,14 @@ exit MALException; v45:= algebra.slice(b,v43,v44); v46:= io.print(v45); - v75:= algebra.likeselect(b, nil:bat[:oid], "ox", nil:str, false, false); + v75:= algebra.likeselect(b, nil:bat[:oid], "ox", "", false, false); v76:= io.print(v75); - v78:= algebra.likeselect(b, nil:bat[:oid], "fo", nil:str, false, false); + v78:= algebra.likeselect(b, nil:bat[:oid], "fo", "", false, false); v79:= io.print(v78); - v81:= algebra.likeselect(b, nil:bat[:oid], "fox", nil:str, false, false); + v81:= algebra.likeselect(b, nil:bat[:oid], "fox", "", false, false); v82:= io.print(v81); - v84:= algebra.likeselect(b, nil:bat[:oid], " fox", nil:str, false, false); + v84:= algebra.likeselect(b, nil:bat[:oid], " fox", "", false, false); v85:= io.print(v84); diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c --- a/monetdb5/modules/mal/pcre.c +++ b/monetdb5/modules/mal/pcre.c @@ -87,13 +87,14 @@ mal_export str ILIKEjoin(bat *r1, bat *r mal_export str ILIKEjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate); /* current implementation assumes simple %keyword% [keyw%]* */ -typedef struct RE { +struct RE { char *k; uint32_t *w; - bool search; + bool search:1, + atend:1; size_t len; struct RE *n; -} RE; +}; /* We cannot use strcasecmp and strncasecmp since they work byte for * byte and don't deal with multibyte encodings (such as UTF-8). 
@@ -235,32 +236,32 @@ myucslen(const uint32_t *ucs) return i; } -static inline int -mywstrncasecmp(const char *restrict s1, const uint32_t *restrict s2, size_t n2) +static inline bool +mywstrncaseeq(const char *restrict s1, const uint32_t *restrict s2, size_t n2, bool atend) { uint32_t c1; while (n2 > 0) { size_t nn1 = utfc8touc(&c1, s1); if (nn1 == 0 || nn1 == (size_t) -1) - return -(*s2 != 0); + return (*s2 == 0); if (*s2 == 0) - return 1; + return false; if (nn1 == (size_t) -1 || nn1 == (size_t) -2) - return 0; /* actually an error that shouldn't happen */ + return true; /* actually an error that shouldn't happen */ #if SIZEOF_WCHAR_T == 2 if (c1 > 0xFFFF || *s2 > 0xFFFF) { if (c1 != *s2) - return c1 - *s2; + return false; } else #endif if (towlower((wint_t) c1) != towlower((wint_t) *s2)) - return towlower((wint_t) c1) - towlower((wint_t) *s2); + return false; s1 += nn1; n2--; s2++; } - return 0; + return !atend || *s1 == 0; } static inline int @@ -318,12 +319,12 @@ mywstrcasecmp(const char *restrict s1, c } static inline const char * -mywstrcasestr(const char *restrict haystack, const uint32_t *restrict wneedle) +mywstrcasestr(const char *restrict haystack, const uint32_t *restrict wneedle, bool atend) { size_t nlen = myucslen(wneedle); if (nlen == 0) - return haystack; + return atend ? 
haystack + strlen(haystack) : haystack; size_t hlen = strlen(haystack); @@ -349,7 +350,7 @@ mywstrcasestr(const char *restrict hayst break; h += j; } - if (i == nlen) + if (i == nlen && (!atend || haystack[h] == 0)) return haystack; haystack += step; hlen -= step; @@ -364,31 +365,44 @@ static bool re_simple(const char *pat, unsigned char esc) { bool escaped = false; - bool percatend = false; if (pat == 0) - return 0; + return false; if (*pat == '%') { - percatend = true; pat++; } while (*pat) { - percatend = false; if (escaped) { escaped = false; } else if ((unsigned char) *pat == esc) { escaped = true; } else if (*pat == '_') { - return 0; - } else if (*pat == '%') { - percatend = true; + return false; } pat++; } - return percatend; + return true; } static bool +re_is_pattern_properly_escaped(const char *pat, unsigned char esc) +{ + bool escaped = false; + + if (pat == 0) + return true; + while (*pat) { + if (escaped) { + escaped = false; + } else if ((unsigned char) *pat == esc) { + escaped = true; + } + pat++; + } + return escaped ? false : true; +} + +static inline bool is_strcmpable(const char *pat, const char *esc) { if (pat[strcspn(pat, "%_")]) @@ -397,15 +411,17 @@ is_strcmpable(const char *pat, const cha } static inline bool -re_match_ignore(const char *s, RE *pattern) +re_match_ignore(const char *restrict s, const struct RE *restrict pattern) { - RE *r; + const struct RE *r; for (r = pattern; r; r = r->n) { if (*r->w == 0 && (r->search || *s == 0)) return true; if (!*s || - (r->search ? (s = mywstrcasestr(s, r->w)) == NULL : mywstrncasecmp(s, r->w, r->len) != 0)) + (r->search + ? 
(s = mywstrcasestr(s, r->w, r->atend)) == NULL + : !mywstrncaseeq(s, r->w, r->len, r->atend))) return false; s += r->len; } @@ -413,15 +429,22 @@ re_match_ignore(const char *s, RE *patte } static inline bool -re_match_no_ignore(const char *s, RE *pattern) +re_match_no_ignore(const char *restrict s, const struct RE *restrict pattern) { - RE *r; + const struct RE *r; + size_t l; for (r = pattern; r; r = r->n) { if (*r->k == 0 && (r->search || *s == 0)) return true; if (!*s || - (r->search ? (s = strstr(s, r->k)) == NULL : strncmp(s, r->k, r->len) != 0)) + (r->search + ? (r->atend + ? (l = strlen(s)) < r->len || strcmp(s + l - r->len, r->k) != 0 + : (s = strstr(s, r->k)) == NULL) + : (r->atend + ? strcmp(s, r->k) != 0 + : strncmp(s, r->k, r->len) != 0))) return false; s += r->len; } @@ -429,13 +452,13 @@ re_match_no_ignore(const char *s, RE *pa } static void -re_destroy(RE *p) +re_destroy(struct RE *p) { if (p) { GDKfree(p->k); GDKfree(p->w); do { - RE *n = p->n; + struct RE *n = p->n; GDKfree(p); p = n; @@ -448,15 +471,15 @@ re_destroy(RE *p) * fields in the first structure are allocated, whereas in all * subsequent structures the fields point into the allocated buffer of * the first. 
*/ -static RE * +static struct RE * re_create(const char *pat, bool caseignore, uint32_t esc) { - RE *r = (RE*)GDKmalloc(sizeof(RE)), *n = r; + struct RE *r = GDKmalloc(sizeof(struct RE)), *n = r; bool escaped = false; if (r == NULL) return NULL; - *r = (struct RE) {.search = false}; + *r = (struct RE) {.atend = true}; while (esc != '%' && *pat == '%') { pat++; /* skip % */ @@ -475,25 +498,28 @@ re_create(const char *pat, bool caseigno while (*wp) { if (escaped) { *wq++ = *wp; + n->len++; escaped = false; } else if (*wp == esc) { escaped = true; } else if (*wp == '%') { - n->len = (size_t) (wq - n->w); + n->atend = false; while (wp[1] == '%') wp++; if (wp[1]) { - n = n->n = GDKmalloc(sizeof(RE)); + n = n->n = GDKmalloc(sizeof(struct RE)); if (n == NULL) goto bailout; - *n = (struct RE) {.search = true, .w = wp + 1}; + *n = (struct RE) {.search = true, .atend = true, .w = wp + 1}; } *wq++ = 0; } else { *wq++ = *wp; + n->len++; } wp++; } + *wq = 0; } else { char *p, *q; if ((p = GDKstrdup(pat)) == NULL) { @@ -505,25 +531,28 @@ re_create(const char *pat, bool caseigno while (*p) { if (escaped) { *q++ = *p; + n->len++; escaped = false; } else if ((unsigned char) *p == esc) { escaped = true; } else if (*p == '%') { - n->len = (size_t) (q - n->k); + n->atend = false; while (p[1] == '%') p++; if (p[1]) { - n = n->n = GDKmalloc(sizeof(RE)); + n = n->n = GDKmalloc(sizeof(struct RE)); if (n == NULL) goto bailout; - *n = (struct RE) {.search = true, .k = p + 1}; + *n = (struct RE) {.search = true, .atend = true, .k = p + 1}; } *q++ = 0; } else { *q++ = *p; + n->len++; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list