Changeset: e0c37210f4b1 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e0c37210f4b1 Modified Files: sql/test/BugTracker-2020/Tests/All Branch: mtest Log Message:
merged with default diffs (truncated from 639 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -6638,6 +6638,7 @@ stdout of test 'MAL-signatures` in direc [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:str, X_2:bat[:int], X_3:bat[:oid]):bat[:int] ", "STRbatWChrAt_strcst;", "" ] [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:bat[:str], X_2:int):bat[:int] ", "STRbatWChrAtcst;", "" ] [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:bat[:str], X_2:int, X_3:bat[:oid]):bat[:int] ", "STRbatWChrAtcst;", "" ] +[ "battxtsim", "similarity", "command battxtsim.similarity(X_1:bat[:str], X_2:bat[:str]):bat[:dbl] ", "fstrcmp0_impl_bulk;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:bte], X_2:bat[:bte]):bat[:sht] ", "UDFBATfuse;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:int], X_2:bat[:int]):bat[:lng] ", "UDFBATfuse;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:sht], X_2:bat[:sht]):bat[:int] ", "UDFBATfuse;", "" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -9197,6 +9197,7 @@ stdout of test 'MAL-signatures` in direc [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:str, X_2:bat[:int], X_3:bat[:oid]):bat[:int] ", "STRbatWChrAt_strcst;", "" ] [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:bat[:str], X_2:int):bat[:int] ", "STRbatWChrAtcst;", "" ] [ "batstr", "unicodeAt", "pattern batstr.unicodeAt(X_1:bat[:str], X_2:int, X_3:bat[:oid]):bat[:int] ", "STRbatWChrAtcst;", "" ] +[ "battxtsim", "similarity", "command battxtsim.similarity(X_1:bat[:str], X_2:bat[:str]):bat[:dbl] ", "fstrcmp0_impl_bulk;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:bte], X_2:bat[:bte]):bat[:sht] ", "UDFBATfuse;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:int], X_2:bat[:int]):bat[:lng] ", "UDFBATfuse;", "" ] [ "batudf", "fuse", "command batudf.fuse(X_1:bat[:lng], X_2:bat[:lng]):bat[:hge] ", "UDFBATfuse;", "" ] diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -500,7 +500,7 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool struct canditer ci; BUN cnt; BUN r; - PROPrec *prop, *nprop; + PROPrec *prop = NULL, *nprop; oid hseq = n->hseqbase; char buf[64]; lng t0 = 0; @@ -548,9 +548,9 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool IMPSdestroy(b); /* imprints do not support updates yet */ OIDXdestroy(b); - if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) { + if (BATcount(b) == 0 || (prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) { if ((nprop = BATgetprop(n, GDK_MAX_VALUE)) != NULL) { - if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) < 0) { + if (BATcount(b) == 0 || ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) < 0) { if (s == NULL) { BATsetprop(b, GDK_MAX_VALUE, b->ttype, VALptr(&nprop->v)); if ((nprop = BATgetprop(n, GDK_MAX_POS)) != NULL) @@ -567,9 +567,9 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool BATrmprop(b, GDK_MAX_POS); } } - if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) { + if (BATcount(b) == 0 || (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) { if ((nprop = BATgetprop(n, GDK_MIN_VALUE)) != NULL) { - if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) > 0) { + if (BATcount(b) == 0 || ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) > 0) { if (s == NULL) { BATsetprop(b, GDK_MIN_VALUE, b->ttype, VALptr(&nprop->v)); if ((nprop = BATgetprop(n, GDK_MIN_POS)) != NULL) diff --git a/monetdb5/modules/mal/txtsim.c b/monetdb5/modules/mal/txtsim.c --- a/monetdb5/modules/mal/txtsim.c +++ b/monetdb5/modules/mal/txtsim.c @@ -22,6 +22,7 @@ #include "mal.h" #include <string.h> #include "gdk.h" +#include "str.h" #include <limits.h> #include "mal_exception.h" @@ -263,7 +264,6 @@ soundex_code(char *Name, char *Key) } } - static str soundex_impl(str *res, str *Name) { @@ -346,8 +346,6 @@ CMDqgramnormalize(str *res, str *Input) * ========================================================================= */ -#define PARAMS(proto) proto - /* * Data on one input string being compared. */ @@ -362,38 +360,6 @@ struct string_data { int edit_count; }; -static struct string_data string[2]; - -static int max_edits; /* compareseq stops when edits > max_edits */ - -#ifdef MINUS_H_FLAG - -/* This corresponds to the diff -H flag. With this heuristic, for - strings with a constant small density of changes, the algorithm is - linear in the strings size. This is unlikely in typical uses of - fstrcmp, and so is usually compiled out. Besides, there is no - interface to set it true. */ -static int heuristic; - -#endif - - -/* Vector, indexed by diagonal, containing 1 + the X coordinate of the - point furthest along the given diagonal in the forward search of the - edit matrix. */ -static int *fdiag; - -/* Vector, indexed by diagonal, containing the X coordinate of the point - furthest along the given diagonal in the backward search of the edit - matrix. */ -static int *bdiag; - -/* Edit scripts longer than this are too expensive to compute. */ -static int too_expensive; - -/* Snakes bigger than this are considered `big'. */ -#define SNAKE_LIMIT 20 - struct partition { /* Midpoints of this partition. */ int xmid, ymid; @@ -405,7 +371,6 @@ struct partition { int hi_minimal; }; - /* NAME diag - find diagonal path @@ -450,10 +415,8 @@ struct partition { cause suboptimal diff output. It cannot cause incorrect diff output. */ -static int diag PARAMS((int, int, int, int, int, struct partition *)); - -static int -diag(int xoff, int xlim, int yoff, int ylim, int minimal, struct partition *part) +static inline int +diag(int xoff, int xlim, int yoff, int ylim, int minimal, struct partition *part, int too_expensive, struct string_data *string, int *fdiag, int *bdiag) { int *const fd = fdiag; /* Give the compiler a chance. */ int *const bd = bdiag; /* Additional help for the compiler. */ @@ -478,9 +441,7 @@ diag(int xoff, int xlim, int yoff, int y bd[bmid] = xlim; for (c = 1;; ++c) { int d; /* Active diagonal. */ - int big_snake; - big_snake = 0; /* Extend the top-down search by an edit step in each diagonal. */ if (fmin > dmin) fd[--fmin - 1] = -1; @@ -493,7 +454,6 @@ diag(int xoff, int xlim, int yoff, int y for (d = fmax; d >= fmin; d -= 2) { int x; int y; - int oldx; int tlo; int thi; @@ -503,14 +463,11 @@ diag(int xoff, int xlim, int yoff, int y x = tlo + 1; else x = thi; - oldx = x; y = x - d; while (x < xlim && y < ylim && xv[x] == yv[y]) { ++x; ++y; } - if (x - oldx > SNAKE_LIMIT) - big_snake = 1; fd[d] = x; if (odd && bmin <= d && d <= bmax && bd[d] <= x) { part->xmid = x; @@ -531,7 +488,6 @@ diag(int xoff, int xlim, int yoff, int y for (d = bmax; d >= bmin; d -= 2) { int x; int y; - int oldx; int tlo; int thi; @@ -540,14 +496,11 @@ diag(int xoff, int xlim, int yoff, int y x = tlo; else x = thi - 1; - oldx = x; y = x - d; while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { --x; --y; } - if (oldx - x > SNAKE_LIMIT) - big_snake = 1; bd[d] = x; if (!odd && fmin <= d && d <= fmax && x <= fd[d]) { part->xmid = x; @@ -560,90 +513,6 @@ diag(int xoff, int xlim, int yoff, int y if (minimal) continue; -#ifdef MINUS_H_FLAG - /* Heuristic: check occasionally for a diagonal that has made lots - of progress compared with the edit distance. If we have any - such, find the one that has made the most progress and return - it as if it had succeeded. - - With this heuristic, for strings with a constant small density - of changes, the algorithm is linear in the strings size. */ - if (c > 200 && big_snake && heuristic) { - int best; - - best = 0; - for (d = fmax; d >= fmin; d -= 2) { - int dd; - int x; - int y; - int v; - - dd = d - fmid; - x = fd[d]; - y = x - d; - v = (x - xoff) * 2 - dd; - - if (v > 12 * (c + (dd < 0 ? -dd : dd))) { - if (v > best && xoff + SNAKE_LIMIT <= x && x < xlim && yoff + SNAKE_LIMIT <= y && y < ylim) { - /* We have a good enough best diagonal; now insist - that it end with a significant snake. */ - int k; - - for (k = 1; xv[x - k] == yv[y - k]; k++) { - if (k == SNAKE_LIMIT) { - best = v; - part->xmid = x; - part->ymid = y; - break; - } - } - } - } - } - if (best > 0) { - part->lo_minimal = 1; - part->hi_minimal = 0; - return 2 * c - 1; - } - best = 0; - for (d = bmax; d >= bmin; d -= 2) { - int dd; - int x; - int y; - int v; - - dd = d - bmid; - x = bd[d]; - y = x - d; - v = (xlim - x) * 2 + dd; - - if (v > 12 * (c + (dd < 0 ? -dd : dd))) { - if (v > best && xoff < x && x <= xlim - SNAKE_LIMIT && yoff < y && y <= ylim - SNAKE_LIMIT) { - /* We have a good enough best diagonal; now insist - that it end with a significant snake. */ - int k; - - for (k = 0; xv[x + k] == yv[y + k]; k++) { - if (k == SNAKE_LIMIT - 1) { - best = v; - part->xmid = x; - part->ymid = y; - break; - } - } - } - } - } - if (best > 0) { - part->lo_minimal = 0; - part->hi_minimal = 1; - return 2 * c - 1; - } - } -#else - (void) big_snake; -#endif /* MINUS_H_FLAG */ - /* Heuristic: if we've gone well beyond the call of duty, give up and report halfway between our best results so far. */ if (c >= too_expensive) { @@ -729,10 +598,8 @@ diag(int xoff, int xlim, int yoff, int y If MINIMAL is nonzero, find a minimal difference no matter how expensive it is. */ -static void compareseq PARAMS((int, int, int, int, int)); - -static void _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list