Changeset: 3a64271e8751 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3a64271e8751
Removed Files:
        sql/test/BugTracker-2020/Tests/txtsim-parallel.Bug-7016.test
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        monetdb5/modules/mal/txtsim.c
        sql/scripts/48_txtsim.sql
        sql/test/BugTracker-2016/Tests/DISTINCT_with_correlated_scalar_subquery_crashes_mserver.Bug-3920.test
        sql/test/emptydb-previous-upgrade-chain-hge/Tests/upgrade.stable.out.int128
        sql/test/emptydb-previous-upgrade-chain/Tests/upgrade.stable.out
        sql/test/emptydb-previous-upgrade-chain/Tests/upgrade.stable.out.int128
        sql/test/emptydb-previous-upgrade-hge/Tests/upgrade.stable.out.int128
        sql/test/emptydb-previous-upgrade/Tests/upgrade.stable.out
        sql/test/emptydb-previous-upgrade/Tests/upgrade.stable.out.int128
        sql/test/emptydb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
        sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out
        sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.int128
        sql/test/emptydb-upgrade-chain/Tests/upgrade.stable.out.ppc64.int128
        sql/test/emptydb-upgrade-hge/Tests/upgrade.stable.out.int128
        sql/test/emptydb-upgrade/Tests/upgrade.stable.out
        sql/test/emptydb-upgrade/Tests/upgrade.stable.out.int128
        sql/test/emptydb/Tests/check.stable.out
        sql/test/emptydb/Tests/check.stable.out.32bit
        sql/test/emptydb/Tests/check.stable.out.int128
        sql/test/miscellaneous/Tests/simple_selects.test
        sql/test/testdb-previous-upgrade-chain-hge/Tests/upgrade.stable.out.int128
        sql/test/testdb-previous-upgrade-chain/Tests/upgrade.stable.out
        sql/test/testdb-previous-upgrade-chain/Tests/upgrade.stable.out.int128
        sql/test/testdb-previous-upgrade-hge/Tests/upgrade.stable.out.int128
        sql/test/testdb-previous-upgrade/Tests/upgrade.stable.out
        sql/test/testdb-previous-upgrade/Tests/upgrade.stable.out.int128
        sql/test/testdb-upgrade-chain-hge/Tests/upgrade.stable.out.int128
        sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
        sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out.int128
        sql/test/testdb-upgrade-hge/Tests/upgrade.stable.out.int128
        sql/test/testdb-upgrade/Tests/upgrade.stable.out
        sql/test/testdb-upgrade/Tests/upgrade.stable.out.int128
Branch: default
Log Message:

Remove similarity functionality (DEPRECATED in Jun2023).


diffs (truncated from 1255 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -35568,11 +35568,6 @@ maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int, X_3:int, X_4:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
 Same as maxlevenshtein but for BATS
-battxtsim
-similarity
-command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] 
-fstrcmp0_impl_bulk;
-(Deprecated) Normalized edit distance between two strings
 baturl
 extractURLHost
 command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] 
@@ -51179,16 +51174,6 @@ command txtsim.qgramselfjoin(X_0:bat[:oi
 qgram_selfjoin;
 QGram self-join on ordered(!) qgram tables and sub-ordered q-gram positions
 txtsim
-similarity
-command txtsim.similarity(X_0:str, X_1:str):dbl 
-fstrcmp0_impl;
-(Deprecated) Normalized edit distance between two strings
-txtsim
-similarity
-command txtsim.similarity(X_0:str, X_1:str, X_2:dbl):dbl 
-fstrcmp_impl;
-(Deprecated) Normalized edit distance between two strings
-txtsim
 soundex
 command txtsim.soundex(X_0:str):str 
 soundex;
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -26618,11 +26618,6 @@ maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int, X_3:int, X_4:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
 Same as maxlevenshtein but for BATS
-battxtsim
-similarity
-command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] 
-fstrcmp0_impl_bulk;
-(Deprecated) Normalized edit distance between two strings
 baturl
 extractURLHost
 command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] 
@@ -39504,16 +39499,6 @@ command txtsim.qgramselfjoin(X_0:bat[:oi
 qgram_selfjoin;
 QGram self-join on ordered(!) qgram tables and sub-ordered q-gram positions
 txtsim
-similarity
-command txtsim.similarity(X_0:str, X_1:str):dbl 
-fstrcmp0_impl;
-(Deprecated) Normalized edit distance between two strings
-txtsim
-similarity
-command txtsim.similarity(X_0:str, X_1:str, X_2:dbl):dbl 
-fstrcmp_impl;
-(Deprecated) Normalized edit distance between two strings
-txtsim
 soundex
 command txtsim.soundex(X_0:str):str 
 soundex;
diff --git a/monetdb5/modules/mal/txtsim.c b/monetdb5/modules/mal/txtsim.c
--- a/monetdb5/modules/mal/txtsim.c
+++ b/monetdb5/modules/mal/txtsim.c
@@ -1391,484 +1391,6 @@ str_2_qgrams(bat *ret, str *val)
        return MAL_SUCCEED;
 }
 
-/* DEPRECATED (see DEPRECATED_END) */
-#define INITIAL_INT_BUFFER_LENGTH 2048
-#define CHECK_INT_BUFFER_LENGTH(BUFFER, BUFFER_LEN, NEXT_LEN, OP)              
\
-       do {                                                                    
                                                        \
-               if ((NEXT_LEN) > *BUFFER_LEN) {                                 
                        \
-                       size_t newlen = (((NEXT_LEN) + 1023) & ~1023); /* align 
to a multiple of 1024 bytes */ \
-                       int *newbuf = GDKmalloc(newlen);                        
                                \
-                       if (!newbuf)                                            
                                                \
-                               throw(MAL, OP, SQLSTATE(HY013) 
MAL_MALLOC_FAIL);                \
-                       GDKfree(*BUFFER);                                       
                                                \
-                       *BUFFER = newbuf;                                       
                                                \
-                       *BUFFER_LEN = newlen;                                   
                                        \
-               }                                                               
                                                                \
-       } while (0)
-
-struct string_data {
-       /* The string to be compared. */
-       const char *data;
-
-       /* The length of the string to be compared. */
-       int data_length;
-
-       /* The number of characters inserted or deleted. */
-       int edit_count;
-};
-
-struct partition {
-       /* Midpoints of this partition.  */
-       int xmid, ymid;
-
-       /* Nonzero if low half will be analyzed minimally.  */
-       int lo_minimal;
-
-       /* Likewise for high half.  */
-       int hi_minimal;
-};
-
-/* NAME
-   diag - find diagonal path
-
-   SYNOPSIS
-   int diag(int xoff, int xlim, int yoff, int ylim, int minimal,
-   struct partition *part);
-
-   DESCRIPTION
-   Find the midpoint of the shortest edit script for a specified
-   portion of the two strings.
-
-   Scan from the beginnings of the strings, and simultaneously from
-   the ends, doing a breadth-first search through the space of
-   edit-sequence.  When the two searches meet, we have found the
-   midpoint of the shortest edit sequence.
-
-   If MINIMAL is nonzero, find the minimal edit script regardless
-   of expense.  Otherwise, if the search is too expensive, use
-   heuristics to stop the search and report a suboptimal answer.
-
-   RETURNS
-   Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal
-   number XMID - YMID equals the number of inserted characters
-   minus the number of deleted characters (counting only characters
-   before the midpoint).  Return the approximate edit cost; this is
-   the total number of characters inserted or deleted (counting
-   only characters before the midpoint), unless a heuristic is used
-   to terminate the search prematurely.
-
-   Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script
-   for the left half of the partition is known; similarly for
-   PART->RIGHT_MINIMAL.
-
-   CAVEAT
-   This function assumes that the first characters of the specified
-   portions of the two strings do not match, and likewise that the
-   last characters do not match.  The caller must trim matching
-   characters from the beginning and end of the portions it is
-   going to specify.
-
-   If we return the "wrong" partitions, the worst this can do is
-   cause suboptimal diff output.  It cannot cause incorrect diff
-   output.  */
-
-static inline int
-diag(int xoff, int xlim, int yoff, int ylim, int minimal, struct partition *part, int too_expensive, struct string_data *string, int *fdiag, int *bdiag)
-{
-       int *const fd = fdiag;  /* Give the compiler a chance. */
-       int *const bd = bdiag;  /* Additional help for the compiler. */
-       const char *const xv = string[0].data;  /* Still more help for the 
compiler. */
-       const char *const yv = string[1].data;  /* And more and more . . . */
-       const int dmin = xoff - ylim;   /* Minimum valid diagonal. */
-       const int dmax = xlim - yoff;   /* Maximum valid diagonal. */
-       const int fmid = xoff - yoff;   /* Center diagonal of top-down search. 
*/
-       const int bmid = xlim - ylim;   /* Center diagonal of bottom-up search. 
*/
-       int fmin = fmid;
-       int fmax = fmid;        /* Limits of top-down search. */
-       int bmin = bmid;
-       int bmax = bmid;        /* Limits of bottom-up search. */
-       int c;                  /* Cost. */
-       int odd = (fmid - bmid) & 1;
-
-       /*
-        * True if southeast corner is on an odd diagonal with respect
-        * to the northwest.
-        */
-       fd[fmid] = xoff;
-       bd[bmid] = xlim;
-       for (c = 1;; ++c) {
-               int d;          /* Active diagonal. */
-
-               /* Extend the top-down search by an edit step in each diagonal. 
*/
-               if (fmin > dmin)
-                       fd[--fmin - 1] = -1;
-               else
-                       ++fmin;
-               if (fmax < dmax)
-                       fd[++fmax + 1] = -1;
-               else
-                       --fmax;
-               for (d = fmax; d >= fmin; d -= 2) {
-                       int x;
-                       int y;
-                       int tlo;
-                       int thi;
-
-                       tlo = fd[d - 1], thi = fd[d + 1];
-
-                       if (tlo >= thi)
-                               x = tlo + 1;
-                       else
-                               x = thi;
-                       y = x - d;
-                       while (x < xlim && y < ylim && xv[x] == yv[y]) {
-                               ++x;
-                               ++y;
-                       }
-                       fd[d] = x;
-                       if (odd && bmin <= d && d <= bmax && bd[d] <= x) {
-                               part->xmid = x;
-                               part->ymid = y;
-                               part->lo_minimal = part->hi_minimal = 1;
-                               return 2 * c - 1;
-                       }
-               }
-               /* Similarly extend the bottom-up search.  */
-               if (bmin > dmin)
-                       bd[--bmin - 1] = INT_MAX;
-               else
-                       ++bmin;
-               if (bmax < dmax)
-                       bd[++bmax + 1] = INT_MAX;
-               else
-                       --bmax;
-               for (d = bmax; d >= bmin; d -= 2) {
-                       int x;
-                       int y;
-                       int tlo;
-                       int thi;
-
-                       tlo = bd[d - 1], thi = bd[d + 1];
-                       if (tlo < thi)
-                               x = tlo;
-                       else
-                               x = thi - 1;
-                       y = x - d;
-                       while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
-                               --x;
-                               --y;
-                       }
-                       bd[d] = x;
-                       if (!odd && fmin <= d && d <= fmax && x <= fd[d]) {
-                               part->xmid = x;
-                               part->ymid = y;
-                               part->lo_minimal = part->hi_minimal = 1;
-                               return 2 * c;
-                       }
-               }
-
-               if (minimal)
-                       continue;
-
-               /* Heuristic: if we've gone well beyond the call of duty, give 
up
-                  and report halfway between our best results so far.  */
-               if (c >= too_expensive) {
-                       int fxybest;
-                       int fxbest;
-                       int bxybest;
-                       int bxbest;
-
-                       /* Pacify `gcc -Wall'. */
-                       fxbest = 0;
-                       bxbest = 0;
-
-                       /* Find forward diagonal that maximizes X + Y.  */
-                       fxybest = -1;
-                       for (d = fmax; d >= fmin; d -= 2) {
-                               int x;
-                               int y;
-
-                               x = fd[d] < xlim ? fd[d] : xlim;
-                               y = x - d;
-
-                               if (ylim < y) {
-                                       x = ylim + d;
-                                       y = ylim;
-                               }
-                               if (fxybest < x + y) {
-                                       fxybest = x + y;
-                                       fxbest = x;
-                               }
-                       }
-                       /* Find backward diagonal that minimizes X + Y.  */
-                       bxybest = INT_MAX;
-                       for (d = bmax; d >= bmin; d -= 2) {
-                               int x;
-                               int y;
-
-                               x = xoff > bd[d] ? xoff : bd[d];
-                               y = x - d;
-
-                               if (y < yoff) {
-                                       x = yoff + d;
-                                       y = yoff;
-                               }
-                               if (x + y < bxybest) {
-                                       bxybest = x + y;
-                                       bxbest = x;
-                               }
-                       }
-                       /* Use the better of the two diagonals.  */
-                       if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) {
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to