Changeset: f9f293f6cb21 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f9f293f6cb21
Removed Files:
        monetdb5/modules/mal/Tests/tokenizer00.maltest
        monetdb5/modules/mal/tokenizer.c
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        common/stream/stream.h
        monetdb5/ChangeLog
        monetdb5/modules/mal/CMakeLists.txt
        monetdb5/modules/mal/Tests/All
Branch: default
Log Message:

Removed MAL tokenizer module.


diffs (truncated from 1043 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -51033,56 +51033,6 @@ user_statistics
 pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str]) 
 SYSMONstatistics;
 (empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid 
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void 
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void 
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng] 
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng] 
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid] 
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str] 
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid 
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void 
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str 
-TKNZRtakeOid;
-reconstruct and returns the i-th string
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int 
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -39358,56 +39358,6 @@ user_statistics
 pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str]) 
 SYSMONstatistics;
 (empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid 
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void 
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void 
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng] 
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng] 
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid] 
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str] 
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid 
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void 
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str 
-TKNZRtakeOid;
-reconstruct and returns the i-th string
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int 
diff --git a/common/stream/stream.h b/common/stream/stream.h
--- a/common/stream/stream.h
+++ b/common/stream/stream.h
@@ -245,7 +245,7 @@ typedef struct bstream {
 
 stream_export bstream *bstream_create(stream *rs, size_t chunk_size); // used all over
 stream_export void bstream_destroy(bstream *s); // all over
-stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c, tokenizer.c
+stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c
 stream_export ssize_t bstream_next(bstream *s); // all over
 
 /* Callback stream is a stream where the read and write functions are
diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog
--- a/monetdb5/ChangeLog
+++ b/monetdb5/ChangeLog
@@ -1,3 +1,7 @@
 # ChangeLog file for MonetDB5
 # This file is updated with Maddlog
 
+* Wed Sep 13 2023 Sjoerd Mullender <sjo...@acm.org>
+- Removed the MAL tokenizer module.  It was never usable from SQL and
+  in this form never would be.
+
diff --git a/monetdb5/modules/mal/CMakeLists.txt b/monetdb5/modules/mal/CMakeLists.txt
--- a/monetdb5/modules/mal/CMakeLists.txt
+++ b/monetdb5/modules/mal/CMakeLists.txt
@@ -32,7 +32,6 @@ target_sources(malmodules
   mal_mapi.c
   remote.c remote.h
   txtsim.c
-  tokenizer.c
   sample.c
   querylog.c querylog.h
   sysmon.c
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -36,8 +36,6 @@ mapi06
 mapi07
 mapi01
 
-tokenizer00
-
 manifold
 manifoldstr
 #manifoldaggr
diff --git a/monetdb5/modules/mal/Tests/tokenizer00.maltest b/monetdb5/modules/mal/Tests/tokenizer00.maltest
deleted file mode 100644
--- a/monetdb5/modules/mal/Tests/tokenizer00.maltest
+++ /dev/null
@@ -1,187 +0,0 @@
-statement ok
-include tokenizer
-
-statement ok
-tokenizer.open("test")
-
-statement ok
-tokenizer.append("http://www.cwi.nl")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~mk")
-
-statement ok
-tokenizer.append("http://www.nocook.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.nocook.nl/")
-
-statement ok
-tokenizer.append("ftp://thepiratebay.org")
-
-statement ok
-id := 4:oid
-
-statement ok
-t  := tokenizer.take(id)
-
-query T rowsort
-io.print(t)
-----
-"http://www.ics.forth.gr/~lsidir/"
-
-statement ok
-id := tokenizer.locate(t)
-
-query T rowsort
-io.print(id)
-----
-4@0
-
-statement ok
-d:bat[:oid] := tokenizer.getIndex()
-
-query II rowsort
-io.print(d)
-----
-0
-3
-1
-4
-10
-1027
-2
-260
-3
-259
-4
-516
-5
-515
-6
-772
-7
-1028
-8
-1284
-9
-771
-
-statement ok
-level0:bat[:str] := tokenizer.getLevel(0)
-
-statement ok
-level1:bat[:str] := tokenizer.getLevel(1)
-
-statement ok
-level2:bat[:str] := tokenizer.getLevel(2)
-
-statement ok
-level3:bat[:str] := tokenizer.getLevel(3)
-
-query IT rowsort
-io.print(level0)
-----
-0
-http:
-1
-ftp:
-
-query IT rowsort
-io.print(level1)
-----
-0
-(empty)
-1
-(empty)
-
-query IT rowsort
-io.print(level2)
-----
-0
-www.cwi.nl
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to