Changeset: f9f293f6cb21 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f9f293f6cb21

Removed Files:
        monetdb5/modules/mal/Tests/tokenizer00.maltest
        monetdb5/modules/mal/tokenizer.c
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        common/stream/stream.h
        monetdb5/ChangeLog
        monetdb5/modules/mal/CMakeLists.txt
        monetdb5/modules/mal/Tests/All
Branch: default

Log Message:
Removed MAL tokenizer module.

diffs (truncated from 1043 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -51033,56 +51033,6 @@ user_statistics
 pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
 SYSMONstatistics;
 (empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -39358,56 +39358,6 @@ user_statistics
 pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str])
 SYSMONstatistics;
 (empty)
-tokenizer
-append
-command tokenizer.append(X_0:str):oid
-TKNZRappend;
-tokenize a new string and append it to the tokenizer (duplicate elimination is performed)
-tokenizer
-close
-command tokenizer.close():void
-TKNZRclose;
-close the current tokenizer store
-tokenizer
-depositFile
-command tokenizer.depositFile(X_0:str):void
-TKNZRdepositFile;
-batch insertion from a file of strings to tokenize, each string is separated by a new line
-tokenizer
-getCardinality
-command tokenizer.getCardinality():bat[:lng]
-TKNZRgetCardinality;
-debugging function that returns the unique tokens at each level
-tokenizer
-getCount
-command tokenizer.getCount():bat[:lng]
-TKNZRgetCount;
-debugging function that returns the size of the bats at each level
-tokenizer
-getIndex
-command tokenizer.getIndex():bat[:oid]
-TKNZRgetIndex;
-administrative function that returns the INDEX bat
-tokenizer
-getLevel
-command tokenizer.getLevel(X_0:int):bat[:str]
-TKNZRgetLevel;
-administrative function that returns the bat on level i
-tokenizer
-locate
-pattern tokenizer.locate(X_0:str):oid
-TKNZRlocate;
-if the given string is in the store returns its oid, otherwise oid_nil
-tokenizer
-open
-command tokenizer.open(X_0:str):void
-TKNZRopen;
-open the named tokenizer store, a new one is created if the specified name does not exist
-tokenizer
-take
-pattern tokenizer.take(X_0:oid):str
-TKNZRtakeOid;
-reconstruct and returns the i-th string
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int
diff --git a/common/stream/stream.h b/common/stream/stream.h
--- a/common/stream/stream.h
+++ b/common/stream/stream.h
@@ -245,7 +245,7 @@ typedef struct bstream {
 stream_export bstream *bstream_create(stream *rs, size_t chunk_size); // used all over
 stream_export void bstream_destroy(bstream *s); // all over
-stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c, tokenizer.c
+stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c
 stream_export ssize_t bstream_next(bstream *s); // all over
 
 /* Callback stream is a stream where the read and write functions are
diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog
--- a/monetdb5/ChangeLog
+++ b/monetdb5/ChangeLog
@@ -1,3 +1,7 @@
 # ChangeLog file for MonetDB5
 # This file is updated with Maddlog
+* Wed Sep 13 2023 Sjoerd Mullender <sjo...@acm.org>
+- Removed the MAL tokenizer module. It was never usable from SQL and
+  in this form never would be.
+
diff --git a/monetdb5/modules/mal/CMakeLists.txt b/monetdb5/modules/mal/CMakeLists.txt
--- a/monetdb5/modules/mal/CMakeLists.txt
+++ b/monetdb5/modules/mal/CMakeLists.txt
@@ -32,7 +32,6 @@ target_sources(malmodules
   mal_mapi.c
   remote.c remote.h
   txtsim.c
-  tokenizer.c
   sample.c
   querylog.c querylog.h
   sysmon.c
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -36,8 +36,6 @@ mapi06
 mapi07
 mapi01
 
-tokenizer00
-
 manifold
 manifoldstr
 #manifoldaggr
diff --git a/monetdb5/modules/mal/Tests/tokenizer00.maltest b/monetdb5/modules/mal/Tests/tokenizer00.maltest
deleted file mode 100644
--- a/monetdb5/modules/mal/Tests/tokenizer00.maltest
+++ /dev/null
@@ -1,187 +0,0 @@
-statement ok
-include tokenizer
-
-statement ok
-tokenizer.open("test")
-
-statement ok
-tokenizer.append("http://www.cwi.nl")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.cwi.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/")
-
-statement ok
-tokenizer.append("http://www.ics.forth.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~lsidir")
-
-statement ok
-tokenizer.append("http://www.cook.gr/~mk")
-
-statement ok
-tokenizer.append("http://www.nocook.nl/~mk")
-
-statement ok
-tokenizer.append("http://www.nocook.nl/")
-
-statement ok
-tokenizer.append("ftp://thepiratebay.org")
-
-statement ok
-id := 4:oid
-
-statement ok
-t := tokenizer.take(id)
-
-query T rowsort
-io.print(t)
-----
-"http://www.ics.forth.gr/~lsidir/"
-
-statement ok
-id := tokenizer.locate(t)
-
-query T rowsort
-io.print(id)
-----
-4@0
-
-statement ok
-d:bat[:oid] := tokenizer.getIndex()
-
-query II rowsort
-io.print(d)
-----
-0
-3
-1
-4
-10
-1027
-2
-260
-3
-259
-4
-516
-5
-515
-6
-772
-7
-1028
-8
-1284
-9
-771
-
-statement ok
-level0:bat[:str] := tokenizer.getLevel(0)
-
-statement ok
-level1:bat[:str] := tokenizer.getLevel(1)
-
-statement ok
-level2:bat[:str] := tokenizer.getLevel(2)
-
-statement ok
-level3:bat[:str] := tokenizer.getLevel(3)
-
-query IT rowsort
-io.print(level0)
-----
-0
-http:
-1
-ftp:
-
-query IT rowsort
-io.print(level1)
-----
-0
-(empty)
-1
-(empty)
-
-query IT rowsort
-io.print(level2)
-----
-0
-www.cwi.nl
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org
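For context on the one library header touched above: the change to common/stream/stream.h only drops the tokenizer.c reference from the comment on bstream_read; the bstream interface itself is unchanged. A minimal sketch of how that buffered reader is typically driven follows. It is not part of this changeset, and the open_rstream/close_stream helpers and the buf/pos/len members are assumptions about the surrounding stream API rather than code taken from it.

/* Illustrative sketch only (not from this changeset): read a file in
 * fixed-size chunks through the bstream wrapper declared in
 * common/stream/stream.h.  open_rstream/close_stream and the buf/pos/len
 * members are assumptions about the surrounding stream API. */
#include <stdio.h>
#include "stream.h"

static void
dump_file(const char *path)
{
	stream *rs = open_rstream(path);        /* plain read stream (assumed helper) */
	if (rs == NULL)
		return;
	bstream *bs = bstream_create(rs, 8192); /* buffered wrapper, 8 KiB chunks */
	if (bs == NULL) {
		close_stream(rs);
		return;
	}
	/* bstream_next refills the buffer; a result <= 0 means end of input or error */
	while (bstream_next(bs) > 0) {
		/* unread bytes are assumed to sit in bs->buf[bs->pos .. bs->len) */
		fwrite(bs->buf + bs->pos, 1, bs->len - bs->pos, stdout);
		bs->pos = bs->len;              /* mark the chunk as consumed */
	}
	bstream_destroy(bs);                    /* frees the wrapper (and, typically, rs) */
}

With the tokenizer module gone, tablet.c remains the main in-tree consumer of bstream_read, which is what the updated comment now reflects.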