Control: tags -1 moreinfo confirmed

On 2021-06-22 22:50:00 -0400, Joseph Nahmias wrote:
> Package: release.debian.org
> Severity: normal
> User: release.debian....@packages.debian.org
> Usertags: unblock
>
> Please unblock package dovecot-fts-xapian
>
> This version (1.4.9a) fixes a number of important bugs in the indexer
> including:
>
> + fix indexing of attachments, closes: #985654
> + fix indexing of accented characters
> + fix memory errors / segfaults when indexing large mailboxes
>
> Source debdiff from 1.4.7-1 (currently in testing) to 1.4.9a-1 is attached
> here. Please let me know when approved so I can upload to unstable.
>
> unblock dovecot-fts-xapian/1.4.9a-1
Assuming that the upload happens soon, please go ahead and remove the moreinfo tag once the new version is available in unstable. Cheers > > Thanks, > --Joe > diffstat for dovecot-fts-xapian-1.4.7 dovecot-fts-xapian-1.4.9a > > .gitignore | 65 ++++++++++ > Makefile.am | 4 > PACKAGES/RPM/README.md | 20 +++ > PACKAGES/RPM/fts-xapian.spec | 41 ++++++ > README.md | 46 +++++-- > configure.ac | 2 > debian/changelog | 11 + > debian/watch | 4 > fts-xapian-config.h.in | 2 > src/fts-backend-xapian-functions.cpp | 175 +++++++++++++++++++++-------- > src/fts-backend-xapian.cpp | 211 > ++++++++++++++++++----------------- > src/fts-xapian-plugin.c | 2 > src/fts-xapian-plugin.h | 9 - > 13 files changed, 425 insertions(+), 167 deletions(-) > > diff -Nru -w dovecot-fts-xapian-1.4.7/.gitignore > dovecot-fts-xapian-1.4.9a/.gitignore > --- dovecot-fts-xapian-1.4.7/.gitignore 1969-12-31 19:00:00.000000000 > -0500 > +++ dovecot-fts-xapian-1.4.9a/.gitignore 2021-04-24 16:27:55.000000000 > -0400 > @@ -0,0 +1,65 @@ > +# http://www.gnu.org/software/automake > + > +Makefile.in > +/ar-lib > +/mdate-sh > +/py-compile > +/test-driver > +/ylwrap > +.deps/ > +.dirstamp > + > +# http://www.gnu.org/software/autoconf > + > +autom4te.cache > +/autoscan.log > +/autoscan-*.log > +/aclocal.m4 > +/compile > +/config.guess > +/config.h.in > +/config.log > +/config.status > +/config.sub > +/configure > +/configure.scan > +/depcomp > +/install-sh > +/missing > +/stamp-h1 > +/stamp-h2 > +/stamp.h > + > +# https://www.gnu.org/software/libtool/ > + > +/ltmain.sh > +/libtool > + > +# http://www.gnu.org/software/texinfo > + > +/texinfo.tex > + > +# http://www.gnu.org/software/m4/ > + > +m4/libtool.m4 > +m4/ltoptions.m4 > +m4/ltsugar.m4 > +m4/ltversion.m4 > +m4/lt~obsolete.m4 > + > +# Generated Makefile > +# (meta build system like autotools, > +# can automatically generate from config.status script > +# (which is called by configure script)) > +Makefile > + > +/dummy-config.h > +/dummy-config.h.in > 
+/fts-xapian-config.h > +/run-test.sh > + > +src/*.o > +src/*.lo > +src/*.la > + > +src/.libs/** > diff -Nru -w dovecot-fts-xapian-1.4.7/Makefile.am > dovecot-fts-xapian-1.4.9a/Makefile.am > --- dovecot-fts-xapian-1.4.7/Makefile.am 2021-01-31 14:06:29.000000000 > -0500 > +++ dovecot-fts-xapian-1.4.9a/Makefile.am 2021-04-24 16:27:55.000000000 > -0400 > @@ -2,5 +2,5 @@ > > ACLOCAL_AMFLAGS = -I m4 > > -PACKAGE_VERSION = "1.4.7" > -VERSION = "1.4.7" > +PACKAGE_VERSION = "1.4.9a" > +VERSION = "1.4.9a" > diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md > dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md > --- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md 1969-12-31 > 19:00:00.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md 2021-04-24 > 16:27:55.000000000 -0400 > @@ -0,0 +1,20 @@ > +As root: > + > + Install the development environment and required devel packages: > + -- dnf groupinstall "Development Tools" > + -- dnf install rpm-build rpm-devel rpmlint make coreutils diffutils > patch rpmdevtools > + -- dnf install dovecot-devel dovecot libicu-devel icu xapian-core > xapian-core-devel > + > +As a normal user: > + > + Create the ~/rpmbuild tree as a normal user (never build rpms as root): > + -- rpmdev-setuptree > + Place the spec file under: > + ~/rpmbuild/SPECS/fts-xapian.spec > + Place the tar.gz sources under: > + ~/rpmbuild/SOURCES/fts-xapian-1.4.9a.tar.gz > + Generate the binary rpm with: > + -- QA_RPATHS=$(( 0x0001|0x0010 )) rpmbuild -bb > ~/rpmbuild/SPECS/fts-xapian.spec > + > +Your RPM packages will be under ~/rpmbuild/RPMS/x86_64/ > + > diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec > dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec > --- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec 1969-12-31 > 19:00:00.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec 2021-04-24 > 16:27:55.000000000 -0400 > @@ -0,0 +1,41 @@ > +Name: fts-xapian > +Version: 1.4.9a > +Release: 
1%{?dist} > +Summary: Dovecot FTS plugin based on Xapian > + > +License: LGPL-2.1 > +URL: https://github.com/grosjo/fts-xapian > +Source0: fts-xapian-1.4.9a.tar.gz > + > +BuildRequires: xapian-core-devel, libicu-devel, dovecot-devel > +Requires: xapian-core, xapian-core-libs, dovecot > + > +%description > +This project intends to provide a straightforward, simple and maintenance > free, way to configure FTS plugin for Dovecot, leveraging the efforts by the > Xapian.org team. > + > +This effort came after Dovecot team decided to deprecate "fts_squat" > included in the dovecot core, and due to the complexity of the Solr plugin > capabilitles, un-needed for most users. > + > + > +%prep > +%autosetup > +autoreconf -vi > +./configure --with-dovecot=/usr/lib64/dovecot > + > + > +%build > +make %{?_smp_mflags} > + > + > +%install > +%make_install > + > + > +%files > +/usr/lib64/dovecot/lib21_fts_xapian_plugin.la > +/usr/lib64/dovecot/lib21_fts_xapian_plugin.so > +/usr/lib64/dovecot/lib21_fts_xapian_plugin.a > + > + > +%changelog > +* Tue Apr 6 2021 xapian > +- > diff -Nru -w dovecot-fts-xapian-1.4.7/README.md > dovecot-fts-xapian-1.4.9a/README.md > --- dovecot-fts-xapian-1.4.7/README.md 2021-01-31 14:06:29.000000000 > -0500 > +++ dovecot-fts-xapian-1.4.9a/README.md 2021-04-24 16:27:55.000000000 > -0400 > @@ -79,12 +79,14 @@ > plugin = fts fts_xapian (...) > > fts = xapian > - fts_xapian = partial=3 full=20 attachments=0 verbose=0 > + fts_xapian = partial=3 full=20 verbose=0 > > fts_autoindex = yes > fts_enforced = yes > > fts_autoindex_exclude = \Trash > + > + fts_decoder = decode2text // To index attachements > (...) 
> } > > @@ -92,27 +94,48 @@ > service indexer-worker { > vsz_limit = 2G // or above (or 0 if you have rather large memory usable > on your server, which is preferred for performance) > } > + > +service decode2text { > + executable = script /usr/libexec/dovecot/decode2text.sh > + user = dovecot > + unix_listener decode2text { > + mode = 0666 > + } > +} > (...) > > ``` > -Partial & full parameters : 3 and 20 are the NGram values for header fields, > which means the keywords created for fields (To, Cc, ...) are between 3 and > 20 chars long. > -Full words are also added by default (if not longer than 245 chars, which is > the limit of Xapian capability). > + > +Indexing options > +---------------- > + > +| Option | Description | Possible values > | Default value | > +|----------------|--------------------------------|--------------------------------------|---------------| > +| partial & full | NGram values for header fields | between 3 and 20 > characters | 3 & 20 | > +| verbose | Logs verbosity | 0 (silent), 1 (verbose) > or 2 (debug) | 0 | > + > +NGrams details > +-------------- > + > +The partial & full parameters are the NGram values for header fields, which > means the keywords created for fields (To, > +Cc, ...) are between 3 and 20 chars long. Full words are also added by > default (if not longer than 245 chars, which is > +the limit of Xapian capability). > > Example: "<john@doe>" will create joh, ohn, hn@, ..., john@d, ohn@do, ..., > and finally john@doe as searchable keywords. 
> > -Set "verbose=1" to see verbose messages in the log, "verbose=2" for debug > -Set "attachments=1" if you want to index attachments (this works only for > text attachments) > +Index updating > +-------------- > > -Restart Dovecot: > +Just restart Dovecot: > > -``` > +```sh > sudo servicectl restart dovecot > ``` > > > -If this is not a fresh install of dovecot, you need to re-index your > mailboxes > +If this is not a fresh install of dovecot, you need to re-index your > mailboxes: > > -``` > +```sh > doveadm index -A -q \* > ``` > > @@ -121,7 +144,8 @@ > > > You shall put in a cron the following command (for daily run for instance) : > -``` > + > +```sh > doveadm fts optimize -A > ``` > > @@ -131,4 +155,6 @@ > > Please submit requests/bugs via the [GitHub issue > tracker](https://github.com/grosjo/fts-xapian/issues). > > +A Matrix Room exists also at : #xapian-dovecot:grosjo.net > + > Thanks to Aki Tuomi <aki.tu...@open-xchange.com>, Stephan Bosch > <step...@rename-it.nl>, Paul Hecker <p...@iwascoding.com> > diff -Nru -w dovecot-fts-xapian-1.4.7/configure.ac > dovecot-fts-xapian-1.4.9a/configure.ac > --- dovecot-fts-xapian-1.4.7/configure.ac 2021-01-31 14:06:29.000000000 > -0500 > +++ dovecot-fts-xapian-1.4.9a/configure.ac 2021-04-24 16:27:55.000000000 > -0400 > @@ -1,4 +1,4 @@ > -AC_INIT([Dovecot FTS Xapian], [1.4.7], [j...@grosjo.net], > [dovecot-fts-xapian]) > +AC_INIT([Dovecot FTS Xapian], [1.4.9a], [j...@grosjo.net], > [dovecot-fts-xapian]) > AC_CONFIG_AUX_DIR([.]) > AC_CONFIG_SRCDIR([src]) > AC_CONFIG_MACRO_DIR([m4]) > diff -Nru -w dovecot-fts-xapian-1.4.7/debian/changelog > dovecot-fts-xapian-1.4.9a/debian/changelog > --- dovecot-fts-xapian-1.4.7/debian/changelog 2021-01-31 21:35:02.000000000 > -0500 > +++ dovecot-fts-xapian-1.4.9a/debian/changelog 2021-06-22 > 22:40:48.000000000 -0400 > @@ -1,3 +1,14 @@ > +dovecot-fts-xapian (1.4.9a-1) unstable; urgency=medium > + > + * [2da6c89] d/watch: allow non-numbers in version > + * [18c496d] New upstream version 
1.4.9a > + + fix indexing of attachments, closes: #985654 > + + fix indexing of accented characters > + + fix memory errors / segfaults when indexing large mailboxes > + + handle indexing of virtual folders > + > + -- Joseph Nahmias <je...@debian.org> Tue, 22 Jun 2021 22:40:48 -0400 > + > dovecot-fts-xapian (1.4.7-1) unstable; urgency=medium > > * [e3bdb1b] fix VCS paths > diff -Nru -w dovecot-fts-xapian-1.4.7/debian/watch > dovecot-fts-xapian-1.4.9a/debian/watch > --- dovecot-fts-xapian-1.4.7/debian/watch 2020-10-18 12:23:30.000000000 > -0400 > +++ dovecot-fts-xapian-1.4.9a/debian/watch 2021-06-22 22:10:41.000000000 > -0400 > @@ -3,6 +3,6 @@ > > version=4 > > -opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" > \ > +opts="filenamemangle=s%(?:.*?)?v?(\d.*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" > \ > https://github.com/grosjo/fts-xapian/tags \ > - (?:.*?/)?v?(\d[\d.]*)\.tar\.gz > + (?:.*?/)?v?(\d.*)\.tar\.gz > diff -Nru -w dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in > dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in > --- dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in 2021-01-31 > 14:06:29.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in 2021-04-24 > 16:27:55.000000000 -0400 > @@ -1,2 +1,2 @@ > #define FTS_XAPIAN_NAME "Dovecot FTS Xapian" > -#define FTS_XAPIAN_VERSION "1.4.7" > +#define FTS_XAPIAN_VERSION "1.4.9a" > diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp > dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp > --- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp > 2021-01-31 14:06:29.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp > 2021-04-24 16:27:55.000000000 -0400 > @@ -26,6 +26,8 @@ > > class XQuerySet > { > + private: > + icu::Transliterator *accentsConverter; > public: > char * header; > char * text; > @@ -44,6 +46,7 @@ > header=NULL; > text=NULL; > global_neg=false; > + accentsConverter=NULL; > } > > 
XQuerySet(bool is_and, bool is_neg, long l) > @@ -55,6 +58,7 @@ > text=NULL; > global_and=is_and; > global_neg=is_neg; > + accentsConverter=NULL; > } > > ~XQuerySet() > @@ -68,6 +72,7 @@ > } > if(qsize>0) i_free(qs); > qsize=0; qs=NULL; > + if(accentsConverter != NULL) delete(accentsConverter); > } > > void add(const char * h,const char * t) > @@ -80,11 +85,8 @@ > if(h==NULL) return; > if(t==NULL) return; > > - icu::StringPiece sp_h(h); > - icu::UnicodeString h2 = icu::UnicodeString::fromUTF8(sp_h); > - > - icu::StringPiece sp_t(t); > - icu::UnicodeString t2 = icu::UnicodeString::fromUTF8(sp_t); > + icu::UnicodeString h2 = > icu::UnicodeString::fromUTF8(icu::StringPiece(h)); > + icu::UnicodeString t2 = > icu::UnicodeString::fromUTF8(icu::StringPiece(t)); > > add(&h2,&t2,is_neg); > } > @@ -154,6 +156,19 @@ > std::string tmp1; > h->toUTF8String(tmp1); > char * h2 = i_strdup(tmp1.c_str()); > + > + if(accentsConverter == NULL) > + { > + UErrorCode status = U_ZERO_ERROR; > + accentsConverter = > icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, > status); > + if(U_FAILURE(status)) > + { > + i_error("FTS Xapian: Can not allocate ICU > translator (2)"); > + accentsConverter = NULL; > + } > + } > + if(accentsConverter != NULL) > accentsConverter->transliterate(*t); > + > std::string tmp2; > t->toUTF8String(tmp2); > char * t2 = i_strdup(tmp2.c_str()); > @@ -178,7 +193,7 @@ > } > if(i>=HDRS_NB) > { > - i_error("FTS Xapian: Unknown header (lookup) '%s'",h2); > + if(verbose>1) i_error("FTS Xapian: Unknown header > (lookup) '%s'",h2); > i_free(h2); i_free(t2); > return; > } > @@ -299,8 +314,6 @@ > > char *s = i_strdup(get_string().c_str()); > > - if(verbose>0) { i_info("FTS Xapian: Query= %s",s); } > - > qp->set_database(*db); > > Xapian::Query * q = new > Xapian::Query(qp->parse_query(s,Xapian::QueryParser::FLAG_DEFAULT));// | > Xapian::QueryParser::FLAG_PARTIAL)); > @@ -317,6 +330,7 @@ > long partial,full,hardlimit; > const char * prefix; > bool 
onlyone; > + icu::Transliterator *accentsConverter; > > public: > char ** data; > @@ -334,6 +348,7 @@ > hardlimit=XAPIAN_TERM_SIZELIMIT-strlen(prefix); > onlyone=false; > if(strcmp(prefix,"XMID")==0) onlyone=true; > + accentsConverter = NULL; > } > > ~XNGram() > @@ -348,14 +363,14 @@ > i_free(data); > } > data=NULL; > + if(accentsConverter != NULL) delete(accentsConverter); > } > > void add(const char * s) > { > if(s==NULL) return; > > - icu::StringPiece sp(s); > - icu::UnicodeString d = icu::UnicodeString::fromUTF8(sp); > + icu::UnicodeString d = > icu::UnicodeString::fromUTF8(icu::StringPiece(s)); > add(&d); > } > > @@ -401,6 +416,18 @@ > long l = d->length(); > if(l<partial) return; > > + if(accentsConverter == NULL) > + { > + UErrorCode status = U_ZERO_ERROR; > + accentsConverter = > icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, > status); > + if(U_FAILURE(status)) > + { > + i_error("FTS Xapian: Can not allocate ICU > translator (1)"); > + accentsConverter = NULL; > + } > + } > + if(accentsConverter != NULL) > accentsConverter->transliterate(*d); > + > if(onlyone) > { > add_stem(d); > @@ -470,6 +497,13 @@ > } > }; > > +static long fts_backend_xapian_current_time() > +{ > + struct timeval tp; > + gettimeofday(&tp, NULL); > + return tp.tv_sec * 1000 + tp.tv_usec / 1000; > +} > + > static long fts_backend_xapian_memory_used() // KB > { > FILE* file = fopen("/proc/self/status", "r"); > @@ -522,7 +556,7 @@ > return 0; > } > > -static bool fts_backend_xapian_test_memory() > +static bool fts_backend_xapian_test_memory(struct xapian_fts_backend > *backend, long add) > { > rlim_t limit; > > @@ -531,15 +565,23 @@ > long used = fts_backend_xapian_memory_used(); > long fri = fts_backend_xapian_memory_free(); // Free RAM > > + backend->nb_pushes++; > + long m2 = 2*used/backend->nb_pushes; > + if(backend->max_push < m2) backend->max_push=m2; > + m2=backend->max_push; > + > + add = long(add/1024.0); > + > if(m<1) > { > - if(verbose>0) 
i_info("FTS Xapian: Memory stats : Used = %ld MB, > Free = %ld MB",long(used/1024),long(fri/1024)); > - return (fri>used/2); > + if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB > (%ld pushes), Free = %ld MB, Additional data %ld KB, Estimated required = %ld > MB",long(used/1024), backend->nb_pushes, long(fri/1024), add, long(m2/1024)); > + return ((fri>XAPIAN_MIN_RAM*1024)&&(fri>m2)); > + } > + else > + { > + if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB > (%ld%%) (%ld pushes), Limit = %ld MB, Free = %ld MB, Additional data %ld KB, > Estimated required = %ld > MB",long(used/1024),long(used*100.0/m),backend->nb_pushes,long(m/1024),long(fri/1024), > add, long(m2/1024)); > + return ((fri>XAPIAN_MIN_RAM*1024)&&(m>(used+m2))&&(fri>m2)); > } > - > - if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld%%), > Limit = %ld MB, Free = %ld > MB",long(used/1024),long(used*100.0/m),long(m/1024),long(fri/1024)); > - > - return ((m>used*3.0/2)&&(fri>used/2)); > } > > static bool fts_backend_xapian_open_readonly(struct xapian_fts_backend > *backend, Xapian::Database ** dbr) > @@ -598,9 +640,7 @@ > if(backend->old_guid != NULL) > { > /* Performance calculator*/ > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - > backend->perf_dt; > + long dt = fts_backend_xapian_current_time() - backend->perf_dt; > double r=0; > if(dt>0) > { > @@ -618,8 +658,12 @@ > > static void fts_backend_xapian_release(struct xapian_fts_backend *backend, > const char * reason, long commit_time) > { > + bool err=false; > + > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_release > (%s)",reason); > > + if(commit_time<1) commit_time = fts_backend_xapian_current_time(); > + > if(backend->dbw !=NULL) > { > try > @@ -630,6 +674,7 @@ > catch(Xapian::Error e) > { > i_error("FTS Xapian: %s : %s - > %s",reason,e.get_type(),e.get_error_string()); > + err=true; > } > delete(backend->dbw); > backend->dbw = NULL; > @@ 
-637,12 +682,27 @@ > backend->commit_time = commit_time; > } > > + if(err) > + { > + if(verbose>0) i_info("FTS Xapian: Re-creating index database > due to error"); > + try > + { > + Xapian::WritableDatabase * db = new > Xapian::WritableDatabase(backend->db,Xapian::DB_CREATE_OR_OVERWRITE | > Xapian::DB_RETRY_LOCK | Xapian::DB_BACKEND_GLASS); > + db->close(); > + delete(db); > + } > + catch(Xapian::Error e) > + { > + i_error("FTS Xapian: Can't re-create Xapian DB (%s) %s > : %s - %s",backend->boxname,backend->db,e.get_type(),e.get_error_string()); > + } > + } > + > + backend->nb_pushes=0; > + backend->max_push=0; > + > if(verbose>0) > { > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > - i_info("FTS Xapian: Committed '%s' in %ld > ms",reason,current_time - commit_time); > + i_info("FTS Xapian: Committed '%s' in %ld > ms",reason,fts_backend_xapian_current_time() - commit_time); > } > } > > @@ -689,9 +749,7 @@ > { > Xapian::WritableDatabase * dbw; > > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000; > + long dt = fts_backend_xapian_current_time(); > > try > { > @@ -747,21 +805,18 @@ > dbw->commit(); > dbw->close(); > delete(dbw); > - gettimeofday(&tp, NULL); > - dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - dt; > + > + dt = fts_backend_xapian_current_time() - dt; > i_info("FTS Xapian: Expunging '%s' done in %.2f secs",fpath,dt/1000.0); > } > > static int fts_backend_xapian_unset_box(struct xapian_fts_backend *backend) > { > - if(verbose>1) i_info("FTS Xapian: Unset box '%s' > (%s)",backend->boxname,backend->guid); > + if(verbose>0) i_info("FTS Xapian: Unset box '%s' > (%s)",backend->boxname,backend->guid); > > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long commit_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > + long commit_time = fts_backend_xapian_current_time(); > > fts_backend_xapian_oldbox(backend); > - > 
fts_backend_xapian_release(backend,"unset_box",commit_time); > > if(backend->db != NULL) > @@ -779,19 +834,47 @@ > return 0; > } > > +static int fts_backend_xapian_set_path(struct xapian_fts_backend *backend) > +{ > + struct mail_namespace * ns = backend->backend.ns; > + if(ns->alias_for != NULL) > + { > + if(verbose>0) i_info("FTS Xapian: Switching namespace"); > + ns = ns->alias_for; > + } > + > + const char * path = mailbox_list_get_root_forced(ns->list, > MAILBOX_LIST_PATH_TYPE_INDEX); > + > + if(backend->path != NULL) i_free(backend->path); > + backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL); > + > + if(verbose>0) i_info("FTS Xapian: Index path = %s",backend->path); > + > + struct stat sb; > + if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) > + { > + if (mailbox_list_mkdir_root(backend->backend.ns->list, > backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0) > + { > + i_error("FTS Xapian: can not create > '%s'",backend->path); > + return -1; > + } > + } > + return 0; > +} > + > static int fts_backend_xapian_set_box(struct xapian_fts_backend *backend, > struct mailbox *box) > { > if (box == NULL) > { > if(backend->guid != NULL) fts_backend_xapian_unset_box(backend); > - if(verbose>0) i_info("FTS Xapian: Box is empty"); > + if(verbose>1) i_info("FTS Xapian: Box is empty"); > return 0; > } > > const char * mb; > fts_mailbox_get_guid(box, &mb ); > > - if(verbose>1) i_info("FTX Xapian: Set box '%s' (%s)",box->name,mb); > + if(verbose>0) i_info("FTS Xapian: Set box '%s' (%s)",box->name,mb); > > if((mb == NULL) || (strlen(mb)<3)) > { > @@ -807,11 +890,12 @@ > > if(backend->guid != NULL) fts_backend_xapian_unset_box(backend); > > + if(fts_backend_xapian_set_path(backend)<0) return -1; > + > struct timeval tp; > long current_time; > > - gettimeofday(&tp, NULL); > - current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > + current_time = fts_backend_xapian_current_time(); > > backend->commit_updates = 0; > backend->commit_time = current_time; > 
@@ -837,7 +921,6 @@ > } > i_free(t); > > - > /* Performance calculator*/ > backend->perf_dt = current_time; > backend->perf_uid=0; > @@ -917,15 +1000,15 @@ > { > bool ok=true; > > - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_hdr"); > + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_hdr"); > > Xapian::WritableDatabase * dbx = backend->dbw; > long p = backend->partial; > long f = backend->full; > > - if(data->length()<p) { return true; } > + if(data->length()<p) return true; > > - if(strlen(field)<1) { return true; } > + if(strlen(field)<1) return true; > > long i=0; > while((i<HDRS_NB) && (strcmp(field,hdrs_emails[i])!=0)) > @@ -1006,7 +1089,7 @@ > } > catch (std::bad_alloc& ba) > { > - i_error("FTS Xapian: Memory error '%s'",ba.what()); > + i_info("FTS Xapian: Memory too low (hdr) > '%s'",ba.what()); > ok = false; > } > } > @@ -1020,13 +1103,13 @@ > { > bool ok = true; > > - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_text"); > + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_text"); > > Xapian::WritableDatabase * dbx = backend->dbw; > long p = backend->partial; > long f = backend->full; > > - if(data->length()<p) { return true; } > + if(data->length()<p) return true; > > XQuerySet * xq = new XQuerySet(); > > @@ -1134,7 +1217,7 @@ > } > catch (std::bad_alloc& ba) > { > - i_error("FTS Xapian: Memory error '%s'",ba.what()); > + i_info("FTS Xapian: Memory too low (text) > '%s'",ba.what()); > ok = false; > } > } > diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp > dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp > --- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp 2021-01-31 > 14:06:29.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp 2021-04-24 > 16:27:55.000000000 -0400 > @@ -6,7 +6,9 @@ > #include "fts-xapian-plugin.h" > } > #include <dirent.h> > +#include <unicode/utypes.h> > #include <unicode/unistr.h> > +#include <unicode/translit.h> > #include <sys/time.h> > 
> #define XAPIAN_FILE_PREFIX "xapian-indexes" > @@ -15,6 +17,7 @@ > #define XAPIAN_COMMIT_TIMEOUT 300L > #define XAPIAN_WILDCARD "wldcrd" > #define XAPIAN_EXPUNGE_HEADER 9 > +#define XAPIAN_MIN_RAM 200L > > #define HDRS_NB 11 > static const char * hdrs_emails[HDRS_NB] = { "uid", "subject", "from", "to", > "cc", "bcc", "messageid", "listid", "body", "expungeheader", "" }; > @@ -25,9 +28,8 @@ > struct xapian_fts_backend > { > struct fts_backend backend; > - char * path; > + char * path = NULL; > long partial,full; > - bool attachments; > > char * guid; > char * boxname; > @@ -45,12 +47,16 @@ > long perf_nb; > long perf_uid; > long perf_dt; > + > + long nb_pushes; > + long max_push; > }; > > struct xapian_fts_backend_update_context > { > struct fts_backend_update_context ctx; > char * tbi_field=NULL; > + bool isattachment=false; > bool tbi_isfield; > uint32_t tbi_uid=0; > }; > @@ -69,10 +75,10 @@ > > static int fts_backend_xapian_init(struct fts_backend *_backend, const char > **error_r) > { > - if(verbose>0) i_info("fts_backend_xapian_init"); > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)_backend; > + > + if(verbose>0) i_info("fts_backend_xapian_init : %s",_backend->name); > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)_backend; > const char *const *tmp, *env; > long len; > > @@ -83,11 +89,13 @@ > backend->path = NULL; > backend->old_guid = NULL; > backend->old_boxname = NULL; > - backend->attachments = false; > verbose = 0; > backend->partial = 0; > backend->full = 0; > > + backend->nb_pushes=0; > + backend->max_push=0; > + > env = mail_user_plugin_getenv(_backend->ns->user, "fts_xapian"); > if (env == NULL) > { > @@ -114,7 +122,7 @@ > } > else if (strncmp(*tmp,"attachments=",12)==0) > { > - if(atol(*tmp + 12)>0) backend->attachments=true; > + // Legacy > } > else > { > @@ -144,28 +152,16 @@ > return -1; > } > > - const char * path = mailbox_list_get_root_forced(_backend->ns->list, > 
MAILBOX_LIST_PATH_TYPE_INDEX); > - backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL); > - > - struct stat sb; > - if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) > - { > - if (mailbox_list_mkdir_root(backend->backend.ns->list, > backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0) > - { > - i_error("FTS Xapian: can not create > '%s'",backend->path); > - return -1; > - } > - } > + if(fts_backend_xapian_set_path(backend)<0) return -1; > > - if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld > attachments=%d > verbose=%d",backend->partial,backend->full,backend->attachments,verbose); > + if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld > verbose=%d",backend->partial,backend->full,verbose); > > return 0; > } > > static void fts_backend_xapian_deinit(struct fts_backend *_backend) > { > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)_backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)_backend; > > if(verbose>0) i_info("FTS Xapian: Deinit %s)",backend->path); > > @@ -188,8 +184,7 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_get_last_uid"); > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)_backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)_backend; > > *last_uid_r = 0; > > @@ -242,18 +237,12 @@ > > static int fts_backend_xapian_update_deinit(struct > fts_backend_update_context *_ctx) > { > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)ctx->ctx.backend; > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)ctx->ctx.backend; > > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_deinit > (%s)",backend->path); > > - struct timeval 
tp; > - gettimeofday(&tp, NULL); > - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > - > - fts_backend_xapian_release(backend,"update_deinit", current_time); > + fts_backend_xapian_release(backend,"update_deinit",0); > > i_free(ctx); > > @@ -264,10 +253,8 @@ > { > if(verbose>0) i_info("FTS Xapian: > fts_backend_xapian_update_set_mailbox"); > > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)ctx->ctx.backend; > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)ctx->ctx.backend; > > fts_backend_xapian_set_box(backend, box); > } > @@ -276,10 +263,8 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_expunge"); > > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)ctx->ctx.backend; > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)ctx->ctx.backend; > > if(!fts_backend_xapian_check_access(backend)) > { > @@ -332,11 +317,9 @@ > { > if(verbose>1) i_info("FTS Xapian: > fts_backend_xapian_update_set_build_key"); > > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *)ctx->ctx.backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend > *)ctx->ctx.backend; > > ctx->tbi_isfield=false; > ctx->tbi_uid=0; > @@ -364,9 +347,7 @@ > if((backend->perf_nb - backend->perf_pt)>=200) > { > backend->perf_pt = 
backend->perf_nb; > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - > backend->perf_dt; > + long dt = fts_backend_xapian_current_time() - backend->perf_dt; > double r=0; > if(dt>0) > { > @@ -384,19 +365,27 @@ > if(verbose>1) i_info("FTS Xapian: New part > (Header=%s,Type=%s,Disposition=%s)",field,type,disposition); > > // Verify content-type > - if((type != NULL) && (strncmp(type,"text",4)!=0)) > + > + if(key->type == FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY) > { > - if(verbose>0) i_info("FTS Xapian: Skipping part of type > '%s'",type); > + if(verbose>0) i_info("FTS Xapian: Skipping binary part of type > '%s'",type); > return FALSE; > } > > - // Verify content-disposition > - if((disposition != NULL) && (!backend->attachments) && > ((strstr(disposition,"filename=")!=NULL) || > (strstr(disposition,"attachment")!=NULL))) > + if((type != NULL) && (strncmp(type,"text",4)!=0) && > ((disposition==NULL) || ((strstr(disposition,"filename=")==NULL) && > (strstr(disposition,"attachment")==NULL)))) > { > - if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s' > and disposition '%s'",type,disposition); > + if(verbose>0) i_info("FTS Xapian: Non-binary & non-text part of > type '%s'",type); > return FALSE; > } > > + // Verify content-disposition > + ctx->isattachment=false; > + if((disposition != NULL) && ((strstr(disposition,"filename=")!=NULL) || > (strstr(disposition,"attachment")!=NULL))) > + { > + if(verbose>0) i_info("FTS Xapian: Found part as attachment of > type '%s' and disposition '%s'",type,disposition); > + ctx->isattachment=true; > + } > + > // Fill-in field > if(field==NULL) > { > @@ -422,7 +411,7 @@ > } > if(i>=HDRS_NB) > { > - if(verbose>1) i_info("FTS Xapian: Unknown header (indexing) > '%s'",ctx->tbi_field); > + if(verbose>1) i_info("FTS Xapian: Unknown header '%s' of > part",ctx->tbi_field); > i_free(ctx->tbi_field); > ctx->tbi_field=NULL; > return FALSE; > @@ -447,10 +436,9 @@ > > static void 
fts_backend_xapian_update_unset_build_key(struct > fts_backend_update_context *_ctx) > { > - if(verbose>1) i_info("FTS Xapian: > fts_backend_xapian_update_unset_build_key"); > + if(verbose>0) i_info("FTS Xapian: > fts_backend_xapian_update_unset_build_key"); > > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > > if(ctx->tbi_field!=NULL) > { > @@ -464,26 +452,32 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_refresh"); > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *) _backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) > _backend; > > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > - > - fts_backend_xapian_release(backend,"refresh", current_time); > + fts_backend_xapian_release(backend,"refresh", 0); > > return 0; > } > > static int fts_backend_xapian_update_build_more(struct > fts_backend_update_context *_ctx, const unsigned char *data, size_t size) > { > - if(verbose>1) i_info("FTS Xapian: > fts_backend_xapian_update_build_more"); > + struct xapian_fts_backend_update_context *ctx = (struct > xapian_fts_backend_update_context *)_ctx; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) > ctx->ctx.backend; > > - struct xapian_fts_backend_update_context *ctx = > - (struct xapian_fts_backend_update_context *)_ctx; > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *) ctx->ctx.backend; > + if(verbose>1) > + { > + if(ctx->isattachment) > + { > + char * t = i_strdup("NODATA"); > + if(data != NULL) { i_free(t); t = i_strndup(data,40); } > + i_info("FTS Xapian: Indexing part as attachment (data > like '%s')",t); > + i_free(t); > + } > + else > + { > + i_info("FTS Xapian: Indexing part as text"); > + } > + } > > if(ctx->tbi_uid<1) return 0; > 
> @@ -499,13 +493,10 @@ > return -1; > } > > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > - > - if(!fts_backend_xapian_test_memory()) > + if(!fts_backend_xapian_test_memory(backend,d2.length())) > { > - fts_backend_xapian_release(backend,"Low memory indexing", > current_time); > + if(verbose>0) i_info("FTS Xapian: Warning Low memory"); > + fts_backend_xapian_release(backend,"Low memory indexing", 0); > if(!fts_backend_xapian_check_access(backend)) > { > i_error("FTS Xapian: Buildmore: Can not open db (2)"); > @@ -518,16 +509,41 @@ > if(ctx->tbi_isfield) > { > > ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2); > + if(!ok) > + { > + if(verbose>0) i_info("FTS Xapian: Flushing memory and > retrying"); > + fts_backend_xapian_release(backend,"Flushing memory > indexing hdr", 0); > + if(fts_backend_xapian_check_access(backend)) > + { > + > ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2); > + } > + else > + { > + i_error("FTS Xapian: Buildmore: Can not open db > (3)"); > + } > + } > } > else > { > > ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2); > + if(!ok) > + { > + if(verbose>0) i_info("FTS Xapian: Flushing memory and > retrying"); > + fts_backend_xapian_release(backend,"Flushing memory > indexing text", 0); > + if(fts_backend_xapian_check_access(backend)) > + { > + > ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2); > + } > + else > + { > + i_error("FTS Xapian: Buildmore: Can not open db > (4)"); > + } > + } > } > > backend->commit_updates++; > > - gettimeofday(&tp, NULL); > - current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > + long current_time = fts_backend_xapian_current_time(); > > if( (!ok) || (backend->commit_updates>XAPIAN_COMMIT_ENTRIES) || > ((current_time - backend->commit_time) > XAPIAN_COMMIT_TIMEOUT*1000) ) > { > @@ -541,8 +557,7 @@ > > static int 
fts_backend_xapian_optimize(struct fts_backend *_backend) > { > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *) _backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) > _backend; > > i_info("FTS Xapian: fts_backend_xapian_optimize '%s'",backend->path); > > @@ -580,8 +595,7 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_rescan"); > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *) _backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) > _backend; > > struct stat sb; > if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) > @@ -631,16 +645,11 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup"); > > - struct xapian_fts_backend *backend = > - (struct xapian_fts_backend *) _backend; > + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) > _backend; > > - if(fts_backend_xapian_set_box(backend, box)<0) > - return -1; > + if(fts_backend_xapian_set_box(backend, box)<0) return -1; > > - /* Performance calc */ > - struct timeval tp; > - gettimeofday(&tp, NULL); > - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; > + long current_time = fts_backend_xapian_current_time(); > > Xapian::Database * dbr; > > @@ -671,6 +680,7 @@ > XResultSet * r=fts_backend_xapian_query(dbr,qs); > > long n=r->size; > + if(verbose>0) { i_info("FTS Xapian: QUery '%s' -> %ld > results",qs->get_string().c_str(),n); } > > i_array_init(&(result->definite_uids),r->size); > > @@ -696,9 +706,7 @@ > /* Performance calc */ > if(verbose>0) > { > - gettimeofday(&tp, NULL); > - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - current_time; > - i_info("FTS Xapian: %ld results in %ld ms",n,dt); > + i_info("FTS Xapian: %ld results in %ld > ms",n,fts_backend_xapian_current_time() - current_time); > } > return 0; > } > @@ -707,10 +715,8 @@ > { > if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup_multi"); > > - struct xapian_fts_backend *backend 
= > - (struct xapian_fts_backend *) _backend; > - > ARRAY(struct fts_result) box_results; > + > struct fts_result *box_result; > int i; > > @@ -719,12 +725,22 @@ > { > box_result = array_append_space(&box_results); > box_result->box = boxes[i]; > - if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, > box_result)<1) return -1; > + if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, > box_result)<0) > + { > + void* p=&box_results; > + p_free(result->pool, p); > + return -1; > + } > } > + > + array_append_zero(&box_results); > + result->box_results = array_idx_modifiable(&box_results, 0); > + > return 0; > } > > -struct fts_backend fts_backend_xapian = { > +struct fts_backend fts_backend_xapian = > +{ > .name = "xapian", > .flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS, > .v = { > @@ -748,4 +764,3 @@ > NULL > } > }; > - > diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c > dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c > --- dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c 2021-01-31 > 14:06:29.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c 2021-04-24 > 16:27:55.000000000 -0400 > @@ -7,13 +7,11 @@ > > void fts_xapian_plugin_init(struct module *module ATTR_UNUSED) > { > - //i_warning("fts_xapian_plugin_init"); > fts_backend_register(&fts_backend_xapian); > } > > void fts_xapian_plugin_deinit(void) > { > - //i_warning("fts_xapian_plugin_deinit"); > fts_backend_unregister(fts_backend_xapian.name); > } > > diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h > dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h > --- dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h 2021-01-31 > 14:06:29.000000000 -0500 > +++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h 2021-04-24 > 16:27:55.000000000 -0400 > @@ -5,13 +5,12 @@ > #define FTS_XAPIAN_PLUGIN_H > > #include "lib.h" > -#include "mail-storage-private.h" > -#include "mailbox-list-private.h" > -#include "mail-search.h" > +#include "fts-api-private.h" > #include 
"fts-api.h" > -#include "module-context.h" > +#include "mail-search.h" > +#include "mail-storage-private.h" > #include "mail-user.h" > -#include "fts-api-private.h" > +#include "module-context.h" > #include "restrict-process-size.h" > > extern const char *fts_xapian_plugin_dependencies[]; -- Sebastian Ramacher
signature.asc
Description: PGP signature