i18npool/source/search/textsearch.cxx | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-)
New commits: commit 8c26876fea085a1bc847abba63dffa97a9499c1d Author: Herbert Dürr <h...@apache.org> Date: Fri Sep 7 12:27:55 2012 +0000 i#120598 better emulation of regexp word-start and word-end operators The emulation of the word-start and word-end operators provided by the previous regexp engine can be approximated much better by using the ICU-regexp engine's powerful look-around feature. Patch-by: Herbert Duerr Found-by: ldgold...@yahoo.com (cherry picked from commit ec7ef30693f10315ce80a8f5d7325a0e40855e66) Change-Id: If375d6d5bb93b3873f657673f7581f0884b3b35e diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 075cd21..072c0be 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -681,13 +681,20 @@ void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOp IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength()); #ifndef DISABLE_WORDBOUND_EMULATION // for conveniance specific syntax elements of the old regex engine are emulated - // by using regular word boundary matching \b to replace \< and \> - static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant); - static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant); - static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr); - aChevronMatcher.reset( aIcuSearchPatStr); - aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr); - aChevronMatcher.reset(); + // - by replacing \< with "word-break followed by a look-ahead word-char" + static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant); + static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant); + static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr); + aChevronMatcherB.reset( aIcuSearchPatStr); + aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, 
nIcuErr); + aChevronMatcherB.reset(); + // - by replacing \> with "look-behind word-char followed by a word-break" + static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant); + static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant); + static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr); + aChevronMatcherE.reset( aIcuSearchPatStr); + aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr); + aChevronMatcherE.reset(); #endif pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); if( nIcuErr)
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits