i18npool/qa/cppunit/test_textsearch.cxx | 88 ++++++++++++++++++++++++++++++++ i18npool/source/search/textsearch.cxx | 40 +++++++++++--- 2 files changed, 120 insertions(+), 8 deletions(-)
New commits: commit 3a02490e1a04c32e18ce5bad5f3c3cb70501a7a4 Author: László Németh <nem...@numbertext.org> AuthorDate: Wed Jun 26 17:53:21 2024 +0200 Commit: László Németh <nem...@numbertext.org> CommitDate: Thu Jun 27 10:22:11 2024 +0200 tdf#138258 i18npool: allow ASCII double quote to match typographic quote Similar to the straight (typewriter or ASCII) apostrophe, straight double quotation mark (") matches its typographic variants now, like other word processors do. Note: regex search doesn't use this matching, similar to the apostrophe search. Follow-up to commit d40f2d02df26e216f367b5da3f9546b73f250469 "tdf#117643 Writer: fix apostrophe search regression". Change-Id: If6a3ee00750828583cd0cfc4aa7f7b656ea9bd1e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169605 Reviewed-by: László Németh <nem...@numbertext.org> Tested-by: Jenkins diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx index 269380a90910..38cc099b7c95 100644 --- a/i18npool/qa/cppunit/test_textsearch.cxx +++ b/i18npool/qa/cppunit/test_textsearch.cxx @@ -38,6 +38,7 @@ public: void testSearches(); void testWildcardSearch(); void testApostropheSearch(); + void testQuotationMarkSearch(); void testTdf138410(); CPPUNIT_TEST_SUITE(TestTextSearch); @@ -45,6 +46,7 @@ public: CPPUNIT_TEST(testSearches); CPPUNIT_TEST(testWildcardSearch); CPPUNIT_TEST(testApostropheSearch); + CPPUNIT_TEST(testQuotationMarkSearch); CPPUNIT_TEST(testTdf138410); CPPUNIT_TEST_SUITE_END(); private: @@ -404,6 +406,92 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); } +void TestTextSearch::testQuotationMarkSearch() +{ + // A) find typographic quotation marks also by using ASCII ones + OUString str( u"“x”, „y‟, ‘z’, ‚a‛"_ustr ); + sal_Int32 startPos = 0, endPos = str.getLength(); + + // set options + util::SearchOptions aOptions; + aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE; + aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE; + aOptions.searchString = "\"x\""; + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE + | TransliterationFlags::IGNORE_WIDTH); + m_xSearch->setOptions( aOptions ); + + util::SearchResult aRes; + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.endOffset[0] ); + + // B) + aOptions.searchString = "\"y\""; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.endOffset[0] ); + + // C) + aOptions.searchString = "'z'"; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.endOffset[0] ); + + // D) + aOptions.searchString = "'a'"; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.endOffset[0] ); +} + void TestTextSearch::testTdf138410() { OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr); diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 816e162c1e6e..dbb49f494781 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -93,6 +93,30 @@ bool isSimpleRegexTrans( TransliterationFlags n ) { return bool(maskSimpleRegexTrans(n)); } + +bool isReplacePunctuation( OUString &rStr ) +{ + return rStr.indexOf(u'\u2018') > -1 || + rStr.indexOf(u'\u2019') > -1 || + rStr.indexOf(u'\u201A') > -1 || + rStr.indexOf(u'\u201B') > -1 || + rStr.indexOf(u'\u201C') > -1 || + rStr.indexOf(u'\u201D') > -1 || + rStr.indexOf(u'\u201E') > -1 || + rStr.indexOf(u'\u201F') > -1; +} + +OUString replacePunctuation( OUString &rStr ) +{ + return rStr.replace(u'\u2018', '\'') + .replace(u'\u2019', '\'') + .replace(u'\u201A', '\'') + .replace(u'\u201B', '\'') + .replace(u'\u201C', '"') + .replace(u'\u201D', '"') + .replace(u'\u201E', '"') + .replace(u'\u201F', '"'); +} }; TextSearch::TextSearch(const Reference < XComponentContext > & rxContext) @@ -139,10 +163,10 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) // match is not case-altered, leave case-(in)sensitive to regex engine. transliterateFlags &= ~TransliterationFlags::IGNORE_CASE; } - else if ( aSrchPara.searchString.indexOf('\'') > - 1 ) + else if ( aSrchPara.searchString.indexOf('\'') > - 1 || aSrchPara.searchString.indexOf('"') > - 1 ) { bSearchApostrophe = true; - bReplaceApostrophe = aSrchPara.searchString.indexOf(u'\u2019') > -1; + bReplaceApostrophe = isReplacePunctuation(aSrchPara.searchString); } // Create Transliteration class @@ -215,7 +239,7 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) } if ( bReplaceApostrophe ) - sSrchStr = sSrchStr.replace(u'\u2019', '\''); + sSrchStr = replacePunctuation(sSrchStr); // Take the new SearchOptions2::AlgorithmType2 field and ignore // SearchOptions::algorithmType @@ -308,7 +332,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta // in non-regex mode, allow searching typographical apostrophe with the ASCII one // to avoid regression after using automatic conversion to U+2019 during typing in Writer - bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1; + bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str); bUsePrimarySrchStr = true; @@ -340,7 +364,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset); if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); // JP 20.6.2001: also the start and end positions must be corrected! sal_Int32 newStartPos = @@ -447,7 +471,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st // in non-regex mode, allow searching typographical apostrophe with the ASCII one // to avoid regression after using automatic conversion to U+2019 during typing in Writer - bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1; + bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str); bUsePrimarySrchStr = true; @@ -458,7 +482,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset ); if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); // JP 20.6.2001: also the start and end positions must be corrected! sal_Int32 const newStartPos = (startPos < searchStr.getLength()) @@ -508,7 +532,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st else { if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); sres = (this->*fnBackward)( in_str, startPos, endPos ); }