 i18nutil/qa/cppunit/test_kashida.cxx | 33 +++++++++++++++++----
 i18nutil/source/utility/kashida.cxx | 55 ++++++++++++++++++++++++-----------
 2 files changed, 65 insertions(+), 23 deletions(-)
New commits: commit a3b0ef4088183c4a3b2ec3fef08ef91314eaef54 Author: Jonathan Clark <jonat...@libreoffice.org> AuthorDate: Tue Sep 24 16:33:36 2024 -0600 Commit: Jonathan Clark <jonat...@libreoffice.org> CommitDate: Wed Sep 25 03:54:35 2024 +0200 tdf#163105 Restore some missing kashida opportunities This change partially reverts the fix for tdf#65344, to allow more candidate kashida insertion positions. The final position fallback from the original ruleset has been restored. Instead, tdf#65344 is now fixed by explicitly prohibiting kashida under a final-position Yeh. Change-Id: I9fe8c656768777c160205ad9892f2fe916a2a926 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173887 Tested-by: Jenkins Reviewed-by: Jonathan Clark <jonat...@libreoffice.org> diff --git a/i18nutil/qa/cppunit/test_kashida.cxx b/i18nutil/qa/cppunit/test_kashida.cxx index 99ce2a1a969c..55f943a318a4 100644 --- a/i18nutil/qa/cppunit/test_kashida.cxx +++ b/i18nutil/qa/cppunit/test_kashida.cxx @@ -22,9 +22,13 @@ class KashidaTest : public CppUnit::TestFixture { public: void testCharacteristic(); + void testFinalYeh(); + void testNoZwnjExpansion(); CPPUNIT_TEST_SUITE(KashidaTest); CPPUNIT_TEST(testCharacteristic); + CPPUNIT_TEST(testFinalYeh); + CPPUNIT_TEST(testNoZwnjExpansion); CPPUNIT_TEST_SUITE_END(); }; @@ -32,24 +36,41 @@ void KashidaTest::testCharacteristic() { // Characteristic tests for kashida candidate selection. // Uses words from sample documents. 
- CPPUNIT_ASSERT(!GetWordKashidaPosition(u"متن"_ustr).has_value()); - CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"با"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"کشیده"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"برای"_ustr).value().nIndex); - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"چینش"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"چینش"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"بهتر"_ustr).value().nIndex); - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ببببب"_ustr).has_value()); - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپپپپ"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"ببببب"_ustr).value().nIndex); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"بپپپپ"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تطویل"_ustr).value().nIndex); - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپ"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"بپ"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تطوی"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تحویل"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تشویل"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تمثیل"_ustr).value().nIndex); } +// tdf#65344: Do not insert kashida before a final Yeh +void KashidaTest::testFinalYeh() +{ + 
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex); +} + +// #i98410#: Do not insert kashida under a ZWNJ +void KashidaTest::testNoZwnjExpansion() +{ + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex); + CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value()); +} + CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest); } diff --git a/i18nutil/source/utility/kashida.cxx b/i18nutil/source/utility/kashida.cxx index dbf2b818abf1..c4aea0069dd5 100644 --- a/i18nutil/source/utility/kashida.cxx +++ b/i18nutil/source/utility/kashida.cxx @@ -41,6 +41,12 @@ namespace 7. before the final form of other characters that can be connected. */ +/* + The LibreOffice implementation modifies the above rules, as follows: + + - tdf#65344: Kashida must not be inserted before the final form of Yeh. 
+*/ + #define IS_JOINING_GROUP(c, g) (u_getIntPropertyValue((c), UCHAR_JOINING_GROUP) == U_JG_##g) #define isAinChar(c) IS_JOINING_GROUP((c), AIN) #define isAlefChar(c) IS_JOINING_GROUP((c), ALEF) @@ -145,6 +151,26 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const --nWordLen; } + auto fnTryInsertBefore = [&rWord, &nIdx, &nPrevIdx, &nKashidaPos, &nPriorityLevel, + &nWordLen](sal_Int32 nNewPriority) { + // Exclusions: + + // #i98410#: prevent ZWNJ expansion + if (rWord[nPrevIdx] == 0x200C || rWord[nPrevIdx + 1] == 0x200C) + { + return; + } + + // tdf#65344: Do not insert kashida before a final Yeh + if (nIdx == (nWordLen - 1) && isYehChar(rWord[nIdx])) + { + return; + } + + nKashidaPos = nPrevIdx; + nPriorityLevel = nNewPriority; + }; + while (nIdx < nWordLen) { cCh = rWord[nIdx]; @@ -153,19 +179,18 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // after user inserted kashida if (0x640 == cCh) { + // Always respect a user-inserted kashida nKashidaPos = nIdx; nPriorityLevel = 0; } // 2. 
Priority: // after a Seen or Sad - if (nPriorityLevel >= 1 && nIdx < nWordLen - 1) + if (nPriorityLevel >= 1) { - if (isSeenOrSadChar(cCh) - && (rWord[nIdx + 1] != 0x200C)) // #i98410#: prevent ZWNJ expansion + if (isSeenOrSadChar(cPrevCh)) { - nKashidaPos = nIdx; - nPriorityLevel = 1; + fnTryInsertBefore(1); } } @@ -182,8 +207,7 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // check if character is connectable to previous character, if (CanConnectToPrev(cCh, cPrevCh)) { - nKashidaPos = nPrevIdx; - nPriorityLevel = 2; + fnTryInsertBefore(2); } } } @@ -202,8 +226,7 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // check if character is connectable to previous character, if (CanConnectToPrev(cCh, cPrevCh)) { - nKashidaPos = nPrevIdx; - nPriorityLevel = 3; + fnTryInsertBefore(3); } } } @@ -222,8 +245,7 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // check if character is connectable to previous character, if (CanConnectToPrev(cCh, cPrevCh)) { - nKashidaPos = nPrevIdx; - nPriorityLevel = 4; + fnTryInsertBefore(4); } } } @@ -242,8 +264,7 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // check if character is connectable to previous character, if (CanConnectToPrev(cCh, cPrevCh)) { - nKashidaPos = nPrevIdx; - nPriorityLevel = 5; + fnTryInsertBefore(5); } } } @@ -251,15 +272,15 @@ std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const // other connecting possibilities if (nPriorityLevel >= 6 && nIdx > 0) { - // Reh, Zain - if (isRehChar(cCh)) + // Reh, Zain (right joining) final form may appear in the middle of word + // All others except Yeh - only at end of word + if (isRehChar(cCh) || (0x60C <= cCh && 0x6FE >= cCh && nIdx == nWordLen - 1)) { SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character"); // check if character is connectable to previous character, if (CanConnectToPrev(cCh, cPrevCh)) { - 
nKashidaPos = nPrevIdx; - nPriorityLevel = 6; + fnTryInsertBefore(6); } } }