i18nutil/qa/cppunit/test_kashida.cxx |   33 +++++++++++++++++----
 i18nutil/source/utility/kashida.cxx  |   55 ++++++++++++++++++++++++-----------
 2 files changed, 65 insertions(+), 23 deletions(-)

New commits:
commit a3b0ef4088183c4a3b2ec3fef08ef91314eaef54
Author:     Jonathan Clark <jonat...@libreoffice.org>
AuthorDate: Tue Sep 24 16:33:36 2024 -0600
Commit:     Jonathan Clark <jonat...@libreoffice.org>
CommitDate: Wed Sep 25 03:54:35 2024 +0200

    tdf#163105 Restore some missing kashida opportunities
    
    This change partially reverts the fix for tdf#65344, to allow more
    candidate kashida insertion positions.
    
    The final-position fallback from the original ruleset has been restored.
    tdf#65344 is instead fixed by explicitly prohibiting kashida before a
    final-position Yeh.
    
    Change-Id: I9fe8c656768777c160205ad9892f2fe916a2a926
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173887
    Tested-by: Jenkins
    Reviewed-by: Jonathan Clark <jonat...@libreoffice.org>

diff --git a/i18nutil/qa/cppunit/test_kashida.cxx b/i18nutil/qa/cppunit/test_kashida.cxx
index 99ce2a1a969c..55f943a318a4 100644
--- a/i18nutil/qa/cppunit/test_kashida.cxx
+++ b/i18nutil/qa/cppunit/test_kashida.cxx
@@ -22,9 +22,13 @@ class KashidaTest : public CppUnit::TestFixture
 {
 public:
     void testCharacteristic();
+    void testFinalYeh();
+    void testNoZwnjExpansion();
 
     CPPUNIT_TEST_SUITE(KashidaTest);
     CPPUNIT_TEST(testCharacteristic);
+    CPPUNIT_TEST(testFinalYeh);
+    CPPUNIT_TEST(testNoZwnjExpansion);
     CPPUNIT_TEST_SUITE_END();
 };
 
@@ -32,24 +36,41 @@ void KashidaTest::testCharacteristic()
 {
     // Characteristic tests for kashida candidate selection.
     // Uses words from sample documents.
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"متن"_ustr).has_value());
-    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"با"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"کشیده"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"برای"_ustr).value().nIndex);
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"چینش"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), 
GetWordKashidaPosition(u"چینش"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"بهتر"_ustr).value().nIndex);
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ببببب"_ustr).has_value());
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپپپپ"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"ببببب"_ustr).value().nIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"بپپپپ"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"تطویل"_ustr).value().nIndex);
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپ"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"بپ"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"تطوی"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"تحویل"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"تشویل"_ustr).value().nIndex);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"تمثیل"_ustr).value().nIndex);
 }
 
+// tdf#65344: Do not insert kashida before a final Yeh
+void KashidaTest::testFinalYeh()
+{
+    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex);
+}
+
+// #i98410#: Do not insert kashida under a ZWNJ
+void KashidaTest::testNoZwnjExpansion()
+{
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), 
GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
+    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
 }
 
diff --git a/i18nutil/source/utility/kashida.cxx b/i18nutil/source/utility/kashida.cxx
index dbf2b818abf1..c4aea0069dd5 100644
--- a/i18nutil/source/utility/kashida.cxx
+++ b/i18nutil/source/utility/kashida.cxx
@@ -41,6 +41,12 @@ namespace
    7. before the final form of other characters that can be connected.
 */
 
+/*
+   The LibreOffice implementation modifies the above rules, as follows:
+
+   - tdf#65344: Kashida must not be inserted before the final form of Yeh.
+*/
+
 #define IS_JOINING_GROUP(c, g) (u_getIntPropertyValue((c), 
UCHAR_JOINING_GROUP) == U_JG_##g)
 #define isAinChar(c) IS_JOINING_GROUP((c), AIN)
 #define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
@@ -145,6 +151,26 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
         --nWordLen;
     }
 
+    auto fnTryInsertBefore = [&rWord, &nIdx, &nPrevIdx, &nKashidaPos, 
&nPriorityLevel,
+                              &nWordLen](sal_Int32 nNewPriority) {
+        // Exclusions:
+
+        // #i98410#: prevent ZWNJ expansion
+        if (rWord[nPrevIdx] == 0x200C || rWord[nPrevIdx + 1] == 0x200C)
+        {
+            return;
+        }
+
+        // tdf#65344: Do not insert kashida before a final Yeh
+        if (nIdx == (nWordLen - 1) && isYehChar(rWord[nIdx]))
+        {
+            return;
+        }
+
+        nKashidaPos = nPrevIdx;
+        nPriorityLevel = nNewPriority;
+    };
+
     while (nIdx < nWordLen)
     {
         cCh = rWord[nIdx];
@@ -153,19 +179,18 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
         // after user inserted kashida
         if (0x640 == cCh)
         {
+            // Always respect a user-inserted kashida
             nKashidaPos = nIdx;
             nPriorityLevel = 0;
         }
 
         // 2. Priority:
         // after a Seen or Sad
-        if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
+        if (nPriorityLevel >= 1)
         {
-            if (isSeenOrSadChar(cCh)
-                && (rWord[nIdx + 1] != 0x200C)) // #i98410#: prevent ZWNJ 
expansion
+            if (isSeenOrSadChar(cPrevCh))
             {
-                nKashidaPos = nIdx;
-                nPriorityLevel = 1;
+                fnTryInsertBefore(1);
             }
         }
 
@@ -182,8 +207,7 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
                 // check if character is connectable to previous character,
                 if (CanConnectToPrev(cCh, cPrevCh))
                 {
-                    nKashidaPos = nPrevIdx;
-                    nPriorityLevel = 2;
+                    fnTryInsertBefore(2);
                 }
             }
         }
@@ -202,8 +226,7 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
                 // check if character is connectable to previous character,
                 if (CanConnectToPrev(cCh, cPrevCh))
                 {
-                    nKashidaPos = nPrevIdx;
-                    nPriorityLevel = 3;
+                    fnTryInsertBefore(3);
                 }
             }
         }
@@ -222,8 +245,7 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
                     // check if character is connectable to previous character,
                     if (CanConnectToPrev(cCh, cPrevCh))
                     {
-                        nKashidaPos = nPrevIdx;
-                        nPriorityLevel = 4;
+                        fnTryInsertBefore(4);
                     }
                 }
             }
@@ -242,8 +264,7 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
                 // check if character is connectable to previous character,
                 if (CanConnectToPrev(cCh, cPrevCh))
                 {
-                    nKashidaPos = nPrevIdx;
-                    nPriorityLevel = 5;
+                    fnTryInsertBefore(5);
                 }
             }
         }
@@ -251,15 +272,15 @@ std::optional<i18nutil::KashidaPosition> 
i18nutil::GetWordKashidaPosition(const
         // other connecting possibilities
         if (nPriorityLevel >= 6 && nIdx > 0)
         {
-            // Reh, Zain
-            if (isRehChar(cCh))
+            // Reh, Zain (right joining) final form may appear in the middle of word
+            // All others except Yeh - only at end of word
+            if (isRehChar(cCh) || (0x60C <= cCh && 0x6FE >= cCh && nIdx == 
nWordLen - 1))
             {
                 SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
                 // check if character is connectable to previous character,
                 if (CanConnectToPrev(cCh, cPrevCh))
                 {
-                    nKashidaPos = nPrevIdx;
-                    nPriorityLevel = 6;
+                    fnTryInsertBefore(6);
                 }
             }
         }

Reply via email to