i18nutil/qa/cppunit/test_scriptchangescanner.cxx |   23 ++++++++++++++++++-----
 i18nutil/source/utility/scriptchangescanner.cxx  |   23 ++++++++++++++++++-----
 2 files changed, 36 insertions(+), 10 deletions(-)

New commits:
commit 9b505f583954c88ce7b72a07c9bfd65d78d863ef
Author:     Jonathan Clark <jonat...@libreoffice.org>
AuthorDate: Mon Jan 6 03:36:31 2025 -0700
Commit:     Jonathan Clark <jonat...@libreoffice.org>
CommitDate: Mon Jan 6 13:45:19 2025 +0100

    tdf#66791 sw: Apply first-seen script type to leading weak characters
    
    Previously, leading weak characters were always assigned a script type
    derived from the application language. This change updates the algorithm
    to instead use the logical-order first-seen non-weak script type.
    
    Change-Id: I5f6732c6faa1eb35aff51e98e82a87c8008f70ab
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/179824
    Tested-by: Jenkins
    Reviewed-by: Jonathan Clark <jonat...@libreoffice.org>

diff --git a/i18nutil/qa/cppunit/test_scriptchangescanner.cxx b/i18nutil/qa/cppunit/test_scriptchangescanner.cxx
index e0726a45d922..1b033d94218a 100644
--- a/i18nutil/qa/cppunit/test_scriptchangescanner.cxx
+++ b/i18nutil/qa/cppunit/test_scriptchangescanner.cxx
@@ -27,6 +27,7 @@ public:
     void testTrivial();
     void testTrivialAppLang();
     void testWeakAtStart();
+    void testOnlyWeak();
     void testStrongChange();
     void testMongolianAfterNNBSP();
     void testNonspacingMark();
@@ -44,6 +45,7 @@ public:
     CPPUNIT_TEST(testTrivial);
     CPPUNIT_TEST(testTrivialAppLang);
     CPPUNIT_TEST(testWeakAtStart);
+    CPPUNIT_TEST(testOnlyWeak);
     CPPUNIT_TEST(testStrongChange);
     CPPUNIT_TEST(testMongolianAfterNNBSP);
     CPPUNIT_TEST(testNonspacingMark);
@@ -101,21 +103,32 @@ void ScriptChangeScannerTest::testTrivialAppLang()
 
 void ScriptChangeScannerTest::testWeakAtStart()
 {
+    // The first-seen script type is used for weak characters at the start
     auto aText = u"“x”"_ustr;
     auto pDirScanner = MakeDirectionChangeScanner(aText, 0);
     auto pScanner = MakeScriptChangeScanner(aText, css::i18n::ScriptType::COMPLEX, *pDirScanner);
 
     CPPUNIT_ASSERT(!pScanner->AtEnd());
-    CPPUNIT_ASSERT_EQUAL(css::i18n::ScriptType::COMPLEX, pScanner->Peek().m_nScriptType);
+    CPPUNIT_ASSERT_EQUAL(css::i18n::ScriptType::LATIN, pScanner->Peek().m_nScriptType);
     CPPUNIT_ASSERT_EQUAL(sal_Int32(0), pScanner->Peek().m_nStartIndex);
-    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), pScanner->Peek().m_nEndIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), pScanner->Peek().m_nEndIndex);
 
     pScanner->Advance();
 
+    CPPUNIT_ASSERT(pScanner->AtEnd());
+}
+
+void ScriptChangeScannerTest::testOnlyWeak()
+{
+    // The application language is used for text containing only weak characters
+    auto aText = u"“”"_ustr;
+    auto pDirScanner = MakeDirectionChangeScanner(aText, 0);
+    auto pScanner = MakeScriptChangeScanner(aText, css::i18n::ScriptType::COMPLEX, *pDirScanner);
+
     CPPUNIT_ASSERT(!pScanner->AtEnd());
-    CPPUNIT_ASSERT_EQUAL(css::i18n::ScriptType::LATIN, pScanner->Peek().m_nScriptType);
-    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), pScanner->Peek().m_nStartIndex);
-    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), pScanner->Peek().m_nEndIndex);
+    CPPUNIT_ASSERT_EQUAL(css::i18n::ScriptType::COMPLEX, pScanner->Peek().m_nScriptType);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), pScanner->Peek().m_nStartIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), pScanner->Peek().m_nEndIndex);
 
     pScanner->Advance();
 
diff --git a/i18nutil/source/utility/scriptchangescanner.cxx b/i18nutil/source/utility/scriptchangescanner.cxx
index 2cb8d9d96819..456af39bb495 100644
--- a/i18nutil/source/utility/scriptchangescanner.cxx
+++ b/i18nutil/source/utility/scriptchangescanner.cxx
@@ -132,7 +132,7 @@ private:
     ScriptChange m_stCurr;
     DirectionChangeScanner* m_pDirScanner;
     const OUString& m_rText;
-    sal_Int16 m_nPrevScript;
+    sal_Int16 m_nPrevScript = css::i18n::ScriptType::WEAK;
     sal_Int32 m_nIndex = 0;
     bool m_bAtEnd = false;
     bool m_bApplyAsianToWeakQuotes = false;
@@ -142,16 +142,23 @@ public:
                               DirectionChangeScanner* pDirScanner)
         : m_pDirScanner(pDirScanner)
         , m_rText(rText)
-        , m_nPrevScript(nDefaultScriptType)
     {
         // tdf#66791: For compatibility with other programs, the Asian script is
         // applied to any weak-script quote characters if the enclosing paragraph
         // contains Chinese- or Japanese-script characters.
+        // In the original Writer algorithm, the application language is used for
+        // all leading weak characters (#94331#). This implementation deviates by
+        // instead using the first-seen non-weak script.
         sal_Int32 nCjBase = 0;
         while (nCjBase < m_rText.getLength())
         {
             auto nChar = m_rText.iterateCodePoints(&nCjBase);
             auto nScript = GetScriptClass(nChar);
+            if (m_nPrevScript == css::i18n::ScriptType::WEAK)
+            {
+                m_nPrevScript = nScript;
+            }
+
             if (nScript == css::i18n::ScriptType::COMPLEX)
             {
                 m_bApplyAsianToWeakQuotes = false;
@@ -172,12 +179,18 @@ public:
             }
         }
 
-        // In the original Writer algorithm, the application language is used for
-        // all leading weak characters. Make a change record for those characters.
+        // Fall back to the application language for leading weak characters if a
+        // better candidate was not found.
+        if (m_nPrevScript == css::i18n::ScriptType::WEAK)
+        {
+            m_nPrevScript = nDefaultScriptType;
+        }
+
+        // Make a change record for leading weak characters.
         Advance();
         if (m_stCurr.m_nStartIndex == m_stCurr.m_nEndIndex)
         {
-            // The text does not start with application-language leading characters.
+            // The text does not start with weak characters.
             // Initialize with a non-empty record.
             Advance();
         }

Reply via email to