i18nutil/qa/cppunit/test_kashida.cxx        |   45 +++++++++++++-
 i18nutil/source/utility/kashida.cxx         |   88 +++++++++++++++++++++++++---
 sw/source/core/inc/scriptinfo.hxx           |    9 +-
 sw/source/core/text/itradj.cxx              |    8 +-
 sw/source/core/text/porlay.cxx              |   12 ++-
 sw/source/core/text/portxt.cxx              |    2 
 sw/source/core/txtnode/fntcache.cxx         |    7 +-
 vcl/qa/cppunit/pdfexport/data/tdf140767.odt |binary
 vcl/qa/cppunit/pdfexport/pdfexport2.cxx     |   61 +++++++++++++++++++
 9 files changed, 206 insertions(+), 26 deletions(-)

New commits:
commit c3c29d31d77ff93aa50634cfd51c62d12dc0f6ec
Author:     Jonathan Clark <jonat...@libreoffice.org>
AuthorDate: Thu Sep 26 02:59:26 2024 -0600
Commit:     Jonathan Clark <jonat...@libreoffice.org>
CommitDate: Fri Sep 27 03:50:01 2024 +0200

    tdf#140767 Implemented Syriac justification
    
    This change extends kashida justification to Syriac, using custom
    insertion rules.
    
    Change-Id: I7508d2c32e95abb12a098e989c7153828ba81c87
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173990
    Tested-by: Jenkins
    Reviewed-by: Jonathan Clark <jonat...@libreoffice.org>

diff --git a/i18nutil/qa/cppunit/test_kashida.cxx 
b/i18nutil/qa/cppunit/test_kashida.cxx
index 46b40c2a5b7a..e0526c4c8f93 100644
--- a/i18nutil/qa/cppunit/test_kashida.cxx
+++ b/i18nutil/qa/cppunit/test_kashida.cxx
@@ -26,6 +26,7 @@ public:
     void testFinalYeh();
     void testNoZwnjExpansion();
     void testExcludeInvalid();
+    void testSyriac();
 
     CPPUNIT_TEST_SUITE(KashidaTest);
     CPPUNIT_TEST(testCharacteristic);
@@ -33,6 +34,7 @@ public:
     CPPUNIT_TEST(testFinalYeh);
     CPPUNIT_TEST(testNoZwnjExpansion);
     CPPUNIT_TEST(testExcludeInvalid);
+    CPPUNIT_TEST(testSyriac);
     CPPUNIT_TEST_SUITE_END();
 };
 
@@ -69,7 +71,9 @@ void KashidaTest::testManualKashida()
 // tdf#65344: Do not insert kashida before a final Yeh
 void KashidaTest::testFinalYeh()
 {
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"يييي"_ustr).value().nIndex);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"ييي"_ustr).value().nIndex);
+    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"يي"_ustr).has_value());
 
     // Should always insert kashida after Seen, even before a final Yeh
     CPPUNIT_ASSERT_EQUAL(sal_Int32(2), 
GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex);
@@ -82,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion()
     CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
 
     CPPUNIT_ASSERT_EQUAL(sal_Int32(1), 
GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
-    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), 
GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex);
 }
 
 // tdf#163105: Do not insert kashida if the position is invalid
@@ -111,6 +115,43 @@ void KashidaTest::testExcludeInvalid()
     CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نویسه"_ustr, aValid).has_value());
 }
 
+// tdf#140767: Kashida justification for Syriac
+void KashidaTest::testSyriac()
+{
+    // - Prefer user-inserted kashida
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), 
GetWordKashidaPosition(u"ܥܥـܥܥܥܥ"_ustr).value().nIndex);
+
+    std::vector<bool> aValid;
+    aValid.resize(7, true);
+
+    // - Start from the second-to-last index and work toward the midpoint, then restart from index 0 and work up to the midpoint
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[5] = false;
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[4] = false;
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[0] = false;
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[1] = false;
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[2] = false;
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3),
+                         GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).value().nIndex);
+    aValid[3] = false;
+
+    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, 
aValid).has_value());
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
 }
 
diff --git a/i18nutil/source/utility/kashida.cxx 
b/i18nutil/source/utility/kashida.cxx
index 6a6c7adde690..a992e5c8e643 100644
--- a/i18nutil/source/utility/kashida.cxx
+++ b/i18nutil/source/utility/kashida.cxx
@@ -133,10 +133,9 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
 
     return bRet;
 }
-}
 
 std::optional<i18nutil::KashidaPosition>
-i18nutil::GetWordKashidaPosition(const OUString& rWord, const 
std::vector<bool>& pValidPositions)
+GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& 
pValidPositions)
 {
     sal_Int32 nIdx = 0;
     sal_Int32 nPrevIdx = 0;
@@ -148,9 +147,6 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, 
const std::vector<bool>&
 
     sal_Int32 nWordLen = rWord.getLength();
 
-    SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != 
static_cast<size_t>(nWordLen),
-                "i18n", "Kashida valid position array wrong size");
-
     // ignore trailing vowel chars
     while (nWordLen && isTransparentChar(rWord[nWordLen - 1]))
     {
@@ -298,8 +294,8 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, 
const std::vector<bool>&
             }
         }
 
-        // 8. If valid position data exists, use the last legal position
-        if (nPriorityLevel >= 7 && nIdx > 0 && !pValidPositions.empty())
+        // 8. Try any valid position
+        if (nPriorityLevel >= 7 && nIdx > 0)
         {
             fnTryInsertBefore(7);
         }
@@ -317,10 +313,86 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, 
const std::vector<bool>&
 
     if (-1 != nKashidaPos)
     {
-        return KashidaPosition{ nKashidaPos };
+        return i18nutil::KashidaPosition{ nKashidaPos };
     }
 
     return std::nullopt;
 }
 
+std::optional<i18nutil::KashidaPosition>
+GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& 
pValidPositions)
+{
+    sal_Int32 nWordLen = rWord.getLength();
+
+    // Prefer a user-inserted kashida (U+0640): scan backward and return the one nearest the end of the word; pValidPositions is not consulted for it
+    for (sal_Int32 i = nWordLen - 1; i >= 0; --i)
+    {
+        if (0x640 == rWord[i])
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    // Otherwise, always insert kashida from the outside-in:
+    // - First, work from the second-to-last index down toward (but excluding) the midpoint index
+    // - Then, work from index 0 up to and including the midpoint index
+
+    sal_Int32 nWordMidpoint = nWordLen / 2;
+
+    auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) {
+        // Exclusions (currently only one):
+
+        // tdf#163105: Do not insert kashida if the position is invalid; an empty vector excludes nothing
+        if (!pValidPositions.empty() && !pValidPositions[nIdx])
+        {
+            return false;
+        }
+
+        return true;
+    };
+
+    // End to midpoint: the first valid index wins; the last index of the word is never tried by this loop
+    for (sal_Int32 i = nWordLen - 2; i > nWordMidpoint; --i)
+    {
+        if (fnPositionValid(i))
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    // Beginning to midpoint, inclusive. NOTE(review): for words of length 1 or 2 this range reaches the last index - confirm that is intended
+    for (sal_Int32 i = 0; i <= nWordMidpoint; ++i)
+    {
+        if (fnPositionValid(i))
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    return std::nullopt;
+}
+}
+
+std::optional<i18nutil::KashidaPosition>
+i18nutil::GetWordKashidaPosition(const OUString& rWord, const 
std::vector<bool>& pValidPositions)
+{
+    sal_Int32 nWordLen = rWord.getLength();
+
+    SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != 
static_cast<size_t>(nWordLen),
+                "i18n", "Kashida valid position array wrong size");
+
+    for (sal_Int32 nIdx = 0; nIdx < nWordLen; ++nIdx)
+    {
+        auto cCh = rWord[nIdx];
+
+        if ((cCh >= 0x700 && cCh <= 0x74F) || (cCh >= 0x860 && cCh <= 0x86A))
+        {
+            // This word contains a Syriac character (U+0700..U+074F or U+0860..U+086A); one match sends the whole word to the Syriac rules.
+            return GetWordKashidaPositionSyriac(rWord, pValidPositions);
+        }
+    }
+
+    return GetWordKashidaPositionArabic(rWord, pValidPositions);
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s 
cinkeys+=0=break: */
diff --git a/sw/source/core/inc/scriptinfo.hxx 
b/sw/source/core/inc/scriptinfo.hxx
index ae37779b6f71..60cf9c542eab 100644
--- a/sw/source/core/inc/scriptinfo.hxx
+++ b/sw/source/core/inc/scriptinfo.hxx
@@ -347,16 +347,17 @@ public:
 */
     bool IsKashidaLine(TextFrameIndex nCharIdx) const;
 
-/** Checks if text is Arabic text.
+/** Checks if text is in a script that allows kashida justification.
 
-     @descr  Checks if text is Arabic text.
+     @descr  Checks if text is in a script that allows kashida justification.
      @param  rText
                  The text to check
      @param  nStt
                  Start index of the text
-     @return Returns if the language is an Arabic language
+     @return Returns true if the script is Arabic or Syriac
  */
-    static bool IsArabicText(const OUString& rText, TextFrameIndex nStt, 
TextFrameIndex nLen);
+    static bool IsKashidaScriptText(const OUString& rText, TextFrameIndex nStt,
+                                    TextFrameIndex nLen);
 
 /** Performs a thai justification on the kerning array
 
diff --git a/sw/source/core/text/itradj.cxx b/sw/source/core/text/itradj.cxx
index 1a22579c24f1..c072025e7a96 100644
--- a/sw/source/core/text/itradj.cxx
+++ b/sw/source/core/text/itradj.cxx
@@ -168,7 +168,8 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, 
SwTextSizeInfo& rInf, S
 
         // Fetch the set of valid positions from VCL, where possible
         aValidPositions.clear();
-        if ( SwScriptInfo::IsArabicText( rInf.GetText(), 
TextFrameIndex{aScanner.GetBegin()}, TextFrameIndex{aScanner.GetLen()} ) )
+        if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ 
aScanner.GetBegin() },
+                                              TextFrameIndex{ 
aScanner.GetLen() }))
         {
             rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, 
rInf.GetRefDev());
 
@@ -250,7 +251,7 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, 
SwTextSizeInfo& rInf, S
             }
 
             sal_Int32 nKashidasDropped = 0;
-            if ( !SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - 
nIdx ) )
+            if (!SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext 
- nIdx))
             {
                 nKashidasDropped = nKashidasInAttr;
                 rKashidas -= nKashidasDropped;
@@ -314,7 +315,8 @@ static bool lcl_CheckKashidaWidth ( SwScriptInfo& rSI, 
SwTextSizeInfo& rInf, SwT
             sal_Int32 nKashidasInAttr = rSI.KashidaJustify(nullptr, nullptr, 
nIdx, nNext - nIdx);
 
             tools::Long nFontMinKashida = rInf.GetRefDev()->GetMinKashida();
-            if ( nFontMinKashida && nKashidasInAttr > 0 && 
SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
+            if (nFontMinKashida && nKashidasInAttr > 0
+                && SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, 
nNext - nIdx))
             {
                 sal_Int32 nKashidasDropped = 0;
                 while ( rKashidas && nGluePortion && nKashidasInAttr > 0 &&
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index c18969fc709c..37cb23961cff 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -2194,16 +2194,18 @@ sal_Int32 SwScriptInfo::KashidaJustify( KernArray* 
pKernArray,
     return 0;
 }
 
-// Checks if the current text is 'Arabic' text. Note that only the first
+// Checks if the text is in Arabic or Syriac. Note that only the first
 // character has to be checked because a ctl portion only contains one
 // script, see NewTextPortion
-bool SwScriptInfo::IsArabicText(const OUString& rText,
+bool SwScriptInfo::IsKashidaScriptText(const OUString& rText,
         TextFrameIndex const nStt, TextFrameIndex const nLen)
 {
     using namespace ::com::sun::star::i18n;
     static const ScriptTypeList typeList[] = {
-        { UnicodeScript_kArabic, UnicodeScript_kArabic, 
sal_Int16(UnicodeScript_kArabic) },        // 11,
-        { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, 
sal_Int16(UnicodeScript_kScriptCount) }    // 88
+        { UnicodeScript_kArabic, UnicodeScript_kArabic, 
sal_Int16(UnicodeScript_kArabic) }, // 11,
+        { UnicodeScript_kSyriac, UnicodeScript_kSyriac, 
sal_Int16(UnicodeScript_kSyriac) }, // 12,
+        { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount,
+          sal_Int16(UnicodeScript_kScriptCount) } // 88
     };
 
     // go forward if current position does not hold a regular character:
@@ -2229,7 +2231,7 @@ bool SwScriptInfo::IsArabicText(const OUString& rText,
     {
         const sal_Unicode cCh = rText[nIdx];
         const sal_Int16 type = unicode::getUnicodeScriptType( cCh, typeList, 
sal_Int16(UnicodeScript_kScriptCount) );
-        return type == sal_Int16(UnicodeScript_kArabic);
+        return type == sal_Int16(UnicodeScript_kArabic) || type == 
sal_Int16(UnicodeScript_kSyriac);
     }
     return false;
 }
diff --git a/sw/source/core/text/portxt.cxx b/sw/source/core/text/portxt.cxx
index b310ee706cb5..147eeb10064b 100644
--- a/sw/source/core/text/portxt.cxx
+++ b/sw/source/core/text/portxt.cxx
@@ -116,7 +116,7 @@ static TextFrameIndex lcl_AddSpace(const SwTextSizeInfo 
&rInf,
     // Kashida Justification: Insert Kashidas
     if ( nEnd > nPos && pSI && COMPLEX == nScript )
     {
-        if ( SwScriptInfo::IsArabicText( *pStr, nPos, nEnd - nPos ) && 
pSI->CountKashida() )
+        if (SwScriptInfo::IsKashidaScriptText(*pStr, nPos, nEnd - nPos) && 
pSI->CountKashida())
         {
             const sal_Int32 nKashRes = pSI->KashidaJustify(nullptr, nullptr, 
nPos, nEnd - nPos);
             // i60591: need to check result of KashidaJustify
diff --git a/sw/source/core/txtnode/fntcache.cxx 
b/sw/source/core/txtnode/fntcache.cxx
index 8b68e29b9ec3..5ca414fc1c68 100644
--- a/sw/source/core/txtnode/fntcache.cxx
+++ b/sw/source/core/txtnode/fntcache.cxx
@@ -1127,7 +1127,8 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
                 // Kashida Justification
                 if ( SwFontScript::CTL == nActual && nSpaceAdd )
                 {
-                    if ( SwScriptInfo::IsArabicText( rInf.GetText(), 
rInf.GetIdx(), rInf.GetLen() ) )
+                    if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), 
rInf.GetIdx(),
+                                                          rInf.GetLen()))
                     {
                         aKashidaArray.resize(aKernArray.size(), false);
                         if ( pSI && pSI->CountKashida() &&
@@ -1344,7 +1345,7 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
             // Kashida Justification
             if ( SwFontScript::CTL == nActual && nSpaceAdd )
             {
-                if ( SwScriptInfo::IsArabicText( rInf.GetText(), 
rInf.GetIdx(), rInf.GetLen() ) )
+                if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), 
rInf.GetIdx(), rInf.GetLen()))
                 {
                     aKashidaArray.resize(aKernArray.size(), false);
                     if ( pSI && pSI->CountKashida() &&
@@ -1838,7 +1839,7 @@ TextFrameIndex 
SwFntObj::GetModelPositionForViewPoint(SwDrawTextInfo &rInf)
         // Kashida Justification
         if ( SwFontScript::CTL == nActual && rInf.GetSpace() )
         {
-            if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), 
rInf.GetLen() ) )
+            if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), 
rInf.GetIdx(), rInf.GetLen()))
             {
                 if ( pSI && pSI->CountKashida() &&
                     pSI->KashidaJustify( &aKernArray, nullptr, rInf.GetIdx(), 
rInf.GetLen(),
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf140767.odt 
b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt
new file mode 100644
index 000000000000..ab6efe6b83ef
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
index de387cd31850..5e59ec0373f1 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
@@ -5761,6 +5761,67 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, 
testTdf163105Editeng)
     CPPUNIT_ASSERT_LESS(170.0, aRect.at(2).getWidth());
 }
 
+CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf140767SyriacJustification)
+{
+    saveAsPDF(u"tdf140767.odt");
+
+    auto pPdfDocument = parsePDFExport();
+    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
+
+    auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0);
+    CPPUNIT_ASSERT(pPdfPage);
+    auto pTextPage = pPdfPage->getTextPage();
+    CPPUNIT_ASSERT(pTextPage);
+
+    int nPageObjectCount = pPdfPage->getObjectCount();
+
+    CPPUNIT_ASSERT_EQUAL(11, nPageObjectCount);
+
+    std::vector<OUString> aText;
+    std::vector<basegfx::B2DRectangle> aRect;
+
+    int nTextObjectCount = 0;
+    for (int i = 0; i < nPageObjectCount; ++i)
+    {
+        auto pPageObject = pPdfPage->getObject(i);
+        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
+        if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
+        {
+            aText.push_back(pPageObject->getText(pTextPage));
+            aRect.push_back(pPageObject->getBounds());
+            ++nTextObjectCount;
+        }
+    }
+
+    CPPUNIT_ASSERT_EQUAL(11, nTextObjectCount);
+
+    std::cout << "Strings" << std::endl;
+    for (auto const& em : aText)
+    {
+        std::cout << em << std::endl;
+        for (sal_Int32 i = 0; i < em.getLength(); ++i)
+        {
+            std::cout << std::hex << static_cast<uint32_t>(em[i]) << " ";
+        }
+        std::cout << std::endl;
+    }
+
+    CPPUNIT_ASSERT_EQUAL(u"ܝ"_ustr, aText.at(0).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(1).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܺܛ"_ustr, aText.at(2).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(3).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܚ"_ustr, aText.at(4).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܕ"_ustr, aText.at(5).trim()); // This span is 
whitespace justified
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(6).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܓ"_ustr, aText.at(7).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܒ"_ustr, aText.at(8).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(9).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܐ"_ustr, aText.at(10).trim());
+
+    // Without kashida justification, this space will be 224.328
+    CPPUNIT_ASSERT_LESS(90.0, aRect.at(5).getWidth());
+}
+
 } // end anonymous namespace
 
 CPPUNIT_PLUGIN_IMPLEMENT();

Reply via email to