editeng/source/editeng/impedit4.cxx                   |   36 +++++++++---------
 sc/qa/unit/copy_paste_test.cxx                        |   28 ++++++++++++++
 sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx |binary
 3 files changed, 47 insertions(+), 17 deletions(-)

New commits:
commit 0c1ae785e3fb3a800f6b7743a03245dca6c01f14
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Tue Nov 5 16:08:50 2024 +0500
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Nov 6 08:32:16 2024 +0100

    tdf#122716: take encoding defined for font into account
    
    Before this, non-ASCII characters representable in Windows-1252 were
    exported to RTF without Unicode markup, regardless of the font-defined
    charset; on import to Writer (and other compliant RTF readers), this
    8-bit markup was interpreted using the font data, producing different
    characters.
    
    Change-Id: I2032930b6585287fde3eb3b5e6abed0298d29330
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/176048
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/editeng/source/editeng/impedit4.cxx 
b/editeng/source/editeng/impedit4.cxx
index 22e99ccf894f..b4ba8ea835a0 100644
--- a/editeng/source/editeng/impedit4.cxx
+++ b/editeng/source/editeng/impedit4.cxx
@@ -293,6 +293,15 @@ void ImpEditEngine::WriteXML(SvStream& rOutput, const 
EditSelection& rSel)
     SvxWriteXML( *GetEditEnginePtr(), rOutput, aESel );
 }
 
+static size_t GetFontIndex(const SfxPoolItem& rItem,
+                           const std::vector<std::unique_ptr<SvxFontItem>>& 
rFontTable)
+{
+    for (size_t i = 0; i < rFontTable.size(); ++i)
+        if (*rFontTable[i] == rItem)
+            return i;
+    return 0;
+}
+
 ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, EditSelection aSel, bool 
bClipboard )
 {
     assert( IsUpdateLayout() && "WriteRTF for UpdateMode = sal_False!" );
@@ -381,10 +390,6 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, 
EditSelection aSel, bool bCl
 
         rtl_TextEncoding eChrSet = pFontItem->GetCharSet();
         // tdf#47679 OpenSymbol is not encoded in Symbol Encoding
-        // and anyway we always attempt to write as eDestEnc
-        // of RTL_TEXTENCODING_MS_1252 and pay no attention
-        // on export what encoding we claim to use for these
-        // fonts.
         if (IsOpenSymbol(pFontItem->GetFamilyName()))
         {
             SAL_WARN_IF(eChrSet == RTL_TEXTENCODING_SYMBOL, "editeng", 
"OpenSymbol should not have charset of RTL_TEXTENCODING_SYMBOL in new 
documents");
@@ -687,10 +692,17 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, 
EditSelection aSel, bool bCl
                 aAttribItems.Clear();
                 sal_uInt16 nScriptTypeI18N = GetI18NScriptType( EditPaM( 
pNode, nIndex+1 ) );
                 SvtScriptType nScriptType = 
SvtLanguageOptions::FromI18NToSvtScriptType(nScriptTypeI18N);
+                rtl_TextEncoding actEncoding = eDestEnc;
                 if ( !n || IsScriptChange( EditPaM( pNode, nIndex ) ) )
                 {
                     SfxItemSet aAttribs = GetAttribs( nNode, nIndex+1, 
nIndex+1 );
-                    aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( 
EE_CHAR_FONTINFO, nScriptType ) ) );
+                    auto& item = 
aAttribs.Get(GetScriptItemId(EE_CHAR_FONTINFO, nScriptType));
+                    aAttribItems.Insert(&item);
+                    // The actual encoding that RTF uses for the portion is 
defined by the font
+                    if (auto i = GetFontIndex(item, aFontTable);
+                        i < aFontTable.size()
+                        && aFontTable[i]->GetCharSet() != 
RTL_TEXTENCODING_DONTKNOW)
+                        actEncoding = aFontTable[i]->GetCharSet();
                     aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( 
EE_CHAR_FONTHEIGHT, nScriptType ) ) );
                     aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( 
EE_CHAR_WEIGHT, nScriptType ) ) );
                     aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( 
EE_CHAR_ITALIC, nScriptType ) ) );
@@ -711,7 +723,7 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, 
EditSelection aSel, bool bCl
                     nE = nEndPos;
 
                 OUString aRTFStr = EditDoc::GetParaAsString( pNode, nS, nE);
-                RTFOutFuncs::Out_String( rOutput, aRTFStr, eDestEnc );
+                RTFOutFuncs::Out_String(rOutput, aRTFStr, actEncoding);
                 rOutput.WriteChar( '}' );
             }
             if ( bFinishPortion )
@@ -847,18 +859,8 @@ void ImpEditEngine::WriteItemAsRTF( const SfxPoolItem& 
rItem, SvStream& rOutput,
         case EE_CHAR_FONTINFO_CJK:
         case EE_CHAR_FONTINFO_CTL:
         {
-            sal_uInt32 n = 0;
-            for (size_t i = 0; i < rFontTable.size(); ++i)
-            {
-                if (*rFontTable[i] == rItem)
-                {
-                    n = i;
-                    break;
-                }
-            }
-
             rOutput.WriteOString( OOO_STRING_SVTOOLS_RTF_F );
-            rOutput.WriteNumberAsString( n );
+            rOutput.WriteNumberAsString(GetFontIndex(rItem, rFontTable));
         }
         break;
         case EE_CHAR_FONTHEIGHT:
diff --git a/sc/qa/unit/copy_paste_test.cxx b/sc/qa/unit/copy_paste_test.cxx
index 41d764e82381..b1691fc23d5d 100644
--- a/sc/qa/unit/copy_paste_test.cxx
+++ b/sc/qa/unit/copy_paste_test.cxx
@@ -45,6 +45,7 @@ public:
     void tdf113500_autofillMixed();
     void tdf137625_autofillMergedUserlist();
     void tdf137624_autofillMergedMixed();
+    void tdf122716_rtf_portion_encoding();
 
     CPPUNIT_TEST_SUITE(ScCopyPasteTest);
     CPPUNIT_TEST(testCopyPasteXLS);
@@ -62,6 +63,7 @@ public:
     CPPUNIT_TEST(tdf113500_autofillMixed);
     CPPUNIT_TEST(tdf137625_autofillMergedUserlist);
     CPPUNIT_TEST(tdf137624_autofillMergedMixed);
+    CPPUNIT_TEST(tdf122716_rtf_portion_encoding);
     CPPUNIT_TEST_SUITE_END();
 
 private:
@@ -775,6 +777,32 @@ void ScCopyPasteTest::tdf137624_autofillMergedMixed()
     }
 }
 
+void ScCopyPasteTest::tdf122716_rtf_portion_encoding()
+{
+    // Given a document with an explicitly defined "204" (Russian) charset for 
a font,
+    // and a cell having contents of "Šampūnas", which has character "Š" 
representable
+    // in Windows-1252 (RTF default), but not in Windows-1251 (i.e. charset 
204):
+    createScDoc("xlsx/tdf122716_font_with_charset.xlsx");
+    ScModelObj* pModelObj = 
comphelper::getFromUnoTunnel<ScModelObj>(mxComponent);
+    // Obtain a transferable, similar to what happens on copy to clipboard:
+    auto xTransferable = pModelObj->getSelection();
+    // Get the RTF data:
+    auto rtf_any = xTransferable->getTransferData({ u"text/rtf"_ustr, {}, {} 
});
+    css::uno::Sequence<sal_Int8> rtf_bytes;
+    CPPUNIT_ASSERT(rtf_any >>= rtf_bytes);
+    OString rtf_string(reinterpret_cast<const 
char*>(rtf_bytes.getConstArray()),
+                       rtf_bytes.getLength());
+    // Check that the font with charset was actually emitted
+    CPPUNIT_ASSERT(rtf_string.indexOf("\\fcharset204 Liberation Sans;") >= 0);
+    // Make sure that Unicode markup is emitted for the non-Ascii characters.
+    // Without the fix, "\u352" wasn't there, because the export was using 
Windows-1252
+    // encoding unconditionally, even though the exported font defined a 
different one;
+    // so the exported characters only had Unicode markup, when not 
representable in the
+    // Windows-1252 encoding, and "Š" got exported as "\'8a". On import to 
Writer, font
+    // encoding was used, and "\'8a" was interpreted as a Cyrillic alphabet 
character.
+    CPPUNIT_ASSERT(rtf_string.indexOf("\\u352\\'3famp\\u363\\'3fnas") >= 0);
+}
+
 ScCopyPasteTest::ScCopyPasteTest()
       : ScModelTestBase(u"/sc/qa/unit/data/"_ustr)
 {
diff --git a/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx 
b/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx
new file mode 100644
index 000000000000..6c2326e3ed28
Binary files /dev/null and 
b/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx differ

Reply via email to