sdext/source/pdfimport/test/tests.cxx              |   30 ++++++++++---
 sdext/source/pdfimport/tree/writertreevisiting.cxx |   46 +++++++++++++++++++--
 sdext/source/pdfimport/tree/writertreevisiting.hxx |    4 +
 3 files changed, 70 insertions(+), 10 deletions(-)

New commits:
commit f6004e1c457ddab5e0c91e6159875d25130b108a
Author:     Kevin Suo <suokunl...@126.com>
AuthorDate: Sat Oct 15 19:43:54 2022 +0800
Commit:     Thorsten Behrens <thorsten.behr...@allotropia.de>
CommitDate: Wed Oct 19 21:34:13 2022 +0200

    tdf#151546: RTL text is reversed (Writer pdfimport)
    
    This is a followup to commit 69e9925ded584113e52f84ef0ed7c224079fa061
    for the fix of tdf#104597.
    
    The Writer pdf import filter code is similar to the Draw part.
    However, many fixes to the Draw part were historically not ported
    to the Writer part.
    
    This patch ports the text run fix from the Draw part to the Writer
    part. There is a TODO related to the consecutive-spaces issue, which
    should be fixed separately.
    
    Also use CPPUNIT_ASSERT_EQUAL_MESSAGE to print the XML output when a
    unit test fails, instead of using std::cout.
    
    Change-Id: Id013700524750e6e5283d85eeab72d8075f16f1b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141420
    Tested-by: Thorsten Behrens <thorsten.behr...@allotropia.de>
    Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de>

diff --git a/sdext/source/pdfimport/test/tests.cxx 
b/sdext/source/pdfimport/test/tests.cxx
index 25c12a23901c..7cff15a36d0f 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -799,36 +799,54 @@ namespace
                     new OutputWrapString(aOutput),
                     nullptr));
 
-            // std::cout << aOutput << std::endl;
             xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const 
*>(aOutput.getStr())));
 
             // Test for امُ عَلَيْكَ
             // TODO: How to get the "عَلَيْكَ" in xpath, as shown after the 
<text:s> tag?
             OString xpath = 
"//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 
14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span";
             OUString sContent = getXPathContent(pXmlDoc, xpath); // 
u"\nا\nُ\nم\n"
-            CPPUNIT_ASSERT_EQUAL(OUString(u"اُم"), sContent.replaceAll("\n", 
""));
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم"), 
sContent.replaceAll("\n", ""));
 
             // Test for ٱلَّسَل‬ . It appears in the 3rd frame, i.e. after the 
امُ عَلَيْكَ which is in the 2nd frame (from left to right)
             // thus these two frames together appear as ٱلَّسَل امُ عَلَيْكَ 
in Draw‬.
             xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 
917.222222222222 17420.1666666667 
2159.23861112778)']/draw:text-box/text:p/text:span";
             sContent = getXPathContent(pXmlDoc, xpath);
-            CPPUNIT_ASSERT_EQUAL(OUString(u"ٱلَّسَل"), 
sContent.replaceAll("\n", ""));
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), 
OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
 
             // Test for "LibreOffice LTR"
             // TODO: How to get the "LTR" as shown after the <text:s> tag?
             xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 
917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span";
             sContent = getXPathContent(pXmlDoc, xpath);
-            CPPUNIT_ASSERT_EQUAL(OUString(u"LibreOffice"), 
sContent.replaceAll("\n", ""));
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), 
OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
 
             /* Test for Chinese characters */
             // Use last() instead of matrix below, because the matrix may be 
different on different OS due to fallback of Chinese fonts.
             xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
             sContent = getXPathContent(pXmlDoc, xpath);
-            CPPUNIT_ASSERT_EQUAL(OUString(u"中文测试,中文"), 
sContent.replaceAll("\n", ""));
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), 
OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
+
+            // Test pdf text run in the Writer PDF import filter
+            xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
+            OString aOutput2;
+            
xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/tdf104597_textrun.pdf"),
+                    new OutputWrapString(aOutput2),
+                    nullptr);
+            // FIXME: the same draw:frame is duplicated in the xml output,
+            // e.g. there are two draw:frame with draw:z-index="3" with the 
same content.
+            xmlDocUniquePtr pXmlDoc2(xmlParseDoc(reinterpret_cast<xmlChar 
const *>(aOutput2.getStr())));
+            xpath = 
"//draw:frame[@draw:z-index='3'][1]/draw:text-box/text:p/text:span";
+            sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), 
OUString(u"ٱلَّسَل"), sContent);
+            xpath = 
"//draw:frame[@draw:z-index='2'][1]/draw:text-box/text:p/text:span";
+            sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
+            // need to use اُم rather than اُم َعَلْيَك here, because this 
node may be different on different systems
+            CPPUNIT_ASSERT_EQUAL(true, sContent.match(u"اُم"));
+            xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
+            sContent = getXPathContent(pXmlDoc2, xpath);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), 
OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
 #endif
         }
 
-
         CPPUNIT_TEST_SUITE(PDFITest);
         CPPUNIT_TEST(testXPDFParser);
         CPPUNIT_TEST(testOdfWriterExport);
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx 
b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index 3e21932eb6c9..2ece5307bd53 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -31,12 +31,28 @@
 
 #include <basegfx/polygon/b2dpolypolygontools.hxx>
 #include <osl/diagnose.h>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/DirectionProperty.hpp>
+#include <comphelper/string.hxx>
 
 using namespace ::com::sun::star;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::uno;
 
 namespace pdfi
 {
 
+const Reference< XCharacterClassification >& 
WriterXmlEmitter::GetCharacterClassification()
+{
+    if ( !mxCharClass.is() )
+    {
+        Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, 
uno::UNO_SET_THROW );
+        mxCharClass = CharacterClassification::create(xContext);
+    }
+    return mxCharClass;
+}
+
 void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< 
std::unique_ptr<Element> >::const_iterator&   )
 {
     if( elem.Children.empty() )
@@ -72,8 +88,31 @@ void WriterXmlEmitter::visit( TextElement& elem, const 
std::list< std::unique_pt
             m_rEmitContext.rStyles.getStyleName( elem.StyleId );
     }
 
+    OUString str(elem.Text.toString());
+
+    // Check for RTL
+    bool isRTL = false;
+    Reference< i18n::XCharacterClassification > xCC( 
GetCharacterClassification() );
+    if( xCC.is() )
+    {
+        for(int i=1; i< elem.Text.getLength(); i++)
+        {
+            i18n::DirectionProperty nType = 
static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
+            if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT           ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC    ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
+                )
+                isRTL = true;
+        }
+    }
+
+    if (isRTL)  // If so, reverse string
+        str = ::comphelper::string::reverseString(str);
+
     m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
-    m_rEmitContext.rEmitter.write( elem.Text.makeStringAndClear() );
+    // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( 
TextElement& elem...)
+    m_rEmitContext.rEmitter.write(str);
     auto this_it = elem.Children.begin();
     while( this_it != elem.Children.end() && this_it->get() != &elem )
     {
@@ -797,13 +836,12 @@ void WriterXmlOptimizer::optimizeTextElements(Element& 
rParent)
                     }
                 }
                 // concatenate consecutive text elements unless there is a
-                // font or text color or matrix change, leave a new span in 
that case
+                // font or text color change, leave a new span in that case
                 if( pCur->FontId == pNext->FontId &&
                     rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
                     rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
                     rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
-                    rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha &&
-                    rCurGC.Transformation == rNextGC.Transformation
+                    rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
                     )
                 {
                     pCur->updateGeometryWith( pNext );
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx 
b/sdext/source/pdfimport/tree/writertreevisiting.hxx
index 1c1507f13349..e473c27372e6 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.hxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx
@@ -24,6 +24,8 @@
 
 #include <pdfihelper.hxx>
 
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+
 namespace pdfi
 {
     struct DrawElement;
@@ -80,12 +82,14 @@ namespace pdfi
     class WriterXmlEmitter : public ElementTreeVisitor
     {
     private:
+        css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
         EmitContext& m_rEmitContext ;
         static void fillFrameProps( DrawElement&       rElem,
                              PropertyMap&       rProps,
                              const EmitContext& rEmitContext );
 
     public:
+        const css::uno::Reference<css::i18n::XCharacterClassification >& 
GetCharacterClassification();
         explicit WriterXmlEmitter(EmitContext& rEmitContext) :
             m_rEmitContext(rEmitContext)
         {}

Reply via email to