sdext/source/pdfimport/test/testdocs/testSpace.pdf |binary
 sdext/source/pdfimport/test/tests.cxx              |   38 +++++++++++++++++++++
 sdext/source/pdfimport/tree/writertreevisiting.cxx |   28 +++++++++++++--
 3 files changed, 63 insertions(+), 3 deletions(-)

New commits:
commit c2e2997f452b93b400d541c2d0b2ee396a889007
Author:     Kevin Suo <suokunl...@126.com>
AuthorDate: Wed Oct 19 19:08:27 2022 +0800
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Tue Nov 8 08:40:29 2022 +0100

    sdext.pdfimport - Writer: add handling for continuous space characters
    
    This was already done for Draw in sdext/source/pdfimport/tree/drawtreevisiting.cxx,
    but not for Writer. Without this, a run of continuous spaces in a PDF is
    shown as only one space when imported with the Writer pdfimport filter.
    
    Change-Id: I2279d9b1750e07f5743aeba80a3fd553bc037d13
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141527
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>

diff --git a/sdext/source/pdfimport/test/testdocs/testSpace.pdf 
b/sdext/source/pdfimport/test/testdocs/testSpace.pdf
new file mode 100644
index 000000000000..3c94f31ea15b
Binary files /dev/null and b/sdext/source/pdfimport/test/testdocs/testSpace.pdf 
differ
diff --git a/sdext/source/pdfimport/test/tests.cxx 
b/sdext/source/pdfimport/test/tests.cxx
index 71661ae9e6d5..786815941445 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -841,6 +841,43 @@ namespace
 #endif
         }
 
+        void testSpaces()
+        {
+#if HAVE_FEATURE_POPPLER
+            rtl::Reference<pdfi::PDFIRawAdaptor> xAdaptor(new 
pdfi::PDFIRawAdaptor(OUString(), getComponentContext()));
+            xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
+
+            OString aOutput;
+            
xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/testSpace.pdf"),
+                    new OutputWrapString(aOutput),
+                    nullptr);
+            xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const 
*>(aOutput.getStr())));
+
+            // Space test: there are 10 spaces, each space is expressed as a 
<text:s text:c="1" ...>,
+            // thus the 10th text:s should exist and the attribute "text:c" 
should be "1".
+            OString xpath = 
"//draw:frame[@draw:z-index='1'][1]/draw:text-box/text:p/text:span/text:s[10]";
+            OUString  sContent = getXPath(pXmlDoc, xpath, "c");
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), 
sContent);
+
+            // Tab test: there are 10 tabs. Text before and after the tabs are 
shown in different draw frames.
+            // With the Liberation Serif font, the horizontal position of the 
first frame is 20.03mm and the
+            // second frame is 94.12mm.
+            xpath = "//draw:frame[@draw:z-index='2'][1]";
+            sContent = getXPath(pXmlDoc, xpath, "transform");
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), 
OUString("translate( 20.03mm 25.05mm )"), sContent);
+            xpath = "//draw:frame[@draw:z-index='3'][1]";
+            sContent = getXPath(pXmlDoc, xpath, "transform");
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), 
OUString("translate( 94.12mm 25.05mm )"), sContent);
+
+            // Non-breaking space test: there are 10 NBSpaces, which are 
treated as the same as normal space in PDF,
+            // thus each is expressed as a <text:s text:c="1" ...>.
+            // The 10th text:s should exist and the attribute "text:c" should 
be "1".
+            xpath = 
"//draw:frame[@draw:z-index='4'][1]/draw:text-box/text:p/text:span/text:s[10]";
+            sContent = getXPath(pXmlDoc, xpath, "c");
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), 
sContent);
+#endif
+        }
+
         CPPUNIT_TEST_SUITE(PDFITest);
         CPPUNIT_TEST(testXPDFParser);
         CPPUNIT_TEST(testOdfWriterExport);
@@ -853,6 +890,7 @@ namespace
         CPPUNIT_TEST(testTdf78427_FontWeight_MyraidProSemibold);
         CPPUNIT_TEST(testTdf143959_nameFromFontFile);
         CPPUNIT_TEST(testTdf104597_textrun);
+        CPPUNIT_TEST(testSpaces);
         CPPUNIT_TEST_SUITE_END();
     };
 
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx 
b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index deabf365088b..510689be1588 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -81,7 +81,11 @@ void WriterXmlEmitter::visit( TextElement& elem, const 
std::list< std::unique_pt
     if( elem.Text.isEmpty() )
         return;
 
-    PropertyMap aProps;
+    PropertyMap aProps = {};
+    const sal_Unicode strSpace = 0x0020;
+    const sal_Unicode strNbSpace = 0x00A0;
+    const sal_Unicode tabSpace = 0x0009;
+
     if( elem.StyleId != -1 )
     {
         aProps[ OUString( "text:style-name" ) ] =
@@ -111,8 +115,26 @@ void WriterXmlEmitter::visit( TextElement& elem, const 
std::list< std::unique_pt
         str = ::comphelper::string::reverseString(str);
 
     m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
-    // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( 
TextElement& elem...)
-    m_rEmitContext.rEmitter.write(str);
+
+    sal_Unicode strToken;
+    for (int i = 0; i < elem.Text.getLength(); i++)
+    {
+        strToken = str[i];
+        if (strToken == strSpace || strToken == strNbSpace)
+        {
+            aProps["text:c"] = "1";
+            m_rEmitContext.rEmitter.beginTag("text:s", aProps);
+            m_rEmitContext.rEmitter.endTag("text:s");
+        }
+        else if (strToken == tabSpace)
+        {
+            m_rEmitContext.rEmitter.beginTag("text:tab", aProps);
+            m_rEmitContext.rEmitter.endTag("text:tab");
+        }
+        else
+            m_rEmitContext.rEmitter.write(OUString(strToken));
+    }
+
     auto this_it = elem.Children.begin();
     while( this_it != elem.Children.end() && this_it->get() != &elem )
     {

Reply via email to