vcl/inc/pdf/pdfwriter_impl.hxx                        |   14 -
 vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf        |binary
 vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt |binary
 vcl/qa/cppunit/pdfexport/pdfexport2.cxx               |   66 +++++
 vcl/source/gdi/pdfwriter_impl.cxx                     |  203 +++++++++++-------
 5 files changed, 210 insertions(+), 73 deletions(-)

New commits:
commit fb08f549d9f8d0282585500f2b9faba809165f94
Author:     Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk>
AuthorDate: Tue Dec 3 23:32:47 2024 +0900
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Mon Dec 9 15:55:34 2024 +0100

    pdf: fix saving external PDF with form fields
    
    When opening a PDF with filled forms with PDFium import and then
    save/exprot the PDF again, it should retain the forms. Before
    this patch it didn't retain the forms.
    
    This patch copies the forms / widget annotations from the external
    PDF into the newly created PDF.
    
    Change-Id: I3d7fdcfe2c4ec0f716425c349b0bf621809fd249
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177765
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/vcl/inc/pdf/pdfwriter_impl.hxx b/vcl/inc/pdf/pdfwriter_impl.hxx
index 7a90b61d56cf..a35f6f0b5458 100644
--- a/vcl/inc/pdf/pdfwriter_impl.hxx
+++ b/vcl/inc/pdf/pdfwriter_impl.hxx
@@ -479,6 +479,11 @@ struct PDFScreen : public PDFAnnotation
     }
 };
 
+struct PDFWidgetCopy
+{
+    sal_Int32 m_nObject = -1;
+};
+
 struct PDFWidget : public PDFAnnotation
 {
     PDFWriter::WidgetType       m_eType;
@@ -759,9 +764,10 @@ private:
     /* structure elements (object ids) that should have ID */
     std::unordered_set<sal_Int32> m_StructElemObjsWithID;
 
-    /* contains all widgets used in the PDF
-     */
-    std::vector<PDFWidget>              m_aWidgets;
+    /* contains all widgets used in the PDF */
+    std::vector<PDFWidget> m_aWidgets;
+    std::vector<PDFWidgetCopy> m_aCopiedWidgets;
+
     /* maps radio group id to index of radio group control in m_aWidgets */
     std::map< sal_Int32, sal_Int32 >    m_aRadioGroupWidgets;
     /* unordered_map for field names, used to ensure unique field names */
@@ -882,6 +888,8 @@ private:
     /// Writes the form XObject proxy for the image.
     void writeReferenceXObject(const ReferenceXObjectEmit& rEmit);
 
+    void mergeAnnotationsFromExternalPage(filter::PDFObjectElement* pPage,  
std::map<sal_Int32, sal_Int32>& rCopiedResourcesMap);
+
     /* tries to find the bitmap by its id and returns its emit data if exists,
        else creates a new emit data block */
     const BitmapEmit& createBitmapEmit( const BitmapEx& rBitmapEx, const 
Graphic& rGraphic, std::list<BitmapEmit>& rBitmaps, ResourceDict& 
rResourceDict, std::list<StreamRedirect>& rOutputStreams );
diff --git a/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf 
b/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf
new file mode 100644
index 000000000000..7032f8099411
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf 
differ
diff --git a/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt 
b/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt
new file mode 100644
index 000000000000..42efe7f45dd7
Binary files /dev/null and 
b/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
index bc23346a955d..3dc780f2cce7 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
@@ -5026,6 +5026,72 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, 
testRexportXnViewColorspace)
     CPPUNIT_ASSERT_EQUAL("DeviceRGB"_ostr, pColorSpaceElement->GetValue());
 }
 
+CPPUNIT_TEST_FIXTURE(PdfExportTest2, testFormRoundtrip)
+{
+    // Loads and saves a PDF with filled forms. This checks the forms survive 
the round-trip.
+
+    // We need to enable PDFium import (and make sure to disable after the 
test)
+    bool bResetEnvVar = false;
+    if (getenv("LO_IMPORT_USE_PDFIUM") == nullptr)
+    {
+        bResetEnvVar = true;
+        osl_setEnvironment(u"LO_IMPORT_USE_PDFIUM"_ustr.pData, 
u"1"_ustr.pData);
+    }
+    comphelper::ScopeGuard aPDFiumEnvVarGuard([&]() {
+        if (bResetEnvVar)
+            osl_clearEnvironment(u"LO_IMPORT_USE_PDFIUM"_ustr.pData);
+    });
+
+    // Need to properly set the PDF export options
+    aMediaDescriptor["FilterName"] <<= OUString("draw_pdf_Export");
+    uno::Sequence<beans::PropertyValue> aFilterData(
+        comphelper::InitPropertySequence({ { "UseTaggedPDF", uno::Any(true) } 
}));
+    aMediaDescriptor["FilterData"] <<= aFilterData;
+
+    saveAsPDF(u"FilledUpForm.pdf");
+
+    // Parse the round-tripped document with PDFium
+    auto pPdfDocument = parsePDFExport();
+    // Should be 1 page
+    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
+    std::unique_ptr<vcl::pdf::PDFiumPage> pPage = pPdfDocument->openPage(0);
+    std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject = 
pPage->getObject(1);
+
+    // 5 annotations means 5 form fields
+    CPPUNIT_ASSERT_EQUAL(5, pPage->getAnnotationCount());
+
+    // Check each form
+    {
+        std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = 
pPage->getAnnotation(0);
+        CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::CheckBox,
+                             
pAnnotation->getFormFieldType(pPdfDocument.get()));
+    }
+
+    {
+        std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = 
pPage->getAnnotation(1);
+        CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::ComboBox,
+                             
pAnnotation->getFormFieldType(pPdfDocument.get()));
+    }
+
+    {
+        std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = 
pPage->getAnnotation(2);
+        CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField,
+                             
pAnnotation->getFormFieldType(pPdfDocument.get()));
+    }
+
+    {
+        std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = 
pPage->getAnnotation(3);
+        CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField,
+                             
pAnnotation->getFormFieldType(pPdfDocument.get()));
+    }
+
+    {
+        std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = 
pPage->getAnnotation(4);
+        CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField,
+                             
pAnnotation->getFormFieldType(pPdfDocument.get()));
+    }
+}
+
 } // end anonymous namespace
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx 
b/vcl/source/gdi/pdfwriter_impl.cxx
index 66edafd9bc4c..3c462434292f 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -5617,39 +5617,46 @@ bool PDFWriterImpl::emitCatalog()
     {
         aLine.append( "/MarkInfo<</Marked true>>
" );
     }
-    if( !m_aWidgets.empty() )
+
+    if (!m_aWidgets.empty() || !m_aCopiedWidgets.empty())
     {
-        aLine.append( "/AcroForm<</Fields[
" );
-        int nWidgets = m_aWidgets.size();
-        int nOut = 0;
-        for( int j = 0; j < nWidgets; j++ )
+        aLine.append("/AcroForm");
+        aLine.append("<<");
+        aLine.append("/Fields");
+        aLine.append("[ ");
+
+        for (auto const& rWidget : m_aWidgets)
         {
             // output only root fields
-            if( m_aWidgets[j].m_nParent < 1 )
+            if (rWidget.m_nParent < 1)
             {
-                aLine.append( m_aWidgets[j].m_nObject );
-                aLine.append( (nOut++ % 5)==4 ? " 0 R
" : " 0 R " );
+                appendObjectReference(rWidget.m_nObject, aLine);
             }
         }
-        aLine.append( "
]" );
+        // Add widgets that were copied from an external PDF
+        for (auto const& rCopiedWidget : m_aCopiedWidgets)
+        {
+            appendObjectReference(rCopiedWidget.m_nObject, aLine);
+        }
+        aLine.append(" ]
");
 
+        bool bSigned = false;
 #if HAVE_FEATURE_NSS
         if (m_nSignatureObject != -1)
-            aLine.append( "/SigFlags 3");
+        {
+            aLine.append("/SigFlags 3 ");
+            bSigned = true;
+        }
 #endif
+        aLine.append("/DR ");
+        appendObjectReference(getResourceDictObj(), aLine);
 
-        aLine.append( "/DR " );
-        aLine.append( getResourceDictObj() );
-        aLine.append( " 0 R" );
-        // NeedAppearances must not be used if PDF is signed
-        if (m_nPDFA_Version > 0
-#if HAVE_FEATURE_NSS
-            || ( m_nSignatureObject != -1 )
-#endif
-            )
-            aLine.append( ">>
" );
-        else
-            aLine.append( "/NeedAppearances true>>
" );
+        // NeedAppearances must not be used if PDF is signed, PDF/A is used or
+        // we have copied widgets (can't guarantee we have appearance streams 
in this case)
+        if (m_nPDFA_Version == 0 && !bSigned && m_aCopiedWidgets.empty())
+            aLine.append("/NeedAppearances true ");
+
+        aLine.append(">>
");
     }
 
     //check if there is a Metadata object
@@ -9288,6 +9295,14 @@ void PDFWriterImpl::writeReferenceXObject(const 
ReferenceXObjectEmit& rEmit)
             return;
         }
 
+        // Get the copied resource map, so we can use that to skip objects we 
already copied
+        auto& rCopiedResourcesMap = rExternalPDFStream.getCopiedResources();
+
+        // Add page mapping to the copied resources map.
+        // Needed if we reference the current page and we want to prevent 
copying the page
+        // if it is referenced.
+        rCopiedResourcesMap.emplace(pPage->GetObjectValue(), 
m_aPages.back().m_nPageObject);
+
         double aOrigin[2] = { 0.0, 0.0 };
 
         // tdf#160714 use crop box for bounds of embedded PDF object
@@ -9334,52 +9349,8 @@ void PDFWriterImpl::writeReferenceXObject(const 
ReferenceXObjectEmit& rEmit)
             return;
         }
 
-        // Merge link annotations from pPage to our page.
-        std::vector<filter::PDFObjectElement*> aAnnots;
-        if (auto pArray = 
dynamic_cast<filter::PDFArrayElement*>(pPage->Lookup("Annots"_ostr)))
-        {
-            for (const auto pElement : pArray->GetElements())
-            {
-                auto pReference = 
dynamic_cast<filter::PDFReferenceElement*>(pElement);
-                if (!pReference)
-                {
-                    continue;
-                }
-
-                filter::PDFObjectElement* pObject = pReference->LookupObject();
-                if (!pObject)
-                {
-                    continue;
-                }
-
-                auto pType = 
dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Type"_ostr));
-                if (!pType || pType->GetValue() != "Annot")
-                {
-                    continue;
-                }
-
-                auto pSubtype = 
dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Subtype"_ostr));
-                if (!pSubtype || pSubtype->GetValue() != "Link")
-                {
-                    continue;
-                }
-
-                // Reference to a link annotation object, remember it.
-                aAnnots.push_back(pObject);
-            }
-        }
-        if (!aAnnots.empty())
-        {
-            PDFObjectCopier aCopier(*this);
-            SvMemoryStream& rDocBuffer = pPage->GetDocument().GetEditBuffer();
-            std::map<sal_Int32, sal_Int32> aMap;
-            for (const auto& pAnnot : aAnnots)
-            {
-                // Copy over the annotation and refer to its new id.
-                sal_Int32 nNewId = aCopier.copyExternalResource(rDocBuffer, 
*pAnnot, aMap);
-                m_aPages.back().m_aAnnotations.push_back(nNewId);
-            }
-        }
+        // Merge link and widget annotations from pPage to our page.
+        mergeAnnotationsFromExternalPage(pPage, rCopiedResourcesMap);
 
         nWrappedFormObject = createObject();
         // Write the form XObject wrapped below. This is a separate object from
@@ -9430,8 +9401,7 @@ void PDFWriterImpl::writeReferenceXObject(const 
ReferenceXObjectEmit& rEmit)
         }
 
         PDFObjectCopier aCopier(*this);
-        auto & rResources = rExternalPDFStream.getCopiedResources();
-        aCopier.copyPageResources(pPage, aLine, rResources);
+        aCopier.copyPageResources(pPage, aLine, rCopiedResourcesMap);
 
         aLine.append(" /BBox [ ");
         aLine.append(aOrigin[0]);
@@ -9583,6 +9553,99 @@ void PDFWriterImpl::writeReferenceXObject(const 
ReferenceXObjectEmit& rEmit)
     CHECK_RETURN2(writeBuffer(aLine));
 }
 
+namespace
+{
+
+sal_Int32 getRootParent(filter::PDFObjectElement* pObject)
+{
+    auto* pReference = 
dynamic_cast<filter::PDFReferenceElement*>(pObject->Lookup("Parent"_ostr));
+    if (!pReference)
+        return pObject->GetObjectValue();
+
+    auto* pParent = pReference->LookupObject();
+    return getRootParent(pParent);
+}
+
+} // end anonymous
+
+void PDFWriterImpl::mergeAnnotationsFromExternalPage(filter::PDFObjectElement* 
pPage,  std::map<sal_Int32, sal_Int32>& rCopiedResourcesMap)
+{
+    auto* pResult = pPage->Lookup("Annots"_ostr);
+    filter::PDFArrayElement* pArray = nullptr;
+    // If the Annots array is a reference - get the array from the referenced 
object
+    auto pAnnotsReference = 
dynamic_cast<filter::PDFReferenceElement*>(pResult);
+    if (pAnnotsReference)
+    {
+        filter::PDFObjectElement* pObject = pAnnotsReference->LookupObject();
+        pArray = pObject->GetArray();
+    }
+    else
+    {
+        // Not a reference so is it an array
+        pArray = dynamic_cast<filter::PDFArrayElement*>(pResult);
+    }
+
+    // Have we found our /Annots array?
+    if (!pArray)
+        return;
+
+    std::unordered_set<sal_Int32> aAlreadyCopied;
+    PDFObjectCopier aCopier(*this);
+    SvMemoryStream& rDocBuffer = pPage->GetDocument().GetEditBuffer();
+
+    for (const auto pElement : pArray->GetElements())
+    {
+        auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pElement);
+        if (!pReference)
+            continue;
+
+        filter::PDFObjectElement* pObject = pReference->LookupObject();
+        if (!pObject)
+            continue;
+
+        // Get the /Type and the /Subtype
+        auto pType = 
dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Type"_ostr));
+        auto pSubtype = 
dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Subtype"_ostr));
+
+        // Is it a /Annot we want to copy?
+        if (pType && pType->GetValue() == "Annot" && pSubtype)
+        {
+            bool bIsLink = pSubtype->GetValue() == "Link";
+            bool bIsWidget = pSubtype->GetValue() == "Widget";
+
+            // is link or widget
+            if (!bIsLink && !bIsWidget)
+                continue;
+
+            // Copy over the annotation and refer to its new id.
+            sal_Int32 nNewId = aCopier.copyExternalResource(rDocBuffer, 
*pObject, rCopiedResourcesMap);
+            m_aPages.back().m_aAnnotations.push_back(nNewId);
+
+            if (!bIsWidget)
+                continue;
+
+            // Find the root
+            sal_Int32 nRootID = getRootParent(pObject);
+
+            auto aIterator = rCopiedResourcesMap.find(nRootID);
+            if (aIterator == rCopiedResourcesMap.end()) // Can't find the 
mapped ID ?
+                continue;
+
+            nNewId = aIterator->second;
+
+            // Ignore if we added the ID already
+            if (aAlreadyCopied.find(nNewId) == aAlreadyCopied.end())
+            {
+                // Add new entry into copied widgets vector
+                auto& rCopiedWidget = m_aCopiedWidgets.emplace_back();
+                rCopiedWidget.m_nObject = nNewId;
+                aAlreadyCopied.emplace(nNewId);
+            }
+        }
+    }
+
+}
+
 bool PDFWriterImpl::writeBitmapObject( const BitmapEmit& rObject, bool bMask )
 {
     if (rObject.m_aReferenceXObject.hasExternalPDFData() && 
!m_aContext.UseReferenceXObject)

Reply via email to