vcl/inc/pdf/pdfwriter_impl.hxx | 14 - vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf |binary vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt |binary vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 66 +++++ vcl/source/gdi/pdfwriter_impl.cxx | 203 +++++++++++------- 5 files changed, 210 insertions(+), 73 deletions(-)
New commits: commit fb08f549d9f8d0282585500f2b9faba809165f94 Author: Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk> AuthorDate: Tue Dec 3 23:32:47 2024 +0900 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Mon Dec 9 15:55:34 2024 +0100 pdf: fix saving external PDF with form fields When opening a PDF with filled forms with PDFium import and then save/exprot the PDF again, it should retain the forms. Before this patch it didn't retain the forms. This patch copies the forms / widget annotations from the external PDF into the newly created PDF. Change-Id: I3d7fdcfe2c4ec0f716425c349b0bf621809fd249 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177765 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> Reviewed-by: Miklos Vajna <vmik...@collabora.com> diff --git a/vcl/inc/pdf/pdfwriter_impl.hxx b/vcl/inc/pdf/pdfwriter_impl.hxx index 7a90b61d56cf..a35f6f0b5458 100644 --- a/vcl/inc/pdf/pdfwriter_impl.hxx +++ b/vcl/inc/pdf/pdfwriter_impl.hxx @@ -479,6 +479,11 @@ struct PDFScreen : public PDFAnnotation } }; +struct PDFWidgetCopy +{ + sal_Int32 m_nObject = -1; +}; + struct PDFWidget : public PDFAnnotation { PDFWriter::WidgetType m_eType; @@ -759,9 +764,10 @@ private: /* structure elements (object ids) that should have ID */ std::unordered_set<sal_Int32> m_StructElemObjsWithID; - /* contains all widgets used in the PDF - */ - std::vector<PDFWidget> m_aWidgets; + /* contains all widgets used in the PDF */ + std::vector<PDFWidget> m_aWidgets; + std::vector<PDFWidgetCopy> m_aCopiedWidgets; + /* maps radio group id to index of radio group control in m_aWidgets */ std::map< sal_Int32, sal_Int32 > m_aRadioGroupWidgets; /* unordered_map for field names, used to ensure unique field names */ @@ -882,6 +888,8 @@ private: /// Writes the form XObject proxy for the image. void writeReferenceXObject(const ReferenceXObjectEmit& rEmit); + void mergeAnnotationsFromExternalPage(filter::PDFObjectElement* pPage, std::map<sal_Int32, sal_Int32>& rCopiedResourcesMap); + /* tries to find the bitmap by its id and returns its emit data if exists, else creates a new emit data block */ const BitmapEmit& createBitmapEmit( const BitmapEx& rBitmapEx, const Graphic& rGraphic, std::list<BitmapEmit>& rBitmaps, ResourceDict& rResourceDict, std::list<StreamRedirect>& rOutputStreams ); diff --git a/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf b/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf new file mode 100644 index 000000000000..7032f8099411 Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/FilledUpForm.pdf differ diff --git a/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt b/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt new file mode 100644 index 000000000000..42efe7f45dd7 Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/FilledUpForm_Source.odt differ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index bc23346a955d..3dc780f2cce7 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -5026,6 +5026,72 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testRexportXnViewColorspace) CPPUNIT_ASSERT_EQUAL("DeviceRGB"_ostr, pColorSpaceElement->GetValue()); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testFormRoundtrip) +{ + // Loads and saves a PDF with filled forms. This checks the forms survive the round-trip. + + // We need to enable PDFium import (and make sure to disable after the test) + bool bResetEnvVar = false; + if (getenv("LO_IMPORT_USE_PDFIUM") == nullptr) + { + bResetEnvVar = true; + osl_setEnvironment(u"LO_IMPORT_USE_PDFIUM"_ustr.pData, u"1"_ustr.pData); + } + comphelper::ScopeGuard aPDFiumEnvVarGuard([&]() { + if (bResetEnvVar) + osl_clearEnvironment(u"LO_IMPORT_USE_PDFIUM"_ustr.pData); + }); + + // Need to properly set the PDF export options + aMediaDescriptor["FilterName"] <<= OUString("draw_pdf_Export"); + uno::Sequence<beans::PropertyValue> aFilterData( + comphelper::InitPropertySequence({ { "UseTaggedPDF", uno::Any(true) } })); + aMediaDescriptor["FilterData"] <<= aFilterData; + + saveAsPDF(u"FilledUpForm.pdf"); + + // Parse the round-tripped document with PDFium + auto pPdfDocument = parsePDFExport(); + // Should be 1 page + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + std::unique_ptr<vcl::pdf::PDFiumPage> pPage = pPdfDocument->openPage(0); + std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject = pPage->getObject(1); + + // 5 annotations means 5 form fields + CPPUNIT_ASSERT_EQUAL(5, pPage->getAnnotationCount()); + + // Check each form + { + std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = pPage->getAnnotation(0); + CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::CheckBox, + pAnnotation->getFormFieldType(pPdfDocument.get())); + } + + { + std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = pPage->getAnnotation(1); + CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::ComboBox, + pAnnotation->getFormFieldType(pPdfDocument.get())); + } + + { + std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = pPage->getAnnotation(2); + CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField, + pAnnotation->getFormFieldType(pPdfDocument.get())); + } + + { + std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = pPage->getAnnotation(3); + CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField, + pAnnotation->getFormFieldType(pPdfDocument.get())); + } + + { + std::unique_ptr<vcl::pdf::PDFiumAnnotation> pAnnotation = pPage->getAnnotation(4); + CPPUNIT_ASSERT_EQUAL(vcl::pdf::PDFFormFieldType::TextField, + pAnnotation->getFormFieldType(pPdfDocument.get())); + } +} + } // end anonymous namespace CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 66edafd9bc4c..3c462434292f 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -5617,39 +5617,46 @@ bool PDFWriterImpl::emitCatalog() { aLine.append( "/MarkInfo<</Marked true>> " ); } - if( !m_aWidgets.empty() ) + + if (!m_aWidgets.empty() || !m_aCopiedWidgets.empty()) { - aLine.append( "/AcroForm<</Fields[ " ); - int nWidgets = m_aWidgets.size(); - int nOut = 0; - for( int j = 0; j < nWidgets; j++ ) + aLine.append("/AcroForm"); + aLine.append("<<"); + aLine.append("/Fields"); + aLine.append("[ "); + + for (auto const& rWidget : m_aWidgets) { // output only root fields - if( m_aWidgets[j].m_nParent < 1 ) + if (rWidget.m_nParent < 1) { - aLine.append( m_aWidgets[j].m_nObject ); - aLine.append( (nOut++ % 5)==4 ? " 0 R " : " 0 R " ); + appendObjectReference(rWidget.m_nObject, aLine); } } - aLine.append( " ]" ); + // Add widgets that were copied from an external PDF + for (auto const& rCopiedWidget : m_aCopiedWidgets) + { + appendObjectReference(rCopiedWidget.m_nObject, aLine); + } + aLine.append(" ] "); + bool bSigned = false; #if HAVE_FEATURE_NSS if (m_nSignatureObject != -1) - aLine.append( "/SigFlags 3"); + { + aLine.append("/SigFlags 3 "); + bSigned = true; + } #endif + aLine.append("/DR "); + appendObjectReference(getResourceDictObj(), aLine); - aLine.append( "/DR " ); - aLine.append( getResourceDictObj() ); - aLine.append( " 0 R" ); - // NeedAppearances must not be used if PDF is signed - if (m_nPDFA_Version > 0 -#if HAVE_FEATURE_NSS - || ( m_nSignatureObject != -1 ) -#endif - ) - aLine.append( ">> " ); - else - aLine.append( "/NeedAppearances true>> " ); + // NeedAppearances must not be used if PDF is signed, PDF/A is used or + // we have copied widgets (can't guarantee we have appearance streams in this case) + if (m_nPDFA_Version == 0 && !bSigned && m_aCopiedWidgets.empty()) + aLine.append("/NeedAppearances true "); + + aLine.append(">> "); } //check if there is a Metadata object @@ -9288,6 +9295,14 @@ void PDFWriterImpl::writeReferenceXObject(const ReferenceXObjectEmit& rEmit) return; } + // Get the copied resource map, so we can use that to skip objects we already copied + auto& rCopiedResourcesMap = rExternalPDFStream.getCopiedResources(); + + // Add page mapping to the copied resources map. + // Needed if we reference the current page and we want to prevent copying the page + // if it is referenced. + rCopiedResourcesMap.emplace(pPage->GetObjectValue(), m_aPages.back().m_nPageObject); + double aOrigin[2] = { 0.0, 0.0 }; // tdf#160714 use crop box for bounds of embedded PDF object @@ -9334,52 +9349,8 @@ void PDFWriterImpl::writeReferenceXObject(const ReferenceXObjectEmit& rEmit) return; } - // Merge link annotations from pPage to our page. - std::vector<filter::PDFObjectElement*> aAnnots; - if (auto pArray = dynamic_cast<filter::PDFArrayElement*>(pPage->Lookup("Annots"_ostr))) - { - for (const auto pElement : pArray->GetElements()) - { - auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pElement); - if (!pReference) - { - continue; - } - - filter::PDFObjectElement* pObject = pReference->LookupObject(); - if (!pObject) - { - continue; - } - - auto pType = dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Type"_ostr)); - if (!pType || pType->GetValue() != "Annot") - { - continue; - } - - auto pSubtype = dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Subtype"_ostr)); - if (!pSubtype || pSubtype->GetValue() != "Link") - { - continue; - } - - // Reference to a link annotation object, remember it. - aAnnots.push_back(pObject); - } - } - if (!aAnnots.empty()) - { - PDFObjectCopier aCopier(*this); - SvMemoryStream& rDocBuffer = pPage->GetDocument().GetEditBuffer(); - std::map<sal_Int32, sal_Int32> aMap; - for (const auto& pAnnot : aAnnots) - { - // Copy over the annotation and refer to its new id. - sal_Int32 nNewId = aCopier.copyExternalResource(rDocBuffer, *pAnnot, aMap); - m_aPages.back().m_aAnnotations.push_back(nNewId); - } - } + // Merge link and widget annotations from pPage to our page. + mergeAnnotationsFromExternalPage(pPage, rCopiedResourcesMap); nWrappedFormObject = createObject(); // Write the form XObject wrapped below. This is a separate object from @@ -9430,8 +9401,7 @@ void PDFWriterImpl::writeReferenceXObject(const ReferenceXObjectEmit& rEmit) } PDFObjectCopier aCopier(*this); - auto & rResources = rExternalPDFStream.getCopiedResources(); - aCopier.copyPageResources(pPage, aLine, rResources); + aCopier.copyPageResources(pPage, aLine, rCopiedResourcesMap); aLine.append(" /BBox [ "); aLine.append(aOrigin[0]); @@ -9583,6 +9553,99 @@ void PDFWriterImpl::writeReferenceXObject(const ReferenceXObjectEmit& rEmit) CHECK_RETURN2(writeBuffer(aLine)); } +namespace +{ + +sal_Int32 getRootParent(filter::PDFObjectElement* pObject) +{ + auto* pReference = dynamic_cast<filter::PDFReferenceElement*>(pObject->Lookup("Parent"_ostr)); + if (!pReference) + return pObject->GetObjectValue(); + + auto* pParent = pReference->LookupObject(); + return getRootParent(pParent); +} + +} // end anonymous + +void PDFWriterImpl::mergeAnnotationsFromExternalPage(filter::PDFObjectElement* pPage, std::map<sal_Int32, sal_Int32>& rCopiedResourcesMap) +{ + auto* pResult = pPage->Lookup("Annots"_ostr); + filter::PDFArrayElement* pArray = nullptr; + // If the Annots array is a reference - get the array from the referenced object + auto pAnnotsReference = dynamic_cast<filter::PDFReferenceElement*>(pResult); + if (pAnnotsReference) + { + filter::PDFObjectElement* pObject = pAnnotsReference->LookupObject(); + pArray = pObject->GetArray(); + } + else + { + // Not a reference so is it an array + pArray = dynamic_cast<filter::PDFArrayElement*>(pResult); + } + + // Have we found our /Annots array? + if (!pArray) + return; + + std::unordered_set<sal_Int32> aAlreadyCopied; + PDFObjectCopier aCopier(*this); + SvMemoryStream& rDocBuffer = pPage->GetDocument().GetEditBuffer(); + + for (const auto pElement : pArray->GetElements()) + { + auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pElement); + if (!pReference) + continue; + + filter::PDFObjectElement* pObject = pReference->LookupObject(); + if (!pObject) + continue; + + // Get the /Type and the /Subtype + auto pType = dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Type"_ostr)); + auto pSubtype = dynamic_cast<filter::PDFNameElement*>(pObject->Lookup("Subtype"_ostr)); + + // Is it a /Annot we want to copy? + if (pType && pType->GetValue() == "Annot" && pSubtype) + { + bool bIsLink = pSubtype->GetValue() == "Link"; + bool bIsWidget = pSubtype->GetValue() == "Widget"; + + // is link or widget + if (!bIsLink && !bIsWidget) + continue; + + // Copy over the annotation and refer to its new id. + sal_Int32 nNewId = aCopier.copyExternalResource(rDocBuffer, *pObject, rCopiedResourcesMap); + m_aPages.back().m_aAnnotations.push_back(nNewId); + + if (!bIsWidget) + continue; + + // Find the root + sal_Int32 nRootID = getRootParent(pObject); + + auto aIterator = rCopiedResourcesMap.find(nRootID); + if (aIterator == rCopiedResourcesMap.end()) // Can't find the mapped ID ? + continue; + + nNewId = aIterator->second; + + // Ignore if we added the ID already + if (aAlreadyCopied.find(nNewId) == aAlreadyCopied.end()) + { + // Add new entry into copied widgets vector + auto& rCopiedWidget = m_aCopiedWidgets.emplace_back(); + rCopiedWidget.m_nObject = nNewId; + aAlreadyCopied.emplace(nNewId); + } + } + } + +} + bool PDFWriterImpl::writeBitmapObject( const BitmapEmit& rObject, bool bMask ) { if (rObject.m_aReferenceXObject.hasExternalPDFData() && !m_aContext.UseReferenceXObject)