sw/qa/extras/htmlexport/data/reqif-ole-data.xhtml | 2 - sw/qa/extras/htmlexport/data/reqif-ole-img.xhtml | 2 - sw/qa/extras/htmlexport/htmlexport.cxx | 11 ++++---- sw/qa/extras/htmlimport/data/data.ole | 1 sw/qa/extras/htmlimport/data/ole-data.xhtml | 7 +++++ sw/qa/extras/htmlimport/htmlimport.cxx | 27 ++++++++++++++++++++++ sw/source/filter/html/htmlgrin.cxx | 8 ++++++ sw/source/filter/html/htmlplug.cxx | 16 +++++++++---- sw/source/filter/html/swhtml.hxx | 3 ++ 9 files changed, 65 insertions(+), 12 deletions(-)
New commits: commit d68724ce0ebc3308616503fcbf002870fdf0c920 Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Tue May 24 14:58:47 2022 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Wed May 25 13:30:35 2022 +0200 sw XHTML import: handle non-image, non-RTF objects as clickable images A pair of XHTML <object> elements is meant to be interpreted as "native data" (outer) and "fallback/preview image" (inner). In practice we map non-PNG + PNG pairs to images and OLE2 data + PNG pairs to OLE2 embedded objects. This setup works for OLE2 data and images, but XHTML producers can also put other raw data (PDF, DOCX, XLSX, etc.) into the outer <object>. These were mapped to ODummyEmbeddedObject, which preserves data, but you can't interact with the data. Fix the lack of interaction by changing how non-OLE2, non-image data is handled: map such data to images where the image has a URL to the native data. This way the OS running Writer can decide how to handle that data. This required changing some existing tests where the intention was to have simple test data for OLE2 data, but that data was not OLE2, so the resulting doc model had images, but embedded objects were expected. Such tests now have OLE2 data instead. 
(cherry picked from commit 56769d3982e6afb075cb6d833662f066437fab6a) Conflicts: sw/qa/extras/htmlimport/htmlimport.cxx Change-Id: I0287ce2d9a02904e28cef619ff9f6e1f354d6147 diff --git a/sw/qa/extras/htmlexport/data/reqif-ole-data.xhtml b/sw/qa/extras/htmlexport/data/reqif-ole-data.xhtml index 9e0cfaa378b4..5f9c8940008e 100644 --- a/sw/qa/extras/htmlexport/data/reqif-ole-data.xhtml +++ b/sw/qa/extras/htmlexport/data/reqif-ole-data.xhtml @@ -1,3 +1,3 @@ <reqif-xhtml:div> - <reqif-xhtml:object data="reqif-ole-data.ole" type="text/rtf"/> + <reqif-xhtml:object data="ole2.ole" type="text/rtf"/> </reqif-xhtml:div> diff --git a/sw/qa/extras/htmlexport/data/reqif-ole-img.xhtml b/sw/qa/extras/htmlexport/data/reqif-ole-img.xhtml index 6217412ae597..df3f4a048ec6 100644 --- a/sw/qa/extras/htmlexport/data/reqif-ole-img.xhtml +++ b/sw/qa/extras/htmlexport/data/reqif-ole-img.xhtml @@ -1,5 +1,5 @@ <reqif-xhtml:div><reqif-xhtml:br/> - <reqif-xhtml:object data="reqif-ole-data.ole" type="text/rtf"> + <reqif-xhtml:object data="ole2.ole" type="text/rtf"> <reqif-xhtml:object data="reqif-ole-img.png?test=true" type="image/png">OLE Object</reqif-xhtml:object> </reqif-xhtml:object> </reqif-xhtml:div> diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index 8e78b288566f..42c13ccac3f6 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -660,13 +660,12 @@ DECLARE_HTMLEXPORT_ROUNDTRIP_TEST(testReqIfOleImg, "reqif-ole-img.xhtml") // Check mime/media types. 
CPPUNIT_ASSERT_EQUAL(OUString("image/png"), getProperty<OUString>(xGraphic, "MimeType")); - uno::Reference<document::XStorageBasedDocument> xStorageProvider(mxComponent, uno::UNO_QUERY); - uno::Reference<embed::XStorage> xStorage = xStorageProvider->getDocumentStorage(); - auto aStreamName = getProperty<OUString>(xObject, "StreamName"); - uno::Reference<io::XStream> xStream - = xStorage->openStreamElement(aStreamName, embed::ElementModes::READ); + uno::Reference<beans::XPropertySet> xObjectProps(xObject, uno::UNO_QUERY); + uno::Reference<io::XActiveDataStreamer> xStreamProvider( + xObjectProps->getPropertyValue("EmbeddedObject"), uno::UNO_QUERY); + uno::Reference<io::XSeekable> xStream(xStreamProvider->getStream(), uno::UNO_QUERY); // This was empty when either import or export handling was missing. - CPPUNIT_ASSERT_EQUAL(OUString("text/rtf"), getProperty<OUString>(xStream, "MediaType")); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int64>(37888), xStream->getLength()); // Check alternate text (it was empty, for export the 'alt' attribute was used). 
CPPUNIT_ASSERT_EQUAL(OUString("OLE Object"), getProperty<OUString>(xObject, "Title").trim()); diff --git a/sw/qa/extras/htmlimport/data/data.ole b/sw/qa/extras/htmlimport/data/data.ole new file mode 100644 index 000000000000..d3dc23d793e2 --- /dev/null +++ b/sw/qa/extras/htmlimport/data/data.ole @@ -0,0 +1 @@ +{\pict} diff --git a/sw/qa/extras/htmlimport/data/ole-data.xhtml b/sw/qa/extras/htmlimport/data/ole-data.xhtml new file mode 100644 index 000000000000..e8f1910ad4c9 --- /dev/null +++ b/sw/qa/extras/htmlimport/data/ole-data.xhtml @@ -0,0 +1,7 @@ +<reqif-xhtml:div> +<reqif-xhtml:p> +<reqif-xhtml:object data="data.ole" type="application/octet-stream"> + <reqif-xhtml:object data="ole2.png" type="image/png"></reqif-xhtml:object> +</reqif-xhtml:object> +</reqif-xhtml:p> +</reqif-xhtml:div> diff --git a/sw/qa/extras/htmlimport/htmlimport.cxx b/sw/qa/extras/htmlimport/htmlimport.cxx index 1241bfec3b04..326af44fb298 100644 --- a/sw/qa/extras/htmlimport/htmlimport.cxx +++ b/sw/qa/extras/htmlimport/htmlimport.cxx @@ -489,6 +489,33 @@ CPPUNIT_TEST_FIXTURE(SwModelTestBase, testOleImgSvg) CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), xObjects->getCount()); } +CPPUNIT_TEST_FIXTURE(SwModelTestBase, testOleData) +{ + // Given an XHTML with an <object> (containing non-image, non-OLE2 data) and an inner <object> + // (containing PNG): + uno::Sequence<beans::PropertyValue> aLoadProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + }; + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "ole-data.xhtml"; + + // When loading the document: + mxComponent = loadFromDesktop(aURL, "com.sun.star.text.TextDocument", aLoadProperties); + + // Then make sure the result is a single clickable Writer image: + uno::Reference<text::XTextGraphicObjectsSupplier> xSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference<container::XIndexAccess> 
xObjects(xSupplier->getGraphicObjects(), + uno::UNO_QUERY); + // Without the accompanying fix in place, this test would have failed with: + // - Expected: 1 + // - Actual : 0 + // i.e. the image was not imported as a Writer image (but as an OLE object). + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), xObjects->getCount()); + uno::Reference<css::drawing::XShape> xShape = getShape(1); + // And then the image was not clickable: this was empty. + CPPUNIT_ASSERT(getProperty<OUString>(xShape, "HyperLinkURL").endsWith("/data.ole")); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/html/htmlgrin.cxx b/sw/source/filter/html/htmlgrin.cxx index 628ef2704630..2a8e8c46dc13 100644 --- a/sw/source/filter/html/htmlgrin.cxx +++ b/sw/source/filter/html/htmlgrin.cxx @@ -865,6 +865,14 @@ IMAGE_SETEVENT: } } + else if (!m_aEmbedURL.isEmpty()) + { + // This is an inner <object> image and the outer <object> has a URL for us. Set that on the + // image. + SwFormatURL aURL(pFlyFormat->GetURL()); + aURL.SetURL(m_aEmbedURL, bIsMap); + pFlyFormat->SetFormatAttr(aURL); + } if( !aMacroItem.GetMacroTable().empty() ) { diff --git a/sw/source/filter/html/htmlplug.cxx b/sw/source/filter/html/htmlplug.cxx index 7310d1284765..39a18a4487af 100644 --- a/sw/source/filter/html/htmlplug.cxx +++ b/sw/source/filter/html/htmlplug.cxx @@ -562,8 +562,8 @@ bool SwHTMLParser::InsertEmbed() aCnt.SwitchPersistence(xStorage); aObjName = aCnt.CreateUniqueObjectName(); { - SvFileStream aFileStream(aURLObj.GetMainURL(INetURLObject::DecodeMechanism::NONE), - StreamMode::READ); + OUString aEmbedURL = aURLObj.GetMainURL(INetURLObject::DecodeMechanism::NONE); + SvFileStream aFileStream(aEmbedURL, StreamMode::READ); uno::Reference<io::XInputStream> xInStream; SvMemoryStream aMemoryStream; @@ -601,8 +601,13 @@ bool SwHTMLParser::InsertEmbed() } if (!xInStream.is()) - // Non-RTF case. 
- xInStream.set(new utl::OStreamWrapper(aFileStream)); + { + // Object data is neither OLE2 in RTF, nor an image. Then map this to a URL that + // will be set on the inner image. + m_aEmbedURL = aEmbedURL; + // Signal success, so the outer object won't fall back to the image handler. + return true; + } if (!xObj.is()) { diff --git a/sw/source/filter/html/swhtml.hxx b/sw/source/filter/html/swhtml.hxx index 59b7db28c467..c3cebcf8984d 100644 --- a/sw/source/filter/html/swhtml.hxx +++ b/sw/source/filter/html/swhtml.hxx @@ -478,6 +478,9 @@ class SwHTMLParser : public SfxHTMLParser, public SvtListener std::set<OUString> m_aAllowedRTFOLEMimeTypes; + /// This is the URL of the outer <object> data if it's not OLE2 or an image. + OUString m_aEmbedURL; + void DeleteFormImpl(); void DocumentDetected(); commit f84299152634c732b2da47f3ab732128a0c369c0 Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Mon May 23 16:24:00 2022 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Wed May 25 10:11:42 2022 +0200 sw HTML import: extend list of MIME types recognized as image objects The HTML import sometimes imports <object> elements as embedded objects and sometimes as Writer images. This MIME-type based approach works great to differentiate plain images (with PNG fallback) from real embedded objects, but analysis of a larger document corpus pointed out a few missing entries in this allow-list. Fix the problem by adding a 2nd variant for BMP, TIFF and WMF, which should also count as images, not objects. This could be improved further in a later commit by getting a full list of image MIME types we support from VCL. 
(cherry picked from commit e7fd5af6f65d17cbb3a753544bcab9501e5d1caa) Change-Id: Idb2485012e44b4c7db396b5062e7173f3c85495e diff --git a/sw/source/filter/html/htmlplug.cxx b/sw/source/filter/html/htmlplug.cxx index c2fc7b9c5a18..7310d1284765 100644 --- a/sw/source/filter/html/htmlplug.cxx +++ b/sw/source/filter/html/htmlplug.cxx @@ -443,6 +443,9 @@ bool SwHTMLParser::InsertEmbed() u"image/svg+xml", u"image/tiff", u"image/x-emf", + u"image/bmp", + u"image/tif", + u"image/wmf", }; if (vAllowlist.find(aType) != vAllowlist.end() && m_aEmbeds.empty())