sc/source/filter/inc/orcus_utils.hxx | 16 +++++++++++ sc/source/filter/orcus/filterdetect.cxx | 18 ++----------- sc/source/filter/orcus/orcusfiltersimpl.cxx | 32 ----------------------- sc/source/filter/orcus/utils.cxx | 38 ++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 46 deletions(-)
New commits: commit 0923c08b2890a3d85694421babd6a512cafe6231 Author: Kohei Yoshida <kohei.yosh...@collabora.com> AuthorDate: Tue Aug 26 21:43:44 2025 -0400 Commit: Kohei Yoshida <kohei.yosh...@collabora.com> CommitDate: Wed Aug 27 13:38:17 2025 +0200 Create temp file to store input stream during type detection This is also to avoid potential memory usage spikes on large files when detecting their types. Change-Id: I194620cfab0677f72beae1774e16d77395e6ec03 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/190261 Reviewed-by: Kohei Yoshida <ko...@libreoffice.org> Tested-by: Jenkins diff --git a/sc/source/filter/inc/orcus_utils.hxx b/sc/source/filter/inc/orcus_utils.hxx index ca7e73db59f2..c50f3e771f6d 100644 --- a/sc/source/filter/inc/orcus_utils.hxx +++ b/sc/source/filter/inc/orcus_utils.hxx @@ -8,9 +8,25 @@ #pragma once #include <rtl/ustring.hxx> +#include <unotools/tempfile.hxx> +#include <com/sun/star/io/XInputStream.hpp> #include <orcus/stream.hpp> orcus::file_content toFileContent(const OUString& rPath); +/** + * Stream copied to a temporary file with a filepath. + */ +class CopiedTempStream +{ + utl::TempFileNamed maTemp; + +public: + CopiedTempStream(SvStream& rSrc); + CopiedTempStream(const css::uno::Reference<css::io::XInputStream>& xSrc); + + OUString getFileName() const { return maTemp.GetFileName(); } +}; + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/filter/orcus/filterdetect.cxx b/sc/source/filter/orcus/filterdetect.cxx index d8bfc2fe23cc..285d5a01d8cd 100644 --- a/sc/source/filter/orcus/filterdetect.cxx +++ b/sc/source/filter/orcus/filterdetect.cxx @@ -14,8 +14,8 @@ #include <cppuhelper/supportsservice.hxx> #include <unotools/mediadescriptor.hxx> - #include <tools/stream.hxx> +#include <orcus_utils.hxx> #include <orcus/format_detection.hpp> @@ -71,20 +71,10 @@ OUString OrcusFormatDetect::detect(css::uno::Sequence<css::beans::PropertyValue> css::uno::Reference<css::io::XInputStream> xInputStream( aMediaDescriptor[utl::MediaDescriptor::PROP_INPUTSTREAM], css::uno::UNO_QUERY); - SvMemoryStream aContent(xInputStream->available()); - - static const sal_Int32 nBytes = 4096; - css::uno::Sequence<sal_Int8> aSeq(nBytes); - bool bEnd = false; - while (!bEnd) - { - sal_Int32 nReadBytes = xInputStream->readBytes(aSeq, nBytes); - bEnd = (nReadBytes != nBytes); - aContent.WriteBytes(aSeq.getConstArray(), nReadBytes); - } - std::string_view aStream(static_cast<const char*>(aContent.GetData()), aContent.GetSize()); - orcus::format_t eFormat = orcus::detect(aStream); + CopiedTempStream aTemp(xInputStream); + auto aContent = toFileContent(aTemp.getFileName()); + orcus::format_t eFormat = orcus::detect(aContent.str()); switch (eFormat) { diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx index c0e2b2facd61..f0a09099d45b 100644 --- a/sc/source/filter/orcus/orcusfiltersimpl.cxx +++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx @@ -20,7 +20,6 @@ #include <svl/itemset.hxx> #include <rtl/ustring.hxx> #include <sal/log.hxx> -#include <unotools/tempfile.hxx> #include <orcus/format_detection.hpp> #include <orcus/orcus_import_ods.hpp> @@ -31,37 +30,6 @@ using namespace com::sun::star; namespace { -/** - * Stream copied to a temporary file with a filepath. - */ -class CopiedTempStream -{ - utl::TempFileNamed maTemp; - -public: - CopiedTempStream(SvStream& rSrc) - { - maTemp.EnableKillingFile(); - SvStream* pDest = maTemp.GetStream(StreamMode::WRITE); - - rSrc.Seek(0); - - const std::size_t nReadBuffer = 1024 * 32; - std::size_t nRead = 0; - - do - { - char pData[nReadBuffer]; - nRead = rSrc.ReadBytes(pData, nReadBuffer); - pDest->WriteBytes(pData, nRead); - } while (nRead == nReadBuffer); - - maTemp.CloseStream(); - } - - OUString getFileName() const { return maTemp.GetFileName(); } -}; - uno::Reference<task::XStatusIndicator> getStatusIndicator(const SfxMedium& rMedium) { uno::Reference<task::XStatusIndicator> xStatusIndicator; diff --git a/sc/source/filter/orcus/utils.cxx b/sc/source/filter/orcus/utils.cxx index d3df1513cc5c..89846684bc75 100644 --- a/sc/source/filter/orcus/utils.cxx +++ b/sc/source/filter/orcus/utils.cxx @@ -17,4 +17,42 @@ orcus::file_content toFileContent(const OUString& rPath) #endif } +CopiedTempStream::CopiedTempStream(SvStream& rSrc) +{ + maTemp.EnableKillingFile(); + SvStream* pDest = maTemp.GetStream(StreamMode::WRITE); + + rSrc.Seek(0); + + const std::size_t nReadBuffer = 1024 * 32; + std::size_t nRead = 0; + + do + { + char pData[nReadBuffer]; + nRead = rSrc.ReadBytes(pData, nReadBuffer); + pDest->WriteBytes(pData, nRead); + } while (nRead == nReadBuffer); + + maTemp.CloseStream(); +} + +CopiedTempStream::CopiedTempStream(const css::uno::Reference<css::io::XInputStream>& xSrc) +{ + maTemp.EnableKillingFile(); + SvStream* pDest = maTemp.GetStream(StreamMode::WRITE); + + const sal_Int32 nBytes = 1024 * 32; + css::uno::Sequence<sal_Int8> aSeq(nBytes); + + for (bool bEnd = false; !bEnd;) + { + sal_Int32 nReadBytes = xSrc->readBytes(aSeq, nBytes); + bEnd = (nReadBytes != nBytes); + pDest->WriteBytes(aSeq.getConstArray(), nReadBytes); + } + + maTemp.CloseStream(); +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */