configure.ac | 2 filter/Configuration_filter.mk | 2 filter/source/config/cache/typedetection.cxx | 1 filter/source/config/fragments/filters/calc_Parquet.xcu | 19 ++++ filter/source/config/fragments/types/calc_Parquet.xcu | 17 +++ sc/inc/orcusfilters.hxx | 18 ++-- sc/source/filter/inc/orcusfiltersimpl.hxx | 7 - sc/source/filter/orcus/filterdetect.cxx | 2 sc/source/filter/orcus/orcusfiltersimpl.cxx | 70 +++++----------- sc/source/ui/docshell/docsh.cxx | 63 +++++--------- 10 files changed, 102 insertions(+), 99 deletions(-)
New commits:
commit b14583ba37a6d7ce398ccd3cf339f954785b03d8
Author: Kohei Yoshida <ko...@libreoffice.org>
AuthorDate: Wed May 31 21:33:56 2023 -0400
Commit: Kohei Yoshida <ko...@libreoffice.org>
CommitDate: Wed Oct 25 03:59:57 2023 +0200

Support conditional loading of Apache Parquet files into Calc

Also, use orcus::create_filter() and simplify the logic a bit. This requires orcus 0.19.1 or newer.

Note that this change makes it possible to load Apache Parquet files if and only if orcus has been built with the parquet import filter enabled. Using orcus without the parquet import filter enabled will not break the build or run-time behavior; you just can't load parquet files.

Change-Id: I9f8820998b7b0667d1e7cd532c32b1c7e55ca999
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158411
Tested-by: Jenkins
Reviewed-by: Kohei Yoshida <ko...@libreoffice.org>

diff --git a/configure.ac b/configure.ac index 999ab64289e8..c8466ae65e01 100644 --- a/configure.ac +++ b/configure.ac @@ -10878,7 +10878,7 @@ fi dnl =================================================================== dnl Orcus dnl =================================================================== -libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.18.0]) +libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.19.1]) if test "$with_system_orcus" != "yes"; then if test "$SYSTEM_BOOST" = "TRUE"; then dnl Link with Boost.System
diff --git a/filter/Configuration_filter.mk b/filter/Configuration_filter.mk index bd3d3486234e..8a323e83fd54 100644 --- a/filter/Configuration_filter.mk +++ b/filter/Configuration_filter.mk @@ -477,6 +477,7 @@ $(eval $(call filter_Configuration_add_types,fcfg_langpack,fcfg_calc_types.xcu,f generic_Text \ calc_Gnumeric \ calc_Lotus \ + calc_Parquet \ calc_QPro \ calc_MS_Excel_40 \ calc_MS_Excel_40_VorlageTemplate \ @@ -535,6 +536,7 @@ $(eval $(call filter_Configuration_add_filters,fcfg_langpack,fcfg_calc_filters.x calc_HTML_WebQuery \ calc_StarOffice_XML_Calc_Template \
calc_pdf_Export \ + calc_Parquet \ dBase \ calc8 \ calc8_template \ diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index 18d13a8f7796..a83a1406d0c1 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -211,6 +211,7 @@ int getFlatTypeRank(std::u16string_view rType) "calc_SYLK", "calc_DIF", "calc_dBase", + "Apache Parquet", // Binary (raster and vector image files) "emf_MS_Windows_Metafile", diff --git a/filter/source/config/fragments/filters/calc_Parquet.xcu b/filter/source/config/fragments/filters/calc_Parquet.xcu new file mode 100644 index 000000000000..5b0fea8257bb --- /dev/null +++ b/filter/source/config/fragments/filters/calc_Parquet.xcu @@ -0,0 +1,19 @@ +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. +--> +<node oor:name="Apache Parquet Spreadsheet" oor:op="replace"> + <prop oor:name="Flags"><value>IMPORT ALIEN PREFERRED</value></prop> + <prop oor:name="UIComponent"/> + <prop oor:name="FilterService"/> + <prop oor:name="UserData"/> + <prop oor:name="Type"><value>Apache Parquet</value></prop> + <prop oor:name="TemplateName"/> + <prop oor:name="DocumentService"><value>com.sun.star.sheet.SpreadsheetDocument</value></prop> + <prop oor:name="UIName"> + <value xml:lang="en-US">Apache Parquet Spreadsheet</value> + </prop> +</node> diff --git a/filter/source/config/fragments/types/calc_Parquet.xcu b/filter/source/config/fragments/types/calc_Parquet.xcu new file mode 100644 index 000000000000..6c29d886c92f --- /dev/null +++ b/filter/source/config/fragments/types/calc_Parquet.xcu @@ -0,0 +1,17 @@ +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 
2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. +--> +<node oor:name="Apache Parquet" oor:op="replace" > + <prop oor:name="DetectService"><value>com.sun.star.comp.sc.OrcusFilterDetect</value></prop> + <prop oor:name="URLPattern"/> + <prop oor:name="Extensions"><value>parquet</value></prop> + <prop oor:name="MediaType"/> + <prop oor:name="Preferred"><value>true</value></prop> + <prop oor:name="PreferredFilter"><value>Apache Parquet Spreadsheet</value></prop> + <prop oor:name="UIName"><value xml:lang="en-US">Apache Parquet</value></prop> + <prop oor:name="ClipboardFormat"/> +</node> diff --git a/sc/inc/orcusfilters.hxx b/sc/inc/orcusfilters.hxx index 6d17f3741ebb..f13f5cc630d2 100644 --- a/sc/inc/orcusfilters.hxx +++ b/sc/inc/orcusfilters.hxx @@ -25,17 +25,17 @@ namespace weld { class TreeView; } class ScOrcusFilters { public: - virtual ~ScOrcusFilters() {} - - virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const = 0; - - virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const = 0; + enum class ImportResult + { + NotSupported, + Success, + Failure + }; - virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const = 0; - - virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const = 0; + virtual ~ScOrcusFilters() {} - virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const = 0; + virtual ImportResult importByName( + ScDocument& rDoc, SfxMedium& rMedium, const OUString& rFilterName) const = 0; /** * Used to import just the styles from an xml file. 
diff --git a/sc/source/filter/inc/orcusfiltersimpl.hxx b/sc/source/filter/inc/orcusfiltersimpl.hxx index 070d69aec03e..7bba1410dc5c 100644 --- a/sc/source/filter/inc/orcusfiltersimpl.hxx +++ b/sc/source/filter/inc/orcusfiltersimpl.hxx @@ -16,11 +16,8 @@ class ScOrcusFiltersImpl : public ScOrcusFilters { public: - virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const override; - virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const override; - virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const override; - virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const override; - virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const override; + virtual ImportResult importByName(ScDocument& rDoc, SfxMedium& rMedium, + const OUString& rFilterName) const override; virtual bool importODS_Styles(ScDocument& rDoc, OUString& aFileName) const override; diff --git a/sc/source/filter/orcus/filterdetect.cxx b/sc/source/filter/orcus/filterdetect.cxx index 5750932e95d7..06f6015a8f0c 100644 --- a/sc/source/filter/orcus/filterdetect.cxx +++ b/sc/source/filter/orcus/filterdetect.cxx @@ -90,6 +90,8 @@ OUString OrcusFormatDetect::detect(css::uno::Sequence<css::beans::PropertyValue> return "Gnumeric XML"; case orcus::format_t::xls_xml: return "calc_MS_Excel_2003_XML"; + case orcus::format_t::parquet: + return "Apache Parquet"; default: ; } diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx index 1d3bc9c46234..2a13c761d5d5 100644 --- a/sc/source/filter/orcus/orcusfiltersimpl.cxx +++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx @@ -19,11 +19,7 @@ #include <rtl/ustring.hxx> #include <sal/log.hxx> -#include <orcus/orcus_csv.hpp> -#include <orcus/orcus_gnumeric.hpp> -#include <orcus/orcus_xlsx.hpp> -#include <orcus/orcus_xls_xml.hpp> -#include <orcus/orcus_ods.hpp> +#include <orcus/format_detection.hpp> #include <orcus/orcus_import_ods.hpp> #include 
<orcus/stream.hpp> #include <com/sun/star/task/XStatusIndicator.hpp> @@ -70,49 +66,35 @@ bool loadFileContent(SfxMedium& rMedium, orcus::iface::import_filter& filter) } } -bool ScOrcusFiltersImpl::importCSV(ScDocument& rDoc, SfxMedium& rMedium) const +ScOrcusFilters::ImportResult ScOrcusFiltersImpl::importByName(ScDocument& rDoc, SfxMedium& rMedium, + const OUString& rFilterName) const { - ScOrcusFactory aFactory(rDoc); - aFactory.setStatusIndicator(getStatusIndicator(rMedium)); - - orcus::orcus_csv filter(&aFactory); - return loadFileContent(rMedium, filter); -} - -bool ScOrcusFiltersImpl::importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const -{ - ScOrcusFactory aFactory(rDoc); - aFactory.setStatusIndicator(getStatusIndicator(rMedium)); - - orcus::orcus_gnumeric filter(&aFactory); - return loadFileContent(rMedium, filter); -} - -bool ScOrcusFiltersImpl::importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const -{ - ScOrcusFactory aFactory(rDoc); - aFactory.setStatusIndicator(getStatusIndicator(rMedium)); - - orcus::orcus_xls_xml filter(&aFactory); - return loadFileContent(rMedium, filter); -} - -bool ScOrcusFiltersImpl::importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const -{ - ScOrcusFactory aFactory(rDoc); - aFactory.setStatusIndicator(getStatusIndicator(rMedium)); + const std::unordered_map<OUString, orcus::format_t> aMap = { + { "Apache Parquet Spreadsheet", orcus::format_t::parquet }, + { "Gnumeric Spreadsheet", orcus::format_t::gnumeric }, + { "MS Excel 2003 XML Orcus", orcus::format_t::xls_xml }, + { "csv", orcus::format_t::csv }, + { "gnumeric", orcus::format_t::gnumeric }, + { "ods", orcus::format_t::ods }, + { "parquet", orcus::format_t::parquet }, + { "xls-xml", orcus::format_t::xls_xml }, + { "xlsx", orcus::format_t::xlsx }, + }; + + if (auto it = aMap.find(rFilterName); it != aMap.end()) + { + ScOrcusFactory aFactory(rDoc); + aFactory.setStatusIndicator(getStatusIndicator(rMedium)); - orcus::orcus_xlsx filter(&aFactory); - return 
loadFileContent(rMedium, filter); -} + auto filter = orcus::create_filter(it->second, &aFactory); + if (!filter) + return ImportResult::Failure; -bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const -{ - ScOrcusFactory aFactory(rDoc); - aFactory.setStatusIndicator(getStatusIndicator(rMedium)); + bool res = loadFileContent(rMedium, *filter); + return res ? ImportResult::Success : ImportResult::Failure; + } - orcus::orcus_ods filter(&aFactory); - return loadFileContent(rMedium, filter); + return ImportResult::NotSupported; } bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx index 7ba08a1cc5d5..175eb2f9c38a 100644 --- a/sc/source/ui/docshell/docsh.cxx +++ b/sc/source/ui/docshell/docsh.cxx @@ -1261,22 +1261,6 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium ) else bRet = true; } - else if (aFltName == "Gnumeric Spreadsheet") - { - ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters(); - if (!pOrcus) - return false; - - bRet = pOrcus->importGnumeric(*m_pDocument, rMedium); - } - else if (aFltName == "MS Excel 2003 XML Orcus") - { - ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters(); - if (!pOrcus) - return false; - - bRet = pOrcus->importExcel2003XML(*m_pDocument, rMedium); - } else if (aFltName == SC_TEXT_CSV_FILTER_NAME) { ScAsciiOptions aOptions; @@ -1604,10 +1588,27 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium ) } else { - if (!GetErrorIgnoreWarning()) + ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters(); + if (!pOrcus) + return false; + + switch (pOrcus->importByName(*m_pDocument, rMedium, aFltName)) { - SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom"); - SetError(SCERR_IMPORT_NI); + case ScOrcusFilters::ImportResult::Success: + bRet = true; + break; + case ScOrcusFilters::ImportResult::Failure: + bRet = false; + break; + case 
ScOrcusFilters::ImportResult::NotSupported: + { + if (!GetErrorIgnoreWarning()) + { + SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom"); + SetError(SCERR_IMPORT_NI); + } + break; + } } } @@ -1695,27 +1696,9 @@ bool ScDocShell::LoadExternal( SfxMedium& rMed ) if (!pOrcus) return false; - const OUString& rFilterName = pFilter->GetName(); - if (rFilterName == "gnumeric") - { - if (!pOrcus->importGnumeric(*m_pDocument, rMed)) - return false; - } - else if (rFilterName == "csv") - { - if (!pOrcus->importCSV(*m_pDocument, rMed)) - return false; - } - else if (rFilterName == "xlsx") - { - if (!pOrcus->importXLSX(*m_pDocument, rMed)) - return false; - } - else if (rFilterName == "ods") - { - if (!pOrcus->importODS(*m_pDocument, rMed)) - return false; - } + auto res = pOrcus->importByName(*m_pDocument, rMed, pFilter->GetName()); + if (res != ScOrcusFilters::ImportResult::Success) + return false; FinishedLoading(); return true;