configure.ac                                            |    2 
 filter/Configuration_filter.mk                          |    2 
 filter/source/config/cache/typedetection.cxx            |    1 
 filter/source/config/fragments/filters/calc_Parquet.xcu |   19 ++++
 filter/source/config/fragments/types/calc_Parquet.xcu   |   17 +++
 sc/inc/orcusfilters.hxx                                 |   18 ++--
 sc/source/filter/inc/orcusfiltersimpl.hxx               |    7 -
 sc/source/filter/orcus/filterdetect.cxx                 |    2 
 sc/source/filter/orcus/orcusfiltersimpl.cxx             |   70 +++++-----------
 sc/source/ui/docshell/docsh.cxx                         |   63 +++++---------
 10 files changed, 102 insertions(+), 99 deletions(-)

New commits:
commit b14583ba37a6d7ce398ccd3cf339f954785b03d8
Author:     Kohei Yoshida <ko...@libreoffice.org>
AuthorDate: Wed May 31 21:33:56 2023 -0400
Commit:     Kohei Yoshida <ko...@libreoffice.org>
CommitDate: Wed Oct 25 03:59:57 2023 +0200

    Support conditional loading of Apache Parquet files into Calc
    
    Also, use orcus::create_filter() and simplify the logic a bit.  This
    requires orcus 0.19.1 or newer.
    
    Note that this change makes it possible to load Apache Parquet files
    if and only if orcus has been built with the parquet import filter
    enabled.  Using orcus without the parquet import filter enabled will
    not break the build or run-time behavior; you just can't load parquet
    files.
    
    Change-Id: I9f8820998b7b0667d1e7cd532c32b1c7e55ca999
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158411
    Tested-by: Jenkins
    Reviewed-by: Kohei Yoshida <ko...@libreoffice.org>

diff --git a/configure.ac b/configure.ac
index 999ab64289e8..c8466ae65e01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10878,7 +10878,7 @@ fi
 dnl ===================================================================
 dnl Orcus
 dnl ===================================================================
-libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.18.0])
+libo_CHECK_SYSTEM_MODULE([orcus],[ORCUS],[liborcus-0.18 >= 0.19.1])
 if test "$with_system_orcus" != "yes"; then
     if test "$SYSTEM_BOOST" = "TRUE"; then
         dnl Link with Boost.System
diff --git a/filter/Configuration_filter.mk b/filter/Configuration_filter.mk
index bd3d3486234e..8a323e83fd54 100644
--- a/filter/Configuration_filter.mk
+++ b/filter/Configuration_filter.mk
@@ -477,6 +477,7 @@ $(eval $(call filter_Configuration_add_types,fcfg_langpack,fcfg_calc_types.xcu,f
        generic_Text \
        calc_Gnumeric \
        calc_Lotus \
+       calc_Parquet \
        calc_QPro \
        calc_MS_Excel_40 \
        calc_MS_Excel_40_VorlageTemplate \
@@ -535,6 +536,7 @@ $(eval $(call filter_Configuration_add_filters,fcfg_langpack,fcfg_calc_filters.x
        calc_HTML_WebQuery \
        calc_StarOffice_XML_Calc_Template \
        calc_pdf_Export \
+       calc_Parquet \
        dBase \
        calc8 \
        calc8_template \
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 18d13a8f7796..a83a1406d0c1 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -211,6 +211,7 @@ int getFlatTypeRank(std::u16string_view rType)
         "calc_SYLK",
         "calc_DIF",
         "calc_dBase",
+        "Apache Parquet",
 
         // Binary (raster and vector image files)
         "emf_MS_Windows_Metafile",
diff --git a/filter/source/config/fragments/filters/calc_Parquet.xcu b/filter/source/config/fragments/filters/calc_Parquet.xcu
new file mode 100644
index 000000000000..5b0fea8257bb
--- /dev/null
+++ b/filter/source/config/fragments/filters/calc_Parquet.xcu
@@ -0,0 +1,19 @@
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-->
+<node oor:name="Apache Parquet Spreadsheet" oor:op="replace">
+    <prop oor:name="Flags"><value>IMPORT ALIEN PREFERRED</value></prop>
+    <prop oor:name="UIComponent"/>
+    <prop oor:name="FilterService"/>
+    <prop oor:name="UserData"/>
+    <prop oor:name="Type"><value>Apache Parquet</value></prop>
+    <prop oor:name="TemplateName"/>
+    <prop oor:name="DocumentService"><value>com.sun.star.sheet.SpreadsheetDocument</value></prop>
+    <prop oor:name="UIName">
+        <value xml:lang="en-US">Apache Parquet Spreadsheet</value>
+    </prop>
+</node>
diff --git a/filter/source/config/fragments/types/calc_Parquet.xcu b/filter/source/config/fragments/types/calc_Parquet.xcu
new file mode 100644
index 000000000000..6c29d886c92f
--- /dev/null
+++ b/filter/source/config/fragments/types/calc_Parquet.xcu
@@ -0,0 +1,17 @@
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-->
+<node oor:name="Apache Parquet" oor:op="replace" >
+    <prop oor:name="DetectService"><value>com.sun.star.comp.sc.OrcusFilterDetect</value></prop>
+    <prop oor:name="URLPattern"/>
+    <prop oor:name="Extensions"><value>parquet</value></prop>
+    <prop oor:name="MediaType"/>
+    <prop oor:name="Preferred"><value>true</value></prop>
+    <prop oor:name="PreferredFilter"><value>Apache Parquet Spreadsheet</value></prop>
+    <prop oor:name="UIName"><value xml:lang="en-US">Apache Parquet</value></prop>
+    <prop oor:name="ClipboardFormat"/>
+</node>
diff --git a/sc/inc/orcusfilters.hxx b/sc/inc/orcusfilters.hxx
index 6d17f3741ebb..f13f5cc630d2 100644
--- a/sc/inc/orcusfilters.hxx
+++ b/sc/inc/orcusfilters.hxx
@@ -25,17 +25,17 @@ namespace weld { class TreeView; }
 class ScOrcusFilters
 {
 public:
-    virtual ~ScOrcusFilters() {}
-
-    virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
-
-    virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+    enum class ImportResult
+    {
+        NotSupported,
+        Success,
+        Failure
+    };
 
-    virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
-
-    virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+    virtual ~ScOrcusFilters() {}
 
-    virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const = 0;
+    virtual ImportResult importByName(
+        ScDocument& rDoc, SfxMedium& rMedium, const OUString& rFilterName) const = 0;
 
     /**
      * Used to import just the styles from an xml file.
diff --git a/sc/source/filter/inc/orcusfiltersimpl.hxx b/sc/source/filter/inc/orcusfiltersimpl.hxx
index 070d69aec03e..7bba1410dc5c 100644
--- a/sc/source/filter/inc/orcusfiltersimpl.hxx
+++ b/sc/source/filter/inc/orcusfiltersimpl.hxx
@@ -16,11 +16,8 @@
 class ScOrcusFiltersImpl : public ScOrcusFilters
 {
 public:
-    virtual bool importCSV(ScDocument& rDoc, SfxMedium& rMedium) const override;
-    virtual bool importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const override;
-    virtual bool importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const override;
-    virtual bool importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const override;
-    virtual bool importODS(ScDocument& rDoc, SfxMedium& rMedium) const override;
+    virtual ImportResult importByName(ScDocument& rDoc, SfxMedium& rMedium,
+                                      const OUString& rFilterName) const override;
 
     virtual bool importODS_Styles(ScDocument& rDoc, OUString& aFileName) const override;
 
diff --git a/sc/source/filter/orcus/filterdetect.cxx b/sc/source/filter/orcus/filterdetect.cxx
index 5750932e95d7..06f6015a8f0c 100644
--- a/sc/source/filter/orcus/filterdetect.cxx
+++ b/sc/source/filter/orcus/filterdetect.cxx
@@ -90,6 +90,8 @@ OUString OrcusFormatDetect::detect(css::uno::Sequence<css::beans::PropertyValue>
             return "Gnumeric XML";
         case orcus::format_t::xls_xml:
             return "calc_MS_Excel_2003_XML";
+        case orcus::format_t::parquet:
+            return "Apache Parquet";
         default:
             ;
     }
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index 1d3bc9c46234..2a13c761d5d5 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -19,11 +19,7 @@
 #include <rtl/ustring.hxx>
 #include <sal/log.hxx>
 
-#include <orcus/orcus_csv.hpp>
-#include <orcus/orcus_gnumeric.hpp>
-#include <orcus/orcus_xlsx.hpp>
-#include <orcus/orcus_xls_xml.hpp>
-#include <orcus/orcus_ods.hpp>
+#include <orcus/format_detection.hpp>
 #include <orcus/orcus_import_ods.hpp>
 #include <orcus/stream.hpp>
 #include <com/sun/star/task/XStatusIndicator.hpp>
@@ -70,49 +66,35 @@ bool loadFileContent(SfxMedium& rMedium, orcus::iface::import_filter& filter)
 }
 }
 
-bool ScOrcusFiltersImpl::importCSV(ScDocument& rDoc, SfxMedium& rMedium) const
+ScOrcusFilters::ImportResult ScOrcusFiltersImpl::importByName(ScDocument& rDoc, SfxMedium& rMedium,
+                                                              const OUString& rFilterName) const
 {
-    ScOrcusFactory aFactory(rDoc);
-    aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
-    orcus::orcus_csv filter(&aFactory);
-    return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importGnumeric(ScDocument& rDoc, SfxMedium& rMedium) const
-{
-    ScOrcusFactory aFactory(rDoc);
-    aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
-    orcus::orcus_gnumeric filter(&aFactory);
-    return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importExcel2003XML(ScDocument& rDoc, SfxMedium& rMedium) const
-{
-    ScOrcusFactory aFactory(rDoc);
-    aFactory.setStatusIndicator(getStatusIndicator(rMedium));
-
-    orcus::orcus_xls_xml filter(&aFactory);
-    return loadFileContent(rMedium, filter);
-}
-
-bool ScOrcusFiltersImpl::importXLSX(ScDocument& rDoc, SfxMedium& rMedium) const
-{
-    ScOrcusFactory aFactory(rDoc);
-    aFactory.setStatusIndicator(getStatusIndicator(rMedium));
+    const std::unordered_map<OUString, orcus::format_t> aMap = {
+        { "Apache Parquet Spreadsheet", orcus::format_t::parquet },
+        { "Gnumeric Spreadsheet", orcus::format_t::gnumeric },
+        { "MS Excel 2003 XML Orcus", orcus::format_t::xls_xml },
+        { "csv", orcus::format_t::csv },
+        { "gnumeric", orcus::format_t::gnumeric },
+        { "ods", orcus::format_t::ods },
+        { "parquet", orcus::format_t::parquet },
+        { "xls-xml", orcus::format_t::xls_xml },
+        { "xlsx", orcus::format_t::xlsx },
+    };
+
+    if (auto it = aMap.find(rFilterName); it != aMap.end())
+    {
+        ScOrcusFactory aFactory(rDoc);
+        aFactory.setStatusIndicator(getStatusIndicator(rMedium));
 
-    orcus::orcus_xlsx filter(&aFactory);
-    return loadFileContent(rMedium, filter);
-}
+        auto filter = orcus::create_filter(it->second, &aFactory);
+        if (!filter)
+            return ImportResult::Failure;
 
-bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const
-{
-    ScOrcusFactory aFactory(rDoc);
-    aFactory.setStatusIndicator(getStatusIndicator(rMedium));
+        bool res = loadFileContent(rMedium, *filter);
+        return res ? ImportResult::Success : ImportResult::Failure;
+    }
 
-    orcus::orcus_ods filter(&aFactory);
-    return loadFileContent(rMedium, filter);
+    return ImportResult::NotSupported;
 }
 
 bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx
index 7ba08a1cc5d5..175eb2f9c38a 100644
--- a/sc/source/ui/docshell/docsh.cxx
+++ b/sc/source/ui/docshell/docsh.cxx
@@ -1261,22 +1261,6 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium )
             else
                 bRet = true;
         }
-        else if (aFltName == "Gnumeric Spreadsheet")
-        {
-            ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
-            if (!pOrcus)
-                return false;
-
-            bRet = pOrcus->importGnumeric(*m_pDocument, rMedium);
-        }
-        else if (aFltName == "MS Excel 2003 XML Orcus")
-        {
-            ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
-            if (!pOrcus)
-                return false;
-
-            bRet = pOrcus->importExcel2003XML(*m_pDocument, rMedium);
-        }
         else if (aFltName == SC_TEXT_CSV_FILTER_NAME)
         {
             ScAsciiOptions aOptions;
@@ -1604,10 +1588,27 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium )
         }
         else
         {
-            if (!GetErrorIgnoreWarning())
+            ScOrcusFilters* pOrcus = ScFormatFilter::Get().GetOrcusFilters();
+            if (!pOrcus)
+                return false;
+
+            switch (pOrcus->importByName(*m_pDocument, rMedium, aFltName))
             {
-                SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom");
-                SetError(SCERR_IMPORT_NI);
+                case ScOrcusFilters::ImportResult::Success:
+                    bRet = true;
+                    break;
+                case ScOrcusFilters::ImportResult::Failure:
+                    bRet = false;
+                    break;
+                case ScOrcusFilters::ImportResult::NotSupported:
+                {
+                    if (!GetErrorIgnoreWarning())
+                    {
+                        SAL_WARN("sc.filter", "No match for filter '" << aFltName << "' in ConvertFrom");
+                        SetError(SCERR_IMPORT_NI);
+                    }
+                    break;
+                }
             }
         }
 
@@ -1695,27 +1696,9 @@ bool ScDocShell::LoadExternal( SfxMedium& rMed )
         if (!pOrcus)
             return false;
 
-        const OUString& rFilterName = pFilter->GetName();
-        if (rFilterName == "gnumeric")
-        {
-            if (!pOrcus->importGnumeric(*m_pDocument, rMed))
-                return false;
-        }
-        else if (rFilterName == "csv")
-        {
-            if (!pOrcus->importCSV(*m_pDocument, rMed))
-                return false;
-        }
-        else if (rFilterName == "xlsx")
-        {
-            if (!pOrcus->importXLSX(*m_pDocument, rMed))
-                return false;
-        }
-        else if (rFilterName == "ods")
-        {
-            if (!pOrcus->importODS(*m_pDocument, rMed))
-                return false;
-        }
+        auto res = pOrcus->importByName(*m_pDocument, rMed, pFilter->GetName());
+        if (res != ScOrcusFilters::ImportResult::Success)
+            return false;
 
         FinishedLoading();
         return true;

Reply via email to