external/liborcus/UnpackedTarball_liborcus.mk |    1 
 external/liborcus/win_path_utf16.patch        |   33 ++++++++++++++++++++++++++
 sc/source/filter/orcus/orcusfiltersimpl.cxx   |   33 +++-----------------------
 3 files changed, 38 insertions(+), 29 deletions(-)

New commits:
commit 3984aa54e43ce2cf342937b9afedbf1d5e79592c
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Aug 3 18:26:25 2022 +0300
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Fri Aug 19 17:48:01 2022 +0200

    tdf#150247: patch orcus to use UTF-16 paths on Windows
    
    This adds UTF8->UTF16 conversion of the paths passed to ctor of
    file_content on Windows, since both boost::filesystem::file_size
    and boost::interprocess::file_mapping take UTF-16 strings on this
    platform. So the assumption is that 8-bit path strings passed to
    orcus are UTF-8-encoded.
    
    This partially reverts commit 75252e58d9b5d020bf7bd6ca66b3a9d780463051
    (it keeps use of osl_getThreadTextEncoding for platforms other than
    Windows).
    
    Change-Id: Ie467f71a65945f4f07ff432136ea06b811c3f794
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137759
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    (cherry picked from commit c2e86396b741b956efc05e9dfea1e1c3286dfb9d)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137960
    Reviewed-by: Eike Rathke <er...@redhat.com>

diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk
index 12698bab7234..27668819ac67 100644
--- a/external/liborcus/UnpackedTarball_liborcus.mk
+++ b/external/liborcus/UnpackedTarball_liborcus.mk
@@ -45,6 +45,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
 ifeq ($(OS),WNT)
 $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
        external/liborcus/windows-constants-hack.patch \
+       external/liborcus/win_path_utf16.patch \
 ))
 endif
 
diff --git a/external/liborcus/win_path_utf16.patch b/external/liborcus/win_path_utf16.patch
new file mode 100644
index 000000000000..0a6781e728b3
--- /dev/null
+++ b/external/liborcus/win_path_utf16.patch
@@ -0,0 +1,33 @@
+diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp
+index 00395f59ff25..8f385fb8965a 100644
+--- a/src/parser/stream.cpp
++++ b/src/parser/stream.cpp
+@@ -147,6 +147,14 @@ std::tuple<std::string_view, size_t, size_t> find_line_with_offset(std::string_v
+     return std::make_tuple(line, line_num, offset_on_line);
+ }
+ 
++#ifdef _WIN32
++std::wstring to_wstring(std::string_view s)
++{
++    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conversion;
++    return conversion.from_bytes(s.data(), s.data() + s.size());
++}
++#endif
++
+ } // anonymous namespace
+ 
+ struct file_content::impl
+@@ -162,8 +170,13 @@ struct file_content::impl
+     impl() : content_size(0), content(nullptr) {}
+ 
+     impl(std::string_view filepath) :
++#ifdef _WIN32
++        content_size(fs::file_size(to_wstring(filepath))),
++        mapped_file(to_wstring(filepath).c_str(), bip::read_only),
++#else
+         content_size(fs::file_size(std::string{filepath}.c_str())),
+         mapped_file(std::string{filepath}.c_str(), bip::read_only),
++#endif
+         mapped_region(mapped_file, bip::read_only, 0, content_size),
+         content(nullptr)
+     {
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index db2d5705d51e..e7fdb44ca91e 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -11,8 +11,6 @@
 #include <orcusinterface.hxx>
 #include <tokenarray.hxx>
 
-#include <memory>
-
 #include <osl/thread.hxx>
 #include <sfx2/docfile.hxx>
 #include <sfx2/frame.hxx>
@@ -30,13 +28,6 @@
 #include <orcus/stream.hpp>
 #include <com/sun/star/task/XStatusIndicator.hpp>
 
-#if defined _WIN32
-#include <boost/filesystem/operations.hpp> // for boost::filesystem::filesystem_error
-#include <o3tl/char16_t2wchar_t.hxx>
-#include <prewin.h>
-#include <postwin.h>
-#endif
-
 using namespace com::sun::star;
 
 namespace
@@ -129,33 +120,17 @@ bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const
 
 bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
 {
-    OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
-
     try
     {
 #if defined _WIN32
-        std::unique_ptr<orcus::file_content> content;
-        try
-        {
-            content = std::make_unique<orcus::file_content>(aPath8.getStr());
-        }
-        catch (const boost::filesystem::filesystem_error&)
-        {
-            // Maybe the path contains characters not representable in ACP. It's not
-            // yet possible to pass Unicode path to orcus::file_content ctor - see
-            // https://gitlab.com/orcus/orcus/-/issues/30; try short path.
-            wchar_t buf[32767];
-            if (GetShortPathNameW(o3tl::toW(aPath.getStr()), buf, std::size(buf)) == 0)
-                throw;
-            aPath8 = OUStringToOString(o3tl::toU(buf), osl_getThreadTextEncoding());
-            content = std::make_unique<orcus::file_content>(aPath8);
-        }
+        OString aPath8 = OUStringToOString(aPath, RTL_TEXTENCODING_UTF8);
 #else
-        auto content = std::make_unique<orcus::file_content>(aPath8);
+        OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
 #endif
+        orcus::file_content content(aPath8);
         ScOrcusFactory aFactory(rDoc);
         ScOrcusStyles aStyles(aFactory);
-        orcus::import_ods::read_styles(content->str(), &aStyles);
+        orcus::import_ods::read_styles(content.str(), &aStyles);
     }
     catch (const std::exception& e)
     {

Reply via email to