i18nutil/source/utility/unicode.cxx | 6 ++++++ include/i18nutil/unicode.hxx | 1 + sdext/CppunitTest_sdext_pdfimport.mk | 1 + sdext/Executable_pdf2xml.mk | 1 + sdext/Executable_pdfunzip.mk | 1 + sdext/Library_pdfimport.mk | 2 ++ sdext/source/pdfimport/inc/pdfiprocessor.hxx | 2 +- sdext/source/pdfimport/tree/drawtreevisiting.cxx | 5 +++++ sdext/source/pdfimport/tree/pdfiprocessor.cxx | 17 ++++++++++++++++- sdext/source/pdfimport/tree/writertreevisiting.cxx | 5 +++++ 10 files changed, 39 insertions(+), 2 deletions(-)
New commits: commit 0a19375b73b12885f9022d82cb51e9c268cc0d6a Author: Kevin Suo <suokunl...@126.com> AuthorDate: Mon Nov 21 23:32:53 2022 +0800 Commit: Stephan Bergmann <sberg...@redhat.com> CommitDate: Wed Nov 30 22:53:06 2022 +0100 tdf#104597 related: restore the mirroring of Bidi_Mirrored characters The method PDFIProcessor::mirrorString() was removed, and ::comphelper::string::reverseString was used, by Kevin Suo in commit 69e9925ded584113e52f84ef0ed7c224079fa061. The old PDFIProcessor::mirrorString() did two things: 1) Reverse the code points in the string. However, due to its use of iterateCodePoints(&i) rather than iterateCodePoints(&i, -1) (i.e. iterating in forward rather than reversed order), the reversal was not functioning at all. 2) Use GetMirroredChar (i.e. the icu unicode u_charMirror() which was dropped in commit 720a093461aadff044ac0b1b7b45cf3d416b3785) to get the mirrored-image of characters with the Bidi_Mirrored property. Stephan Bergmann restored #1 in commit 50d73574b6c3d71f9a539c895a15d6fcda22390b (by introducing comphelper::string::reverseCodePoints). However, #2 was not restored. I do not see a clear reason to drop #2 (at least this is an area I am not quite familiar with), so it's better to restore it as well, in *this* commit, named as SubstituteBidiMirrored. 
Conflicts: i18nutil/source/utility/unicode.cxx sdext/source/pdfimport/inc/pdfiprocessor.hxx Change-Id: Ic263097938310e6e04f3b95bb12e4f4e834198f2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/143041 Tested-by: Jenkins Reviewed-by: Kevin Suo <suokunl...@126.com> (cherry picked from commit e788317bb58f4e4ad3bf57ee7b0e995cb4407ede) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/143492 Reviewed-by: Stephan Bergmann <sberg...@redhat.com> diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index 5e479989eae9..550f9e71b92f 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -23,6 +23,7 @@ #include <i18nlangtag/languagetagicu.hxx> #include <i18nutil/unicode.hxx> #include <sal/log.hxx> +#include <unicode/uchar.h> #include <unicode/numfmt.h> #include "unicode_data.h" #include <rtl/character.hxx> @@ -96,6 +97,11 @@ unicode::getUnicodeDirection( const sal_Unicode ch ) { return r; } +sal_uInt32 unicode::GetMirroredChar(sal_uInt32 nChar) { + nChar = u_charMirror(nChar); + return nChar; +} + #define bit(name) (1U << name) #define UPPERMASK bit(UnicodeType::UPPERCASE_LETTER) diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx index 0ca14290981e..c14d22cecb66 100644 --- a/include/i18nutil/unicode.hxx +++ b/include/i18nutil/unicode.hxx @@ -43,6 +43,7 @@ public: static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type); static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type); static sal_uInt8 getUnicodeDirection(const sal_Unicode ch); + static sal_uInt32 GetMirroredChar(sal_uInt32); static bool isControl(const sal_Unicode ch); static bool isAlpha(const sal_Unicode ch); static bool isSpace(const sal_Unicode ch); diff --git a/sdext/CppunitTest_sdext_pdfimport.mk b/sdext/CppunitTest_sdext_pdfimport.mk index b2676d32002b..ee24b9aedde7 100644 --- a/sdext/CppunitTest_sdext_pdfimport.mk +++ b/sdext/CppunitTest_sdext_pdfimport.mk @@ -33,6 +33,7 @@ $(eval 
$(call gb_CppunitTest_use_libraries,sdext_pdfimport,\ unotest \ tl \ xo \ + i18nutil \ )) $(eval $(call gb_CppunitTest_use_library_objects,sdext_pdfimport,pdfimport)) diff --git a/sdext/Executable_pdf2xml.mk b/sdext/Executable_pdf2xml.mk index b2439164a1ed..81e00eca11c0 100644 --- a/sdext/Executable_pdf2xml.mk +++ b/sdext/Executable_pdf2xml.mk @@ -32,6 +32,7 @@ $(eval $(call gb_Executable_use_libraries,pdf2xml,\ sal \ tl \ xo \ + i18nutil \ )) $(eval $(call gb_Executable_use_library_objects,pdf2xml,pdfimport)) diff --git a/sdext/Executable_pdfunzip.mk b/sdext/Executable_pdfunzip.mk index 1fcca8a863d7..10c179336048 100644 --- a/sdext/Executable_pdfunzip.mk +++ b/sdext/Executable_pdfunzip.mk @@ -30,6 +30,7 @@ $(eval $(call gb_Executable_use_libraries,pdfunzip,\ vcl \ tl \ xo \ + i18nutil \ )) $(eval $(call gb_Executable_use_library_objects,pdfunzip,pdfimport)) diff --git a/sdext/Library_pdfimport.mk b/sdext/Library_pdfimport.mk index b68c106268ae..dc1290cd0eb1 100644 --- a/sdext/Library_pdfimport.mk +++ b/sdext/Library_pdfimport.mk @@ -29,10 +29,12 @@ $(eval $(call gb_Library_use_libraries,pdfimport,\ sal \ tl \ xo \ + i18nutil \ )) $(eval $(call gb_Library_use_externals,pdfimport,\ boost_headers \ + icu_headers \ zlib \ $(if $(filter-out WNT MACOSX,$(OS)),fontconfig) \ )) diff --git a/sdext/source/pdfimport/inc/pdfiprocessor.hxx b/sdext/source/pdfimport/inc/pdfiprocessor.hxx index c872ead78917..7cbe7d7a5104 100644 --- a/sdext/source/pdfimport/inc/pdfiprocessor.hxx +++ b/sdext/source/pdfimport/inc/pdfiprocessor.hxx @@ -79,7 +79,7 @@ namespace pdfi static void sortElements( Element* pElement ); - static OUString mirrorString( const OUString& i_rInString ); + static OUString SubstituteBidiMirrored(const OUString& rString); private: void processGlyphLine(); diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.cxx b/sdext/source/pdfimport/tree/drawtreevisiting.cxx index 26f20fbafb27..b19140a0beb9 100644 --- a/sdext/source/pdfimport/tree/drawtreevisiting.cxx +++ 
b/sdext/source/pdfimport/tree/drawtreevisiting.cxx @@ -123,7 +123,12 @@ void DrawXmlEmitter::visit( TextElement& elem, const std::list< std::unique_ptr< } if (isRTL) // If so, reverse string + { + // First, produce mirrored-image for each code point which has the Bidi_Mirrored property. + str = PDFIProcessor::SubstituteBidiMirrored(str); + // Then, reverse the code points in the string, in backward order. str = ::comphelper::string::reverseCodePoints(str); + } m_rEmitContext.rEmitter.beginTag( "text:span", aProps ); diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx b/sdext/source/pdfimport/tree/pdfiprocessor.cxx index 701813942ba2..23a2cd5a031d 100644 --- a/sdext/source/pdfimport/tree/pdfiprocessor.cxx +++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx @@ -33,7 +33,7 @@ #include <basegfx/polygon/b2dpolygontools.hxx> #include <basegfx/utils/canvastools.hxx> #include <basegfx/matrix/b2dhommatrix.hxx> -#include <vcl/svapp.hxx> +#include <i18nutil/unicode.hxx> using namespace com::sun::star; @@ -695,6 +695,21 @@ void PDFIProcessor::sortElements(Element* pEle) pEle->Children.sort(lr_tb_sort); } +/* Produce mirrored-image for each code point which has the Bidi_Mirrored property, within a string. + This need to be done in forward order. 
+*/ +OUString PDFIProcessor::SubstituteBidiMirrored(const OUString& rString) +{ + const sal_Int32 nLen = rString.getLength(); + OUStringBuffer aMirror(nLen); + + for (sal_Int32 i = 0; i < nLen;) { + const sal_uInt32 nCodePoint = rString.iterateCodePoints(&i); + aMirror.appendUtf32(unicode::GetMirroredChar(nCodePoint)); + } + return aMirror.makeStringAndClear(); +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx index 9ba6efd4265e..cb312bf50d22 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.cxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx @@ -112,7 +112,12 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt } if (isRTL) // If so, reverse string + { + // First, produce mirrored-image for each code point which has the Bidi_Mirrored property. + str = PDFIProcessor::SubstituteBidiMirrored(str); + // Then, reverse the code points in the string, in backward order. str = ::comphelper::string::reverseCodePoints(str); + } m_rEmitContext.rEmitter.beginTag( "text:span", aProps );