external/pdfium/UnpackedTarball_pdfium.mk | 2 + external/pdfium/tounicodeinfo.patch.1 | 45 ++++++++++++++++++++++++++++++ include/vcl/filter/PDFiumLibrary.hxx | 1 vcl/source/pdf/PDFiumLibrary.cxx | 18 ++++++++++++ 4 files changed, 66 insertions(+)
New commits: commit 247b2d442ecef143bf89f12ccc8469f1ea718cbc Author: Caolán McNamara <[email protected]> AuthorDate: Thu Aug 21 17:20:19 2025 +0100 Commit: Miklos Vajna <[email protected]> CommitDate: Fri Sep 26 17:04:00 2025 +0200 expose pdfium to-unicode stream note DecodeStreamMaybeCopyAndReturnLength has unusual behaviour and refuses to write to a buffer larger than needed. Change-Id: Ie11fa3e6bfff8c810d66a892f46aa756fbbd2b9b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/191468 Reviewed-by: Miklos Vajna <[email protected]> Tested-by: Jenkins CollaboraOffice <[email protected]> diff --git a/external/pdfium/UnpackedTarball_pdfium.mk b/external/pdfium/UnpackedTarball_pdfium.mk index 945f886ad04a..d418c7d23d71 100644 --- a/external/pdfium/UnpackedTarball_pdfium.mk +++ b/external/pdfium/UnpackedTarball_pdfium.mk @@ -19,6 +19,8 @@ pdfium_patches += constexpr-template.patch pdfium_patches += freebsd.patch pdfium_patches += system-abseil.diff +# expose this mapping information +pdfium_patches += tounicodeinfo.patch.1 $(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium)) diff --git a/external/pdfium/tounicodeinfo.patch.1 b/external/pdfium/tounicodeinfo.patch.1 new file mode 100644 index 000000000000..0bcad0cb5a7c --- /dev/null +++ b/external/pdfium/tounicodeinfo.patch.1 @@ -0,0 +1,45 @@ +diff -ru pdfium.orig/fpdfsdk/fpdf_edittext.cpp pdfium/fpdfsdk/fpdf_edittext.cpp +--- pdfium.orig/fpdfsdk/fpdf_edittext.cpp 2025-08-21 16:56:03.855282337 +0100 ++++ pdfium/fpdfsdk/fpdf_edittext.cpp 2025-08-21 17:18:56.347453326 +0100 +@@ -958,6 +958,26 @@ + return true; + } + ++FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetToUnicodeContent(FPDF_FONT font, ++ uint8_t* buffer, ++ size_t buflen, ++ size_t* out_buflen) ++{ ++ auto* cfont = CPDFFontFromFPDFFont(font); ++ if (!cfont || !out_buflen) ++ return false; ++ ++ RetainPtr<const CPDF_Stream> pStream = cfont->GetFontDict()->GetStreamFor("ToUnicode"); ++ if (!pStream) ++ return false; ++ ++ // SAFETY: caller ensures `buffer` 
points to at least `buflen` bytes. ++ *out_buflen = DecodeStreamMaybeCopyAndReturnLength( ++ pStream, UNSAFE_BUFFERS(pdfium::make_span(buffer, buflen))); ++ ++ return true; ++} ++ + FPDF_EXPORT int FPDF_CALLCONV FPDFFont_GetIsEmbedded(FPDF_FONT font) { + auto* cfont = CPDFFontFromFPDFFont(font); + if (!cfont) +diff -ru pdfium.orig/public/fpdf_edit.h pdfium/public/fpdf_edit.h +--- pdfium.orig/public/fpdf_edit.h 2025-08-21 16:56:03.860206877 +0100 ++++ pdfium/public/fpdf_edit.h 2025-08-21 17:15:06.289917550 +0100 +@@ -1496,6 +1496,11 @@ + size_t buflen, + size_t* out_buflen); + ++FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetToUnicodeContent(FPDF_FONT font, ++ uint8_t* buffer, ++ size_t buflen, ++ size_t* out_buflen); ++ + // Experimental API. + // Get whether |font| is embedded or not. + // diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx index 6988b312087b..59fd5f3a0545 100644 --- a/include/vcl/filter/PDFiumLibrary.hxx +++ b/include/vcl/filter/PDFiumLibrary.hxx @@ -160,6 +160,7 @@ public: virtual int getFontAngle() = 0; virtual PFDiumFont getFont() = 0; virtual bool getFontData(PFDiumFont font, std::vector<uint8_t>& rData) = 0; + virtual bool getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& rData) = 0; virtual bool getFontProperties(FontWeight& weight) = 0; virtual PDFTextRenderMode getTextRenderMode() = 0; virtual Color getFillColor() = 0; diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx index d3ddddf928a9..8995732601b1 100644 --- a/vcl/source/pdf/PDFiumLibrary.cxx +++ b/vcl/source/pdf/PDFiumLibrary.cxx @@ -421,6 +421,7 @@ public: int getFontAngle() override; PFDiumFont getFont() override; bool getFontData(PFDiumFont font, std::vector<uint8_t>& rData) override; + bool getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& rData) override; bool getFontProperties(FontWeight& weight) override; PDFTextRenderMode getTextRenderMode() override; Color getFillColor() override; @@ -1185,6 
+1186,23 @@ bool PDFiumPageObjectImpl::getFontData(PFDiumFont font, std::vector<uint8_t>& rD return bOk; } +bool PDFiumPageObjectImpl::getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& rData) +{ + FPDF_FONT pFontObject = static_cast<FPDF_FONT>(font); + + size_t buflen(0); + bool bOk = FPDFFont_GetToUnicodeContent(pFontObject, nullptr, 0, &buflen); + if (!bOk) + { + SAL_WARN("vcl.filter", "PDFiumImpl: failed to get font ToUnicode data"); + return false; + } + rData.resize(buflen); + bOk = FPDFFont_GetToUnicodeContent(pFontObject, rData.data(), rData.size(), &buflen); + assert(bOk && rData.size() == buflen); + return bOk; +} + bool PDFiumPageObjectImpl::getFontProperties(FontWeight& weight) { // FPDFFont_GetWeight turns out not to be that useful. It seems to just
