external/pdfium/UnpackedTarball_pdfium.mk |    2 +
 external/pdfium/tounicodeinfo.patch.1     |   45 ++++++++++++++++++++++++++++++
 include/vcl/filter/PDFiumLibrary.hxx      |    1 
 vcl/source/pdf/PDFiumLibrary.cxx          |   18 ++++++++++++
 4 files changed, 66 insertions(+)

New commits:
commit 247b2d442ecef143bf89f12ccc8469f1ea718cbc
Author:     Caolán McNamara <[email protected]>
AuthorDate: Thu Aug 21 17:20:19 2025 +0100
Commit:     Miklos Vajna <[email protected]>
CommitDate: Fri Sep 26 17:04:00 2025 +0200

    expose pdfium to-unicode stream
    
    Note that DecodeStreamMaybeCopyAndReturnLength has unusual behaviour:
    it refuses to write to a buffer larger than needed.
    
    Change-Id: Ie11fa3e6bfff8c810d66a892f46aa756fbbd2b9b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/191468
    Reviewed-by: Miklos Vajna <[email protected]>
    Tested-by: Jenkins CollaboraOffice <[email protected]>

diff --git a/external/pdfium/UnpackedTarball_pdfium.mk 
b/external/pdfium/UnpackedTarball_pdfium.mk
index 945f886ad04a..d418c7d23d71 100644
--- a/external/pdfium/UnpackedTarball_pdfium.mk
+++ b/external/pdfium/UnpackedTarball_pdfium.mk
@@ -19,6 +19,8 @@ pdfium_patches += constexpr-template.patch
 pdfium_patches += freebsd.patch
 
 pdfium_patches += system-abseil.diff
+# expose this mapping information
+pdfium_patches += tounicodeinfo.patch.1
 
 $(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium))
 
diff --git a/external/pdfium/tounicodeinfo.patch.1 
b/external/pdfium/tounicodeinfo.patch.1
new file mode 100644
index 000000000000..0bcad0cb5a7c
--- /dev/null
+++ b/external/pdfium/tounicodeinfo.patch.1
@@ -0,0 +1,45 @@
+diff -ru pdfium.orig/fpdfsdk/fpdf_edittext.cpp pdfium/fpdfsdk/fpdf_edittext.cpp
+--- pdfium.orig/fpdfsdk/fpdf_edittext.cpp      2025-08-21 16:56:03.855282337 
+0100
++++ pdfium/fpdfsdk/fpdf_edittext.cpp   2025-08-21 17:18:56.347453326 +0100
+@@ -958,6 +958,26 @@
+   return true;
+ }
+ 
++FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetToUnicodeContent(FPDF_FONT font,
++                                                                 uint8_t* buffer,
++                                                                 size_t buflen,
++                                                                 size_t* out_buflen)
++{
++  auto* pdf_font = CPDFFontFromFPDFFont(font);
++  if (pdf_font == nullptr || out_buflen == nullptr)
++    return false;
++
++  // The content comes from the font dictionary's "ToUnicode" stream; no stream means failure.
++  RetainPtr<const CPDF_Stream> to_unicode =
++      pdf_font->GetFontDict()->GetStreamFor("ToUnicode");
++  if (!to_unicode)
++    return false;
++  // SAFETY: caller ensures `buffer` points to at least `buflen` bytes.
++  auto dest = UNSAFE_BUFFERS(pdfium::make_span(buffer, buflen));
++  *out_buflen = DecodeStreamMaybeCopyAndReturnLength(to_unicode, dest);
++  return true;
++}
++
+ FPDF_EXPORT int FPDF_CALLCONV FPDFFont_GetIsEmbedded(FPDF_FONT font) {
+   auto* cfont = CPDFFontFromFPDFFont(font);
+   if (!cfont)
+diff -ru pdfium.orig/public/fpdf_edit.h pdfium/public/fpdf_edit.h
+--- pdfium.orig/public/fpdf_edit.h     2025-08-21 16:56:03.860206877 +0100
++++ pdfium/public/fpdf_edit.h  2025-08-21 17:15:06.289917550 +0100
+@@ -1496,6 +1496,11 @@
+                                                          size_t buflen,
+                                                          size_t* out_buflen);
+ 
++FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetToUnicodeContent(FPDF_FONT 
font,
++                                                               uint8_t* 
buffer,
++                                                               size_t buflen,
++                                                               size_t* 
out_buflen);
++
+ // Experimental API.
+ // Get whether |font| is embedded or not.
+ //
diff --git a/include/vcl/filter/PDFiumLibrary.hxx 
b/include/vcl/filter/PDFiumLibrary.hxx
index 6988b312087b..59fd5f3a0545 100644
--- a/include/vcl/filter/PDFiumLibrary.hxx
+++ b/include/vcl/filter/PDFiumLibrary.hxx
@@ -160,6 +160,7 @@ public:
     virtual int getFontAngle() = 0;
     virtual PFDiumFont getFont() = 0;
     virtual bool getFontData(PFDiumFont font, std::vector<uint8_t>& rData) = 0;
+    virtual bool getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& 
rData) = 0;
     virtual bool getFontProperties(FontWeight& weight) = 0;
     virtual PDFTextRenderMode getTextRenderMode() = 0;
     virtual Color getFillColor() = 0;
diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx
index d3ddddf928a9..8995732601b1 100644
--- a/vcl/source/pdf/PDFiumLibrary.cxx
+++ b/vcl/source/pdf/PDFiumLibrary.cxx
@@ -421,6 +421,7 @@ public:
     int getFontAngle() override;
     PFDiumFont getFont() override;
     bool getFontData(PFDiumFont font, std::vector<uint8_t>& rData) override;
+    bool getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& rData) 
override;
     bool getFontProperties(FontWeight& weight) override;
     PDFTextRenderMode getTextRenderMode() override;
     Color getFillColor() override;
@@ -1185,6 +1186,23 @@ bool PDFiumPageObjectImpl::getFontData(PFDiumFont font, 
std::vector<uint8_t>& rD
     return bOk;
 }
 
+bool PDFiumPageObjectImpl::getFontToUnicode(PFDiumFont font, std::vector<uint8_t>& rData)
+{
+    // Fills rData with the font's decoded ToUnicode stream; returns false if pdfium reports failure.
+    FPDF_FONT pFontObject = static_cast<FPDF_FONT>(font);
+    size_t buflen(0);
+    if (!FPDFFont_GetToUnicodeContent(pFontObject, nullptr, 0, &buflen))
+    {
+        SAL_WARN("vcl.filter", "PDFiumImpl: failed to get font ToUnicode data");
+        return false;
+    }
+    // First pass only queried the length; pdfium will not copy into a buffer larger than needed.
+    rData.resize(buflen);
+    const bool bOk = FPDFFont_GetToUnicodeContent(pFontObject, rData.data(), rData.size(), &buflen);
+    assert(bOk && rData.size() == buflen);
+    return bOk;
+}
+
 bool PDFiumPageObjectImpl::getFontProperties(FontWeight& weight)
 {
     // FPDFFont_GetWeight turns out not to be that useful. It seems to just

Reply via email to