vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf | 69 ++++++++++++++++++++++++ vcl/qa/cppunit/filter/ipdf/ipdf.cxx | 19 ++++++ vcl/source/filter/ipdf/pdfdocument.cxx | 7 ++ 3 files changed, 94 insertions(+), 1 deletion(-)
New commits: commit be73b5974284cd7094ae2bf31cd57e70e1e08dd2 Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Wed May 12 10:51:09 2021 +0200 Commit: Michael Stahl <michael.st...@allotropia.de> CommitDate: Fri May 14 12:19:51 2021 +0200 vcl PDF tokenizer: fix EOF position when \r is not followed by \n Otherwise this would break partial tokenize when we only read a trailer in the middle of the file: m_aEOFs.back() is one byte larger than rStream.Tell(), so we read past the end of the trailer, resulting in a tokenize failure. What's special about the bugdoc: - it has 2 xrefs, the first is incomplete, and refers to a second which is later in the file - the object length is an indirect object, triggering an xref lookup - the first EOF is followed by a \r, but then not by a \n This results in reading past the end of the first trailer and then triggering a lookup failure. FWIW, pdfium does the same in <https://pdfium.googlesource.com/pdfium/+/59d107323f6727bbd5f8a4d0843081790638a1dd/core/fpdfapi/parser/cpdf_syntax_parser.cpp#446>, we're now in sync with it. 
Change-Id: Ia556a25e333b5e4f1418d92a98d74358862120e2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115466 Reviewed-by: Miklos Vajna <vmik...@collabora.com> Tested-by: Jenkins (cherry picked from commit 6b1d5bafdc722d07d3dc4980764275a6caa707ba) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115516 Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf new file mode 100644 index 000000000000..6f1ad86f5c99 --- /dev/null +++ b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf @@ -0,0 +1,69 @@ +%PDF-1.7 +%��� +1 0 obj << + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj << + /Type /Pages + /MediaBox [0 0 200 300] + /Count 1 + /Kids [3 0 R] +>> +endobj +3 0 obj << + /Type /Page + /Parent 2 0 R + /Contents 4 0 R +>> +endobj +4 0 obj << + /Length 4 +>> +stream +q +Q +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000015 00000 n +0000000068 00000 n +0000000157 00000 n +0000000226 00000 n +trailer << + /Root 1 0 R + /Size 5 + /Prev 541 +>> +startxref +280 +%%EOF %%TEST +4 0 obj << + /Length 5 0 R +>> +stream +q +Q +endstream +endobj +5 0 obj +4 +endobj +xref +0 6 +0000000000 65535 f +0000000015 00000 n +0000000068 00000 n +0000000157 00000 n +0000000466 00000 n +0000000524 00000 n +trailer << + /Root 1 0 R + /Size 6 +>> +startxref +280 +%%EOF diff --git a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx index d94eb76aa5b3..93cc22360b56 100644 --- a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx +++ b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx @@ -178,6 +178,25 @@ CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testRealNumbers) CPPUNIT_ASSERT(!aPages.empty()); } +CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testCommentEnd) +{ + // Load the test document: + // - it has two xrefs + // - second xref has an updated page content object with an indirect length + // - last startxref refers to the first xref + // - first xref has a /Prev to the second xref + 
// - first xref is terminated by a \r, which is not followed by a newline + // this means that if reading doesn't stop at the end of the first xref, then we'll try to look + // up the offset of the length object, which we don't yet have + OUString aSourceURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "comment-end.pdf"; + SvFileStream aFile(aSourceURL, StreamMode::READ); + vcl::filter::PDFDocument aDocument; + + // Without the accompanying fix in place, this test would have failed, because Tokenize() didn't + // stop at the end of the first xref. + CPPUNIT_ASSERT(aDocument.Read(aFile)); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx index 804713abaf10..a0164c0c4ce6 100644 --- a/vcl/source/filter/ipdf/pdfdocument.cxx +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -2194,9 +2194,14 @@ bool PDFCommentElement::Read(SvStream& rStream) sal_uInt64 nPos = rStream.Tell(); if (ch == '\r') { + rStream.ReadChar(ch); + rStream.SeekRel(-1); // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat // behavior. - nPos += 1; + if (ch == '\n') + { + nPos += 1; + } } m_rDoc.PushBackEOF(nPos); }
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits