package/inc/ByteGrabber.hxx | 1 package/inc/ZipFile.hxx | 7 package/source/zipapi/ByteGrabber.cxx | 18 ++ package/source/zipapi/ZipFile.cxx | 231 +++++++++++++++++++++++++++++--- sc/qa/unit/subsequent_filters_test4.cxx | 10 + vcl/qa/cppunit/pdfexport/pdfexport.cxx | 4 6 files changed, 246 insertions(+), 25 deletions(-)
New commits: commit cffa22ee046fd62b38692ca25dea3a6827889d9a Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Tue Jul 16 12:12:09 2024 +0200 Commit: Thorsten Behrens <thorsten.behr...@allotropia.de> CommitDate: Wed Jul 24 08:36:59 2024 +0200 package: add additional consistency checks for local file header Check it contains same as central directory header, also check data descriptor if available. Also check there are no gaps or overlaps. This causes 3 fuzzer generated test documents to fail to load; adapt tests. Change-Id: If5813652f3bd03e90fdf95eb97e1e1523455b2b8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170571 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit efae4fc42d5fe3c0a69757226f38efc10d101194) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170588 Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de> diff --git a/package/inc/ByteGrabber.hxx b/package/inc/ByteGrabber.hxx index ba1512cf5162..4a93398a7704 100644 --- a/package/inc/ByteGrabber.hxx +++ b/package/inc/ByteGrabber.hxx @@ -61,6 +61,7 @@ public: sal_uInt16 ReadUInt16(); sal_uInt32 ReadUInt32(); + sal_uInt64 ReadUInt64(); sal_Int16 ReadInt16() { return static_cast<sal_Int16>(ReadUInt16()); diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx index 7e7de4bd67bc..aa2dcd5f3efc 100644 --- a/package/inc/ZipFile.hxx +++ b/package/inc/ZipFile.hxx @@ -29,6 +29,7 @@ #include "HashMaps.hxx" #include "EncryptionData.hxx" +#include <optional> #include <unordered_set> class MemoryByteGrabber; @@ -89,13 +90,13 @@ private: void getSizeAndCRC( sal_Int64 nOffset, sal_Int64 nCompressedSize, sal_Int64 *nSize, sal_Int32 *nCRC ); - void readLOC( ZipEntry &rEntry ); + sal_uInt64 readLOC(ZipEntry &rEntry); sal_Int32 readCEN(); sal_Int32 findEND(); void recover(); - static void readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, + static bool readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, sal_uInt64& nSize, sal_uInt64& nCompressedSize, - sal_uInt64* nOffset, + ::std::optional<sal_uInt64> & roOffset, OUString const* pCENFilenameToCheck); public: diff --git a/package/source/zipapi/ByteGrabber.cxx b/package/source/zipapi/ByteGrabber.cxx index 5a491de6509b..0e701a60007f 100644 --- a/package/source/zipapi/ByteGrabber.cxx +++ b/package/source/zipapi/ByteGrabber.cxx @@ -119,4 +119,22 @@ sal_uInt32 ByteGrabber::ReadUInt32() | ( pSequence[3] & 0xFF ) << 24 ); } +sal_uInt64 ByteGrabber::ReadUInt64() +{ + std::scoped_lock aGuard(m_aMutex); + + if (xStream->readBytes(aSequence, 8) != 8) + return 0; + + pSequence = aSequence.getConstArray(); + return static_cast<sal_uInt64>(pSequence[0] & 0xFF) + | static_cast<sal_uInt64>(pSequence[1] & 0xFF) << 8 + | static_cast<sal_uInt64>(pSequence[2] & 0xFF) << 16 + | static_cast<sal_uInt64>(pSequence[3] & 0xFF) << 24 + | static_cast<sal_uInt64>(pSequence[4] & 0xFF) << 32 + | static_cast<sal_uInt64>(pSequence[5] & 0xFF) << 40 + | static_cast<sal_uInt64>(pSequence[6] & 0xFF) << 48 + | static_cast<sal_uInt64>(pSequence[7] & 0xFF) << 56; +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/package/source/zipapi/ZipFile.cxx b/package/source/zipapi/ZipFile.cxx index 6773fcf20e1f..3baff4d5250d 100644 --- a/package/source/zipapi/ZipFile.cxx +++ b/package/source/zipapi/ZipFile.cxx @@ -910,7 +910,7 @@ uno::Reference< XInputStream > ZipFile::getWrappedRawStream( return createStreamForZipEntry ( aMutexHolder, rEntry, rData, UNBUFF_STREAM_WRAPPEDRAW, true, true, aMediaType ); } -void ZipFile::readLOC( ZipEntry &rEntry ) +sal_uInt64 ZipFile::readLOC(ZipEntry &rEntry) { ::osl::MutexGuard aGuard( m_aMutexHolder->GetMutex() ); @@ -926,13 +926,16 @@ void ZipFile::readLOC( ZipEntry &rEntry ) // Just verify the path and calculate the data offset and otherwise // rely on the central directory info. - aGrabber.ReadInt16(); //version - aGrabber.ReadInt16(); //flag - aGrabber.ReadInt16(); //how + aGrabber.ReadInt16(); // version - ignore any mismatch (Maven created JARs) + sal_uInt16 const nLocFlag = aGrabber.ReadUInt16(); // general purpose bit flag + sal_uInt16 const nLocMethod = aGrabber.ReadUInt16(); // compression method + // Do *not* compare timestamps, since MSO 2010 can produce documents + // with timestamp difference in the central directory entry and local + // file header. aGrabber.ReadInt32(); //time - aGrabber.ReadInt32(); //crc - aGrabber.ReadInt32(); //compressed size - aGrabber.ReadInt32(); //size + sal_uInt32 nLocCrc = aGrabber.ReadUInt32(); //crc + sal_uInt64 nLocCompressedSize = aGrabber.ReadUInt32(); //compressed size + sal_uInt64 nLocSize = aGrabber.ReadUInt32(); //size sal_Int16 nPathLen = aGrabber.ReadInt16(); sal_Int16 nExtraLen = aGrabber.ReadInt16(); @@ -944,6 +947,7 @@ void ZipFile::readLOC( ZipEntry &rEntry ) rEntry.nOffset = aGrabber.getPosition() + nPathLen + nExtraLen; + sal_Int64 nEnd = {}; // avoid -Werror=maybe-uninitialized bool bBroken = false; try @@ -965,8 +969,133 @@ void ZipFile::readLOC( ZipEntry &rEntry ) rEntry.sPath = sLOCPath; } - bBroken = rEntry.nPathLen != nPathLen - || rEntry.sPath != sLOCPath; + if (rEntry.nPathLen != nPathLen || rEntry.sPath != sLOCPath) + { + SAL_INFO("package", "LOC inconsistent name: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + bool isZip64{false}; + ::std::optional<sal_uInt64> oOffset64; + if (nExtraLen != 0) + { + Sequence<sal_Int8> aExtraBuffer; + aGrabber.readBytes(aExtraBuffer, nExtraLen); + MemoryByteGrabber extraMemGrabber(aExtraBuffer); + + isZip64 = readExtraFields(extraMemGrabber, nExtraLen, + nLocSize, nLocCompressedSize, oOffset64, &sLOCPath); + } + + // Just plain ignore bits 1 & 2 of the flag field - they are either + // purely informative, or even fully undefined (depending on method). + // Also ignore bit 11 ("Language encoding flag"): tdf125300.docx is + // example with mismatch - and the actual file names are compared in + // any case and required to be UTF-8. + if ((rEntry.nFlag & ~0x806U) != (nLocFlag & ~0x806U)) + { + SAL_INFO("package", "LOC inconsistent flag: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + // TODO: "older versions with encrypted streams write mismatching DEFLATE/STORE" ??? + if (rEntry.nMethod != nLocMethod) + { + SAL_INFO("package", "LOC inconsistent method: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (o3tl::checked_add<sal_Int64>(rEntry.nOffset, rEntry.nCompressedSize, nEnd)) + { + throw ZipException(u"Integer-overflow"_ustr); + } + + // read "data descriptor" - this can be 12, 16, 20, or 24 bytes in size + if ((rEntry.nFlag & 0x08) != 0) + { +#if 0 + if (nLocMethod == STORED) // example: fdo68983.odt has this :( + { + SAL_INFO("package", "LOC STORED with data descriptor: \"" << rEntry.sPath << "\""); + bBroken = true; + } + else +#endif + { + decltype(nLocCrc) nDDCrc; + decltype(nLocCompressedSize) nDDCompressedSize; + decltype(nLocSize) nDDSize; + aGrabber.seek(aGrabber.getPosition() + rEntry.nCompressedSize); + sal_uInt32 nTemp = aGrabber.ReadUInt32(); + if (nTemp == 0x08074b50) // APPNOTE says PK78 is optional??? + { + nDDCrc = aGrabber.ReadUInt32(); + } + else + { + nDDCrc = nTemp; + } + if (isZip64) + { + nDDCompressedSize = aGrabber.ReadUInt64(); + nDDSize = aGrabber.ReadUInt64(); + } + else + { + nDDCompressedSize = aGrabber.ReadUInt32(); + nDDSize = aGrabber.ReadUInt32(); + } + if (nEnd < aGrabber.getPosition()) + { + nEnd = aGrabber.getPosition(); + } + else + { + SAL_INFO("package", "LOC invalid size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + // tdf91429.docx has same values in LOC and in (superfluous) DD + if ((nLocCrc == 0 || nLocCrc == nDDCrc) + && (nLocCompressedSize == 0 || nLocCompressedSize == sal_uInt64(-1) || nLocCompressedSize == nDDCompressedSize) + && (nLocSize == 0 || nLocSize == sal_uInt64(-1) || nLocSize == nDDSize)) + + { + nLocCrc = nDDCrc; + nLocCompressedSize = nDDCompressedSize; + nLocSize = nDDSize; + } + else + { + SAL_INFO("package", "LOC non-0 with data descriptor: \"" << rEntry.sPath << "\""); + bBroken = true; + } + } + } + + // unit test file export64.zip has nLocCrc/nLocCS/nLocSize = 0 on mimetype + if (nLocCrc != 0 && static_cast<sal_uInt32>(rEntry.nCrc) != nLocCrc) + { + SAL_INFO("package", "LOC inconsistent CRC: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (nLocCompressedSize != 0 && static_cast<sal_uInt64>(rEntry.nCompressedSize) != nLocCompressedSize) + { + SAL_INFO("package", "LOC inconsistent compressed size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (nLocSize != 0 && static_cast<sal_uInt64>(rEntry.nSize) != nLocSize) + { + SAL_INFO("package", "LOC inconsistent size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (oOffset64 && o3tl::make_unsigned(nPos) != *oOffset64) + { + SAL_INFO("package", "LOC inconsistent offset: \"" << rEntry.sPath << "\""); + bBroken = true; + } } catch(...) { @@ -975,6 +1104,8 @@ void ZipFile::readLOC( ZipEntry &rEntry ) if ( bBroken && !bRecoveryMode ) throw ZipIOException("The stream seems to be broken!" ); + + return nEnd; } sal_Int32 ZipFile::findEND() @@ -1062,7 +1193,7 @@ sal_Int32 ZipFile::readCEN() ZipEntry aEntry; sal_Int16 nCommentLen; - sal_Int64 nMinOffset{nEndPos}; + ::std::vector<std::pair<sal_uInt64, sal_uInt64>> unallocated = { { 0, nCenPos } }; aEntries.reserve(nTotal); for (nCount = 0 ; nCount < nTotal; nCount++) @@ -1119,7 +1250,12 @@ sal_Int32 ZipFile::readCEN() if (aEntry.nExtraLen>0) { - readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, nCompressedSize, &nOffset, &aEntry.sPath); + ::std::optional<sal_uInt64> oOffset64; + readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, nCompressedSize, oOffset64, &aEntry.sPath); + if (oOffset64) + { + nOffset = *oOffset64; + } } aEntry.nCompressedSize = nCompressedSize; aEntry.nSize = nSize; @@ -1127,12 +1263,64 @@ sal_Int32 ZipFile::readCEN() if (o3tl::checked_add<sal_Int64>(aEntry.nOffset, nLocPos, aEntry.nOffset)) throw ZipException("Integer-overflow"); - nMinOffset = std::min(nMinOffset, aEntry.nOffset); if (o3tl::checked_multiply<sal_Int64>(aEntry.nOffset, -1, aEntry.nOffset)) throw ZipException("Integer-overflow"); aMemGrabber.skipBytes(nCommentLen); + // unfortunately readLOC is required now to check the consistency + assert(aEntry.nOffset <= 0); + sal_uInt64 const nStart{ o3tl::make_unsigned(-aEntry.nOffset) }; + sal_uInt64 const nEnd = readLOC(aEntry); + assert(nStart < nEnd); + for (auto it = unallocated.begin(); ; ++it) + { + if (it == unallocated.end()) + { + throw ZipException(u"overlapping entries"_ustr); + } + if (nStart < it->first) + { + throw ZipException(u"overlapping entries"_ustr); + } + else if (it->first == nStart) + { + if (it->second == nEnd) + { + unallocated.erase(it); + break; + } + else if (nEnd < it->second) + { + it->first = nEnd; + break; + } + else + { + throw ZipException(u"overlapping entries"_ustr); + } + } + else if (nStart < it->second) + { + if (nEnd < it->second) + { + auto const temp{it->first}; + it->first = nEnd; + unallocated.insert(it, { temp, nStart }); + break; + } + else if (nEnd == it->second) + { + it->second = nStart; + break; + } + else + { + throw ZipException(u"overlapping entries"_ustr); + } + } + } + // Is this a FAT-compatible empty entry? if (aEntry.nSize == 0 && (versionMadeBy & 0xff00) == 0) { @@ -1158,9 +1346,9 @@ sal_Int32 ZipFile::readCEN() if (nCount != nTotal) throw ZipException("Count != Total" ); - if (nMinOffset != 0) + if (!unallocated.empty()) { - throw ZipException(u"Extra bytes at beginning of zip file"_ustr); + throw ZipException(u"Zip file has holes! It will leak!"_ustr); } } catch ( IllegalArgumentException & ) @@ -1171,10 +1359,12 @@ sal_Int32 ZipFile::readCEN() return nCenPos; } -void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, +bool ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, sal_uInt64& nSize, sal_uInt64& nCompressedSize, - sal_uInt64* nOffset, OUString const*const pCENFilenameToCheck) + std::optional<sal_uInt64> & roOffset, + OUString const*const pCENFilenameToCheck) { + bool isZip64{false}; while (nExtraLen > 0) // Extensible data fields { sal_Int16 nheaderID = aMemGrabber.ReadInt16(); @@ -1188,15 +1378,16 @@ void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLe { nCompressedSize = aMemGrabber.ReadUInt64(); nReadSize = 16; - if (dataSize >= 24 && nOffset) + if (dataSize >= 24 && roOffset) { - *nOffset = aMemGrabber.ReadUInt64(); + roOffset.emplace(aMemGrabber.ReadUInt64()); nReadSize = 24; // 4 byte should be "Disk Start Number" but we not need it } } if (dataSize > nReadSize) aMemGrabber.skipBytes(dataSize - nReadSize); + isZip64 = true; } // Info-ZIP Unicode Path Extra Field - pointless as we expect UTF-8 in CEN already else if (nheaderID == 0x7075 && pCENFilenameToCheck) // ignore in recovery mode @@ -1233,6 +1424,7 @@ void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLe } nExtraLen -= dataSize + 4; } + return isZip64; } void ZipFile::recover() @@ -1328,8 +1520,9 @@ void ZipFile::recover() MemoryByteGrabber aMemGrabberExtra(aExtraBuffer); if (aEntry.nExtraLen > 0) { + ::std::optional<sal_uInt64> oOffset64; readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, - nCompressedSize, nullptr, nullptr); + nCompressedSize, oOffset64, nullptr); } } diff --git a/sc/qa/unit/subsequent_filters_test4.cxx b/sc/qa/unit/subsequent_filters_test4.cxx index 47de6248aecc..8105c3981e0a 100644 --- a/sc/qa/unit/subsequent_filters_test4.cxx +++ b/sc/qa/unit/subsequent_filters_test4.cxx @@ -55,6 +55,7 @@ #include <com/sun/star/drawing/XDrawPageSupplier.hpp> #include <com/sun/star/drawing/XControlShape.hpp> +#include <comphelper/propertyvalue.hxx> #include <com/sun/star/sheet/XSpreadsheetDocument.hpp> #include <com/sun/star/container/XIndexAccess.hpp> @@ -1917,7 +1918,14 @@ CPPUNIT_TEST_FIXTURE(ScFiltersTest4, testSortWithFormattingXLS) // just needs to not crash on recalc CPPUNIT_TEST_FIXTURE(ScFiltersTest4, testForcepoint107) { - createScDoc("xlsx/forcepoint107.xlsx"); + // It expectedly fails to load normally + CPPUNIT_ASSERT_ASSERTION_FAIL(createScDoc("xlsx/forcepoint107.xlsx")); + + // importing it must succeed with RepairPackage set to true. + uno::Sequence<beans::PropertyValue> aParams + = { comphelper::makePropertyValue(u"RepairPackage"_ustr, true) }; + loadWithParams(createFileURL(u"xlsx/forcepoint107.xlsx"), aParams); + ScDocShell* pDocSh = getScDocShell(); pDocSh->DoHardRecalc(); } diff --git a/sd/qa/unit/data/pptx/pass/ofz35597-1.pptx b/sd/qa/unit/data/pptx/fail/ofz35597-1.pptx similarity index 100% rename from sd/qa/unit/data/pptx/pass/ofz35597-1.pptx rename to sd/qa/unit/data/pptx/fail/ofz35597-1.pptx diff --git a/sd/qa/unit/data/pptx/pass/ofz46160-1.pptx b/sd/qa/unit/data/pptx/fail/ofz46160-1.pptx similarity index 100% rename from sd/qa/unit/data/pptx/pass/ofz46160-1.pptx rename to sd/qa/unit/data/pptx/fail/ofz46160-1.pptx diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index e9a8530c3cce..aca25cabc1f6 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -1888,8 +1888,8 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf113143) CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint71) { // I just care it doesn't crash - aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); - saveAsPDF(u"forcepoint71.key"); + // numerous Zip errors are detected now and libetonyek cannot RepairPackage + CPPUNIT_ASSERT_ASSERTION_FAIL(loadFromFile(u"forcepoint71.key")); } CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint80)