package/inc/ByteGrabber.hxx             |    1 
 package/inc/ZipFile.hxx                 |    7 
 package/source/zipapi/ByteGrabber.cxx   |   18 ++
 package/source/zipapi/ZipFile.cxx       |  231 +++++++++++++++++++++++++++++---
 sc/qa/unit/subsequent_filters_test4.cxx |   10 +
 vcl/qa/cppunit/pdfexport/pdfexport.cxx  |    4 
 6 files changed, 246 insertions(+), 25 deletions(-)

New commits:
commit cffa22ee046fd62b38692ca25dea3a6827889d9a
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Tue Jul 16 12:12:09 2024 +0200
Commit:     Thorsten Behrens <thorsten.behr...@allotropia.de>
CommitDate: Wed Jul 24 08:36:59 2024 +0200

    package: add additional consistency checks for local file header
    
    Check it contains same as central directory header, also check data
    descriptor if available.  Also check there are no gaps or overlaps.
    
    This causes 3 fuzzer generated test documents to fail to load; adapt
    tests.
    
    Change-Id: If5813652f3bd03e90fdf95eb97e1e1523455b2b8
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170571
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit efae4fc42d5fe3c0a69757226f38efc10d101194)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170588
    Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de>

diff --git a/package/inc/ByteGrabber.hxx b/package/inc/ByteGrabber.hxx
index ba1512cf5162..4a93398a7704 100644
--- a/package/inc/ByteGrabber.hxx
+++ b/package/inc/ByteGrabber.hxx
@@ -61,6 +61,7 @@ public:
 
     sal_uInt16 ReadUInt16();
     sal_uInt32 ReadUInt32();
+    sal_uInt64 ReadUInt64();
     sal_Int16 ReadInt16()
     {
         return static_cast<sal_Int16>(ReadUInt16());
diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx
index 7e7de4bd67bc..aa2dcd5f3efc 100644
--- a/package/inc/ZipFile.hxx
+++ b/package/inc/ZipFile.hxx
@@ -29,6 +29,7 @@
 #include "HashMaps.hxx"
 #include "EncryptionData.hxx"
 
+#include <optional>
 #include <unordered_set>
 
 class MemoryByteGrabber;
@@ -89,13 +90,13 @@ private:
 
     void getSizeAndCRC( sal_Int64 nOffset, sal_Int64 nCompressedSize, 
sal_Int64 *nSize, sal_Int32 *nCRC );
 
-    void readLOC( ZipEntry &rEntry );
+    sal_uInt64 readLOC(ZipEntry &rEntry);
     sal_Int32 readCEN();
     sal_Int32 findEND();
     void recover();
-    static void readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
+    static bool readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
                                 sal_uInt64& nSize, sal_uInt64& nCompressedSize,
-                                sal_uInt64* nOffset,
+                                ::std::optional<sal_uInt64> & roOffset,
                                 OUString const* pCENFilenameToCheck);
 
 public:
diff --git a/package/source/zipapi/ByteGrabber.cxx 
b/package/source/zipapi/ByteGrabber.cxx
index 5a491de6509b..0e701a60007f 100644
--- a/package/source/zipapi/ByteGrabber.cxx
+++ b/package/source/zipapi/ByteGrabber.cxx
@@ -119,4 +119,22 @@ sal_uInt32 ByteGrabber::ReadUInt32()
           | ( pSequence[3] & 0xFF ) << 24 );
 }
 
+sal_uInt64 ByteGrabber::ReadUInt64()
+{
+    std::scoped_lock aGuard(m_aMutex);
+
+    if (xStream->readBytes(aSequence, 8) != 8)
+        return 0;
+
+    pSequence = aSequence.getConstArray();
+    return  static_cast<sal_uInt64>(pSequence[0] & 0xFF)
+          | static_cast<sal_uInt64>(pSequence[1] & 0xFF) << 8
+          | static_cast<sal_uInt64>(pSequence[2] & 0xFF) << 16
+          | static_cast<sal_uInt64>(pSequence[3] & 0xFF) << 24
+          | static_cast<sal_uInt64>(pSequence[4] & 0xFF) << 32
+          | static_cast<sal_uInt64>(pSequence[5] & 0xFF) << 40
+          | static_cast<sal_uInt64>(pSequence[6] & 0xFF) << 48
+          | static_cast<sal_uInt64>(pSequence[7] & 0xFF) << 56;
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/package/source/zipapi/ZipFile.cxx 
b/package/source/zipapi/ZipFile.cxx
index 6773fcf20e1f..3baff4d5250d 100644
--- a/package/source/zipapi/ZipFile.cxx
+++ b/package/source/zipapi/ZipFile.cxx
@@ -910,7 +910,7 @@ uno::Reference< XInputStream > ZipFile::getWrappedRawStream(
     return createStreamForZipEntry ( aMutexHolder, rEntry, rData, 
UNBUFF_STREAM_WRAPPEDRAW, true, true, aMediaType );
 }
 
-void ZipFile::readLOC( ZipEntry &rEntry )
+sal_uInt64 ZipFile::readLOC(ZipEntry &rEntry)
 {
     ::osl::MutexGuard aGuard( m_aMutexHolder->GetMutex() );
 
@@ -926,13 +926,16 @@ void ZipFile::readLOC( ZipEntry &rEntry )
     // Just verify the path and calculate the data offset and otherwise
     // rely on the central directory info.
 
-    aGrabber.ReadInt16(); //version
-    aGrabber.ReadInt16(); //flag
-    aGrabber.ReadInt16(); //how
+    aGrabber.ReadInt16(); // version - ignore any mismatch (Maven created JARs)
+    sal_uInt16 const nLocFlag = aGrabber.ReadUInt16(); // general purpose bit 
flag
+    sal_uInt16 const nLocMethod = aGrabber.ReadUInt16(); // compression method
+    // Do *not* compare timestamps, since MSO 2010 can produce documents
+    // with timestamp difference in the central directory entry and local
+    // file header.
     aGrabber.ReadInt32(); //time
-    aGrabber.ReadInt32(); //crc
-    aGrabber.ReadInt32(); //compressed size
-    aGrabber.ReadInt32(); //size
+    sal_uInt32 nLocCrc = aGrabber.ReadUInt32(); //crc
+    sal_uInt64 nLocCompressedSize = aGrabber.ReadUInt32(); //compressed size
+    sal_uInt64 nLocSize = aGrabber.ReadUInt32(); //size
     sal_Int16 nPathLen = aGrabber.ReadInt16();
     sal_Int16 nExtraLen = aGrabber.ReadInt16();
 
@@ -944,6 +947,7 @@ void ZipFile::readLOC( ZipEntry &rEntry )
 
     rEntry.nOffset = aGrabber.getPosition() + nPathLen + nExtraLen;
 
+    sal_Int64 nEnd = {}; // avoid -Werror=maybe-uninitialized
     bool bBroken = false;
 
     try
@@ -965,8 +969,133 @@ void ZipFile::readLOC( ZipEntry &rEntry )
             rEntry.sPath = sLOCPath;
         }
 
-        bBroken = rEntry.nPathLen != nPathLen
-                        || rEntry.sPath != sLOCPath;
+        if (rEntry.nPathLen != nPathLen || rEntry.sPath != sLOCPath)
+        {
+            SAL_INFO("package", "LOC inconsistent name: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        bool isZip64{false};
+        ::std::optional<sal_uInt64> oOffset64;
+        if (nExtraLen != 0)
+        {
+            Sequence<sal_Int8> aExtraBuffer;
+            aGrabber.readBytes(aExtraBuffer, nExtraLen);
+            MemoryByteGrabber extraMemGrabber(aExtraBuffer);
+
+            isZip64 = readExtraFields(extraMemGrabber, nExtraLen,
+                    nLocSize, nLocCompressedSize, oOffset64, &sLOCPath);
+        }
+
+        // Just plain ignore bits 1 & 2 of the flag field - they are either
+        // purely informative, or even fully undefined (depending on method).
+        // Also ignore bit 11 ("Language encoding flag"): tdf125300.docx is
+        // example with mismatch - and the actual file names are compared in
+        // any case and required to be UTF-8.
+        if ((rEntry.nFlag & ~0x806U) != (nLocFlag & ~0x806U))
+        {
+            SAL_INFO("package", "LOC inconsistent flag: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        // TODO: "older versions with encrypted streams write mismatching 
DEFLATE/STORE" ???
+        if (rEntry.nMethod != nLocMethod)
+        {
+            SAL_INFO("package", "LOC inconsistent method: \"" << rEntry.sPath 
<< "\"");
+            bBroken = true;
+        }
+
+        if (o3tl::checked_add<sal_Int64>(rEntry.nOffset, 
rEntry.nCompressedSize, nEnd))
+        {
+            throw ZipException(u"Integer-overflow"_ustr);
+        }
+
+        // read "data descriptor" - this can be 12, 16, 20, or 24 bytes in size
+        if ((rEntry.nFlag & 0x08) != 0)
+        {
+#if 0
+            if (nLocMethod == STORED) // example: fdo68983.odt has this :(
+            {
+                SAL_INFO("package", "LOC STORED with data descriptor: \"" << 
rEntry.sPath << "\"");
+                bBroken = true;
+            }
+            else
+#endif
+            {
+                decltype(nLocCrc) nDDCrc;
+                decltype(nLocCompressedSize) nDDCompressedSize;
+                decltype(nLocSize) nDDSize;
+                aGrabber.seek(aGrabber.getPosition() + rEntry.nCompressedSize);
+                sal_uInt32 nTemp = aGrabber.ReadUInt32();
+                if (nTemp == 0x08074b50) // APPNOTE says PK78 is optional???
+                {
+                    nDDCrc = aGrabber.ReadUInt32();
+                }
+                else
+                {
+                    nDDCrc = nTemp;
+                }
+                if (isZip64)
+                {
+                    nDDCompressedSize = aGrabber.ReadUInt64();
+                    nDDSize = aGrabber.ReadUInt64();
+                }
+                else
+                {
+                    nDDCompressedSize = aGrabber.ReadUInt32();
+                    nDDSize = aGrabber.ReadUInt32();
+                }
+                if (nEnd < aGrabber.getPosition())
+                {
+                    nEnd = aGrabber.getPosition();
+                }
+                else
+                {
+                    SAL_INFO("package", "LOC invalid size: \"" << rEntry.sPath 
<< "\"");
+                    bBroken = true;
+                }
+                // tdf91429.docx has same values in LOC and in (superfluous) DD
+                if ((nLocCrc == 0 || nLocCrc == nDDCrc)
+                    && (nLocCompressedSize == 0 || nLocCompressedSize == 
sal_uInt64(-1) || nLocCompressedSize == nDDCompressedSize)
+                    && (nLocSize == 0 || nLocSize == sal_uInt64(-1) || 
nLocSize == nDDSize))
+
+                {
+                    nLocCrc = nDDCrc;
+                    nLocCompressedSize = nDDCompressedSize;
+                    nLocSize = nDDSize;
+                }
+                else
+                {
+                    SAL_INFO("package", "LOC non-0 with data descriptor: \"" 
<< rEntry.sPath << "\"");
+                    bBroken = true;
+                }
+            }
+        }
+
+        // unit test file export64.zip has nLocCrc/nLocCS/nLocSize = 0 on 
mimetype
+        if (nLocCrc != 0 && static_cast<sal_uInt32>(rEntry.nCrc) != nLocCrc)
+        {
+            SAL_INFO("package", "LOC inconsistent CRC: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        if (nLocCompressedSize != 0 && 
static_cast<sal_uInt64>(rEntry.nCompressedSize) != nLocCompressedSize)
+        {
+            SAL_INFO("package", "LOC inconsistent compressed size: \"" << 
rEntry.sPath << "\"");
+            bBroken = true;
+        }
+
+        if (nLocSize != 0 && static_cast<sal_uInt64>(rEntry.nSize) != nLocSize)
+        {
+            SAL_INFO("package", "LOC inconsistent size: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        if (oOffset64 && o3tl::make_unsigned(nPos) != *oOffset64)
+        {
+            SAL_INFO("package", "LOC inconsistent offset: \"" << rEntry.sPath 
<< "\"");
+            bBroken = true;
+        }
     }
     catch(...)
     {
@@ -975,6 +1104,8 @@ void ZipFile::readLOC( ZipEntry &rEntry )
 
     if ( bBroken && !bRecoveryMode )
         throw ZipIOException("The stream seems to be broken!" );
+
+    return nEnd;
 }
 
 sal_Int32 ZipFile::findEND()
@@ -1062,7 +1193,7 @@ sal_Int32 ZipFile::readCEN()
 
         ZipEntry aEntry;
         sal_Int16 nCommentLen;
-        sal_Int64 nMinOffset{nEndPos};
+        ::std::vector<std::pair<sal_uInt64, sal_uInt64>> unallocated = { { 0, 
nCenPos } };
 
         aEntries.reserve(nTotal);
         for (nCount = 0 ; nCount < nTotal; nCount++)
@@ -1119,7 +1250,12 @@ sal_Int32 ZipFile::readCEN()
 
             if (aEntry.nExtraLen>0)
             {
-                readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, 
nCompressedSize, &nOffset, &aEntry.sPath);
+                ::std::optional<sal_uInt64> oOffset64;
+                readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, 
nCompressedSize, oOffset64, &aEntry.sPath);
+                if (oOffset64)
+                {
+                    nOffset = *oOffset64;
+                }
             }
             aEntry.nCompressedSize = nCompressedSize;
             aEntry.nSize = nSize;
@@ -1127,12 +1263,64 @@ sal_Int32 ZipFile::readCEN()
 
             if (o3tl::checked_add<sal_Int64>(aEntry.nOffset, nLocPos, 
aEntry.nOffset))
                 throw ZipException("Integer-overflow");
-            nMinOffset = std::min(nMinOffset, aEntry.nOffset);
             if (o3tl::checked_multiply<sal_Int64>(aEntry.nOffset, -1, 
aEntry.nOffset))
                 throw ZipException("Integer-overflow");
 
             aMemGrabber.skipBytes(nCommentLen);
 
+            // unfortunately readLOC is required now to check the consistency
+            assert(aEntry.nOffset <= 0);
+            sal_uInt64 const nStart{ o3tl::make_unsigned(-aEntry.nOffset) };
+            sal_uInt64 const nEnd = readLOC(aEntry);
+            assert(nStart < nEnd);
+            for (auto it = unallocated.begin(); ; ++it)
+            {
+                if (it == unallocated.end())
+                {
+                    throw ZipException(u"overlapping entries"_ustr);
+                }
+                if (nStart < it->first)
+                {
+                    throw ZipException(u"overlapping entries"_ustr);
+                }
+                else if (it->first == nStart)
+                {
+                    if (it->second == nEnd)
+                    {
+                        unallocated.erase(it);
+                        break;
+                    }
+                    else if (nEnd < it->second)
+                    {
+                        it->first = nEnd;
+                        break;
+                    }
+                    else
+                    {
+                        throw ZipException(u"overlapping entries"_ustr);
+                    }
+                }
+                else if (nStart < it->second)
+                {
+                    if (nEnd < it->second)
+                    {
+                        auto const temp{it->first};
+                        it->first = nEnd;
+                        unallocated.insert(it, { temp, nStart });
+                        break;
+                    }
+                    else if (nEnd == it->second)
+                    {
+                        it->second = nStart;
+                        break;
+                    }
+                    else
+                    {
+                        throw ZipException(u"overlapping entries"_ustr);
+                    }
+                }
+            }
+
             // Is this a FAT-compatible empty entry?
             if (aEntry.nSize == 0 && (versionMadeBy & 0xff00) == 0)
             {
@@ -1158,9 +1346,9 @@ sal_Int32 ZipFile::readCEN()
 
         if (nCount != nTotal)
             throw ZipException("Count != Total" );
-        if (nMinOffset != 0)
+        if (!unallocated.empty())
         {
-            throw ZipException(u"Extra bytes at beginning of zip file"_ustr);
+            throw ZipException(u"Zip file has holes! It will leak!"_ustr);
         }
     }
     catch ( IllegalArgumentException & )
@@ -1171,10 +1359,12 @@ sal_Int32 ZipFile::readCEN()
     return nCenPos;
 }
 
-void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
+bool ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
         sal_uInt64& nSize, sal_uInt64& nCompressedSize,
-        sal_uInt64* nOffset, OUString const*const pCENFilenameToCheck)
+        std::optional<sal_uInt64> & roOffset,
+        OUString const*const pCENFilenameToCheck)
 {
+    bool isZip64{false};
     while (nExtraLen > 0) // Extensible data fields
     {
         sal_Int16 nheaderID = aMemGrabber.ReadInt16();
@@ -1188,15 +1378,16 @@ void ZipFile::readExtraFields(MemoryByteGrabber& 
aMemGrabber, sal_Int16 nExtraLe
             {
                 nCompressedSize = aMemGrabber.ReadUInt64();
                 nReadSize = 16;
-                if (dataSize >= 24 && nOffset)
+                if (dataSize >= 24 && roOffset)
                 {
-                    *nOffset = aMemGrabber.ReadUInt64();
+                    roOffset.emplace(aMemGrabber.ReadUInt64());
                     nReadSize = 24;
                     // 4 byte should be "Disk Start Number" but we not need it
                 }
             }
             if (dataSize > nReadSize)
                 aMemGrabber.skipBytes(dataSize - nReadSize);
+            isZip64 = true;
         }
         // Info-ZIP Unicode Path Extra Field - pointless as we expect UTF-8 in 
CEN already
         else if (nheaderID == 0x7075 && pCENFilenameToCheck) // ignore in 
recovery mode
@@ -1233,6 +1424,7 @@ void ZipFile::readExtraFields(MemoryByteGrabber& 
aMemGrabber, sal_Int16 nExtraLe
         }
         nExtraLen -= dataSize + 4;
     }
+    return isZip64;
 }
 
 void ZipFile::recover()
@@ -1328,8 +1520,9 @@ void ZipFile::recover()
                                     MemoryByteGrabber 
aMemGrabberExtra(aExtraBuffer);
                                     if (aEntry.nExtraLen > 0)
                                     {
+                                        ::std::optional<sal_uInt64> oOffset64;
                                         readExtraFields(aMemGrabberExtra, 
aEntry.nExtraLen, nSize,
-                                                        nCompressedSize, 
nullptr, nullptr);
+                                            nCompressedSize, oOffset64, 
nullptr);
                                     }
                                 }
 
diff --git a/sc/qa/unit/subsequent_filters_test4.cxx 
b/sc/qa/unit/subsequent_filters_test4.cxx
index 47de6248aecc..8105c3981e0a 100644
--- a/sc/qa/unit/subsequent_filters_test4.cxx
+++ b/sc/qa/unit/subsequent_filters_test4.cxx
@@ -55,6 +55,7 @@
 
 #include <com/sun/star/drawing/XDrawPageSupplier.hpp>
 #include <com/sun/star/drawing/XControlShape.hpp>
+#include <comphelper/propertyvalue.hxx>
 
 #include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
 #include <com/sun/star/container/XIndexAccess.hpp>
@@ -1917,7 +1918,14 @@ CPPUNIT_TEST_FIXTURE(ScFiltersTest4, 
testSortWithFormattingXLS)
 // just needs to not crash on recalc
 CPPUNIT_TEST_FIXTURE(ScFiltersTest4, testForcepoint107)
 {
-    createScDoc("xlsx/forcepoint107.xlsx");
+    // It expectedly fails to load normally
+    CPPUNIT_ASSERT_ASSERTION_FAIL(createScDoc("xlsx/forcepoint107.xlsx"));
+
+    // importing it must succeed with RepairPackage set to true.
+    uno::Sequence<beans::PropertyValue> aParams
+        = { comphelper::makePropertyValue(u"RepairPackage"_ustr, true) };
+    loadWithParams(createFileURL(u"xlsx/forcepoint107.xlsx"), aParams);
+
     ScDocShell* pDocSh = getScDocShell();
     pDocSh->DoHardRecalc();
 }
diff --git a/sd/qa/unit/data/pptx/pass/ofz35597-1.pptx 
b/sd/qa/unit/data/pptx/fail/ofz35597-1.pptx
similarity index 100%
rename from sd/qa/unit/data/pptx/pass/ofz35597-1.pptx
rename to sd/qa/unit/data/pptx/fail/ofz35597-1.pptx
diff --git a/sd/qa/unit/data/pptx/pass/ofz46160-1.pptx 
b/sd/qa/unit/data/pptx/fail/ofz46160-1.pptx
similarity index 100%
rename from sd/qa/unit/data/pptx/pass/ofz46160-1.pptx
rename to sd/qa/unit/data/pptx/fail/ofz46160-1.pptx
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index e9a8530c3cce..aca25cabc1f6 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -1888,8 +1888,8 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf113143)
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint71)
 {
     // I just care it doesn't crash
-    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
-    saveAsPDF(u"forcepoint71.key");
+    // numerous Zip errors are detected now and libetonyek cannot RepairPackage
+    CPPUNIT_ASSERT_ASSERTION_FAIL(loadFromFile(u"forcepoint71.key"));
 }
 
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint80)

Reply via email to