package/inc/ByteGrabber.hxx             |    1 
 package/inc/ZipFile.hxx                 |    7 
 package/source/zipapi/ByteGrabber.cxx   |   18 ++
 package/source/zipapi/ZipFile.cxx       |  231 +++++++++++++++++++++++++++++---
 sc/qa/unit/subsequent_filters_test4.cxx |    9 +
 vcl/qa/cppunit/pdfexport/pdfexport.cxx  |    4 
 6 files changed, 245 insertions(+), 25 deletions(-)

New commits:
commit efae4fc42d5fe3c0a69757226f38efc10d101194
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Tue Jul 16 12:12:09 2024 +0200
Commit:     Michael Stahl <michael.st...@allotropia.de>
CommitDate: Tue Jul 16 15:57:43 2024 +0200

    package: add additional consistency checks for local file header
    
    Check it contains same as central directory header, also check data
    descriptor if available.  Also check there are no gaps or overlaps.
    
    This causes 3 fuzzer generated test documents to fail to load; adapt
    tests.
    
    Change-Id: If5813652f3bd03e90fdf95eb97e1e1523455b2b8
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170571
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/package/inc/ByteGrabber.hxx b/package/inc/ByteGrabber.hxx
index ba1512cf5162..4a93398a7704 100644
--- a/package/inc/ByteGrabber.hxx
+++ b/package/inc/ByteGrabber.hxx
@@ -61,6 +61,7 @@ public:
 
     sal_uInt16 ReadUInt16();
     sal_uInt32 ReadUInt32();
+    sal_uInt64 ReadUInt64();
     sal_Int16 ReadInt16()
     {
         return static_cast<sal_Int16>(ReadUInt16());
diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx
index 8828d929273f..ed9847a30013 100644
--- a/package/inc/ZipFile.hxx
+++ b/package/inc/ZipFile.hxx
@@ -29,6 +29,7 @@
 #include "HashMaps.hxx"
 #include "EncryptionData.hxx"
 
+#include <optional>
 #include <span>
 #include <unordered_set>
 
@@ -86,15 +87,15 @@ class ZipFile
 
     void getSizeAndCRC( sal_Int64 nOffset, sal_Int64 nCompressedSize, 
sal_Int64 *nSize, sal_Int32 *nCRC );
 
-    void readLOC( ZipEntry &rEntry );
+    sal_uInt64 readLOC(ZipEntry &rEntry);
     sal_Int32 readCEN();
     sal_Int32 findEND();
     void HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, 
sal_Int64 totalSize);
     void HandlePK78(std::span<const sal_Int8> data, sal_Int64 dataOffset);
     void recover();
-    static void readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
+    static bool readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
                                 sal_uInt64& nSize, sal_uInt64& nCompressedSize,
-                                sal_uInt64* nOffset,
+                                ::std::optional<sal_uInt64> & roOffset,
                                 OUString const* pCENFilenameToCheck);
 
 public:
diff --git a/package/source/zipapi/ByteGrabber.cxx 
b/package/source/zipapi/ByteGrabber.cxx
index 5a491de6509b..0e701a60007f 100644
--- a/package/source/zipapi/ByteGrabber.cxx
+++ b/package/source/zipapi/ByteGrabber.cxx
@@ -119,4 +119,22 @@ sal_uInt32 ByteGrabber::ReadUInt32()
           | ( pSequence[3] & 0xFF ) << 24 );
 }
 
+sal_uInt64 ByteGrabber::ReadUInt64()
+{
+    std::scoped_lock aGuard(m_aMutex);
+
+    if (xStream->readBytes(aSequence, 8) != 8)
+        return 0;
+
+    pSequence = aSequence.getConstArray();
+    return  static_cast<sal_uInt64>(pSequence[0] & 0xFF)
+          | static_cast<sal_uInt64>(pSequence[1] & 0xFF) << 8
+          | static_cast<sal_uInt64>(pSequence[2] & 0xFF) << 16
+          | static_cast<sal_uInt64>(pSequence[3] & 0xFF) << 24
+          | static_cast<sal_uInt64>(pSequence[4] & 0xFF) << 32
+          | static_cast<sal_uInt64>(pSequence[5] & 0xFF) << 40
+          | static_cast<sal_uInt64>(pSequence[6] & 0xFF) << 48
+          | static_cast<sal_uInt64>(pSequence[7] & 0xFF) << 56;
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/package/source/zipapi/ZipFile.cxx 
b/package/source/zipapi/ZipFile.cxx
index f7713b8665c1..5fa21941b8b6 100644
--- a/package/source/zipapi/ZipFile.cxx
+++ b/package/source/zipapi/ZipFile.cxx
@@ -927,7 +927,7 @@ uno::Reference< XInputStream > ZipFile::getWrappedRawStream(
     return createStreamForZipEntry ( aMutexHolder, rEntry, rData, 
UNBUFF_STREAM_WRAPPEDRAW, true, true, aMediaType );
 }
 
-void ZipFile::readLOC( ZipEntry &rEntry )
+sal_uInt64 ZipFile::readLOC(ZipEntry &rEntry)
 {
     ::osl::MutexGuard aGuard( m_aMutexHolder->GetMutex() );
 
@@ -943,13 +943,16 @@ void ZipFile::readLOC( ZipEntry &rEntry )
     // Just verify the path and calculate the data offset and otherwise
     // rely on the central directory info.
 
-    aGrabber.ReadInt16(); //version
-    aGrabber.ReadInt16(); //flag
-    aGrabber.ReadInt16(); //how
+    aGrabber.ReadInt16(); // version - ignore any mismatch (Maven created JARs)
+    sal_uInt16 const nLocFlag = aGrabber.ReadUInt16(); // general purpose bit 
flag
+    sal_uInt16 const nLocMethod = aGrabber.ReadUInt16(); // compression method
+    // Do *not* compare timestamps, since MSO 2010 can produce documents
+    // with timestamp difference in the central directory entry and local
+    // file header.
     aGrabber.ReadInt32(); //time
-    aGrabber.ReadInt32(); //crc
-    aGrabber.ReadInt32(); //compressed size
-    aGrabber.ReadInt32(); //size
+    sal_uInt32 nLocCrc = aGrabber.ReadUInt32(); //crc
+    sal_uInt64 nLocCompressedSize = aGrabber.ReadUInt32(); //compressed size
+    sal_uInt64 nLocSize = aGrabber.ReadUInt32(); //size
     sal_Int16 nPathLen = aGrabber.ReadInt16();
     sal_Int16 nExtraLen = aGrabber.ReadInt16();
 
@@ -961,6 +964,7 @@ void ZipFile::readLOC( ZipEntry &rEntry )
 
     rEntry.nOffset = aGrabber.getPosition() + nPathLen + nExtraLen;
 
+    sal_Int64 nEnd = {}; // avoid -Werror=maybe-uninitialized
     bool bBroken = false;
 
     try
@@ -982,8 +986,133 @@ void ZipFile::readLOC( ZipEntry &rEntry )
             rEntry.sPath = sLOCPath;
         }
 
-        bBroken = rEntry.nPathLen != nPathLen
-                        || rEntry.sPath != sLOCPath;
+        if (rEntry.nPathLen != nPathLen || rEntry.sPath != sLOCPath)
+        {
+            SAL_INFO("package", "LOC inconsistent name: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        bool isZip64{false};
+        ::std::optional<sal_uInt64> oOffset64;
+        if (nExtraLen != 0)
+        {
+            Sequence<sal_Int8> aExtraBuffer;
+            aGrabber.readBytes(aExtraBuffer, nExtraLen);
+            MemoryByteGrabber extraMemGrabber(aExtraBuffer);
+
+            isZip64 = readExtraFields(extraMemGrabber, nExtraLen,
+                    nLocSize, nLocCompressedSize, oOffset64, &sLOCPath);
+        }
+
+        // Just plain ignore bits 1 & 2 of the flag field - they are either
+        // purely informative, or even fully undefined (depending on method).
+        // Also ignore bit 11 ("Language encoding flag"): tdf125300.docx is
+        // example with mismatch - and the actual file names are compared in
+        // any case and required to be UTF-8.
+        if ((rEntry.nFlag & ~0x806U) != (nLocFlag & ~0x806U))
+        {
+            SAL_INFO("package", "LOC inconsistent flag: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        // TODO: "older versions with encrypted streams write mismatching 
DEFLATE/STORE" ???
+        if (rEntry.nMethod != nLocMethod)
+        {
+            SAL_INFO("package", "LOC inconsistent method: \"" << rEntry.sPath 
<< "\"");
+            bBroken = true;
+        }
+
+        if (o3tl::checked_add<sal_Int64>(rEntry.nOffset, 
rEntry.nCompressedSize, nEnd))
+        {
+            throw ZipException(u"Integer-overflow"_ustr);
+        }
+
+        // read "data descriptor" - this can be 12, 16, 20, or 24 bytes in size
+        if ((rEntry.nFlag & 0x08) != 0)
+        {
+#if 0
+            if (nLocMethod == STORED) // example: fdo68983.odt has this :(
+            {
+                SAL_INFO("package", "LOC STORED with data descriptor: \"" << 
rEntry.sPath << "\"");
+                bBroken = true;
+            }
+            else
+#endif
+            {
+                decltype(nLocCrc) nDDCrc;
+                decltype(nLocCompressedSize) nDDCompressedSize;
+                decltype(nLocSize) nDDSize;
+                aGrabber.seek(aGrabber.getPosition() + rEntry.nCompressedSize);
+                sal_uInt32 nTemp = aGrabber.ReadUInt32();
+                if (nTemp == 0x08074b50) // APPNOTE says PK78 is optional???
+                {
+                    nDDCrc = aGrabber.ReadUInt32();
+                }
+                else
+                {
+                    nDDCrc = nTemp;
+                }
+                if (isZip64)
+                {
+                    nDDCompressedSize = aGrabber.ReadUInt64();
+                    nDDSize = aGrabber.ReadUInt64();
+                }
+                else
+                {
+                    nDDCompressedSize = aGrabber.ReadUInt32();
+                    nDDSize = aGrabber.ReadUInt32();
+                }
+                if (nEnd < aGrabber.getPosition())
+                {
+                    nEnd = aGrabber.getPosition();
+                }
+                else
+                {
+                    SAL_INFO("package", "LOC invalid size: \"" << rEntry.sPath 
<< "\"");
+                    bBroken = true;
+                }
+                // tdf91429.docx has same values in LOC and in (superfluous) DD
+                if ((nLocCrc == 0 || nLocCrc == nDDCrc)
+                    && (nLocCompressedSize == 0 || nLocCompressedSize == 
sal_uInt64(-1) || nLocCompressedSize == nDDCompressedSize)
+                    && (nLocSize == 0 || nLocSize == sal_uInt64(-1) || 
nLocSize == nDDSize))
+
+                {
+                    nLocCrc = nDDCrc;
+                    nLocCompressedSize = nDDCompressedSize;
+                    nLocSize = nDDSize;
+                }
+                else
+                {
+                    SAL_INFO("package", "LOC non-0 with data descriptor: \"" 
<< rEntry.sPath << "\"");
+                    bBroken = true;
+                }
+            }
+        }
+
+        // unit test file export64.zip has nLocCrc/nLocCS/nLocSize = 0 on 
mimetype
+        if (nLocCrc != 0 && static_cast<sal_uInt32>(rEntry.nCrc) != nLocCrc)
+        {
+            SAL_INFO("package", "LOC inconsistent CRC: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        if (nLocCompressedSize != 0 && 
static_cast<sal_uInt64>(rEntry.nCompressedSize) != nLocCompressedSize)
+        {
+            SAL_INFO("package", "LOC inconsistent compressed size: \"" << 
rEntry.sPath << "\"");
+            bBroken = true;
+        }
+
+        if (nLocSize != 0 && static_cast<sal_uInt64>(rEntry.nSize) != nLocSize)
+        {
+            SAL_INFO("package", "LOC inconsistent size: \"" << rEntry.sPath << 
"\"");
+            bBroken = true;
+        }
+
+        if (oOffset64 && o3tl::make_unsigned(nPos) != *oOffset64)
+        {
+            SAL_INFO("package", "LOC inconsistent offset: \"" << rEntry.sPath 
<< "\"");
+            bBroken = true;
+        }
     }
     catch(...)
     {
@@ -992,6 +1121,8 @@ void ZipFile::readLOC( ZipEntry &rEntry )
 
     if ( bBroken && !bRecoveryMode )
         throw ZipIOException(u"The stream seems to be broken!"_ustr );
+
+    return nEnd;
 }
 
 sal_Int32 ZipFile::findEND()
@@ -1079,7 +1210,7 @@ sal_Int32 ZipFile::readCEN()
 
         ZipEntry aEntry;
         sal_Int16 nCommentLen;
-        sal_Int64 nMinOffset{nEndPos};
+        ::std::vector<std::pair<sal_uInt64, sal_uInt64>> unallocated = { { 0, 
nCenPos } };
 
         aEntries.reserve(nTotal);
         for (nCount = 0 ; nCount < nTotal; nCount++)
@@ -1136,7 +1267,12 @@ sal_Int32 ZipFile::readCEN()
 
             if (aEntry.nExtraLen>0)
             {
-                readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, 
nCompressedSize, &nOffset, &aEntry.sPath);
+                ::std::optional<sal_uInt64> oOffset64;
+                readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, 
nCompressedSize, oOffset64, &aEntry.sPath);
+                if (oOffset64)
+                {
+                    nOffset = *oOffset64;
+                }
             }
             aEntry.nCompressedSize = nCompressedSize;
             aEntry.nSize = nSize;
@@ -1144,12 +1280,64 @@ sal_Int32 ZipFile::readCEN()
 
             if (o3tl::checked_add<sal_Int64>(aEntry.nOffset, nLocPos, 
aEntry.nOffset))
                 throw ZipException(u"Integer-overflow"_ustr);
-            nMinOffset = std::min(nMinOffset, aEntry.nOffset);
             if (o3tl::checked_multiply<sal_Int64>(aEntry.nOffset, -1, 
aEntry.nOffset))
                 throw ZipException(u"Integer-overflow"_ustr);
 
             aMemGrabber.skipBytes(nCommentLen);
 
+            // unfortunately readLOC is required now to check the consistency
+            assert(aEntry.nOffset <= 0);
+            sal_uInt64 const nStart{ o3tl::make_unsigned(-aEntry.nOffset) };
+            sal_uInt64 const nEnd = readLOC(aEntry);
+            assert(nStart < nEnd);
+            for (auto it = unallocated.begin(); ; ++it)
+            {
+                if (it == unallocated.end())
+                {
+                    throw ZipException(u"overlapping entries"_ustr);
+                }
+                if (nStart < it->first)
+                {
+                    throw ZipException(u"overlapping entries"_ustr);
+                }
+                else if (it->first == nStart)
+                {
+                    if (it->second == nEnd)
+                    {
+                        unallocated.erase(it);
+                        break;
+                    }
+                    else if (nEnd < it->second)
+                    {
+                        it->first = nEnd;
+                        break;
+                    }
+                    else
+                    {
+                        throw ZipException(u"overlapping entries"_ustr);
+                    }
+                }
+                else if (nStart < it->second)
+                {
+                    if (nEnd < it->second)
+                    {
+                        auto const temp{it->first};
+                        it->first = nEnd;
+                        unallocated.insert(it, { temp, nStart });
+                        break;
+                    }
+                    else if (nEnd == it->second)
+                    {
+                        it->second = nStart;
+                        break;
+                    }
+                    else
+                    {
+                        throw ZipException(u"overlapping entries"_ustr);
+                    }
+                }
+            }
+
             // Is this a FAT-compatible empty entry?
             if (aEntry.nSize == 0 && (versionMadeBy & 0xff00) == 0)
             {
@@ -1175,9 +1363,9 @@ sal_Int32 ZipFile::readCEN()
 
         if (nCount != nTotal)
             throw ZipException(u"Count != Total"_ustr );
-        if (nMinOffset != 0)
+        if (!unallocated.empty())
         {
-            throw ZipException(u"Extra bytes at beginning of zip file"_ustr);
+            throw ZipException(u"Zip file has holes! It will leak!"_ustr);
         }
     }
     catch ( IllegalArgumentException & )
@@ -1188,10 +1376,12 @@ sal_Int32 ZipFile::readCEN()
     return nCenPos;
 }
 
-void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
+bool ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
         sal_uInt64& nSize, sal_uInt64& nCompressedSize,
-        sal_uInt64* nOffset, OUString const*const pCENFilenameToCheck)
+        std::optional<sal_uInt64> & roOffset,
+        OUString const*const pCENFilenameToCheck)
 {
+    bool isZip64{false};
     while (nExtraLen > 0) // Extensible data fields
     {
         sal_Int16 nheaderID = aMemGrabber.ReadInt16();
@@ -1205,15 +1395,16 @@ void ZipFile::readExtraFields(MemoryByteGrabber& 
aMemGrabber, sal_Int16 nExtraLe
             {
                 nCompressedSize = aMemGrabber.ReadUInt64();
                 nReadSize = 16;
-                if (dataSize >= 24 && nOffset)
+                if (dataSize >= 24 && roOffset)
                 {
-                    *nOffset = aMemGrabber.ReadUInt64();
+                    roOffset.emplace(aMemGrabber.ReadUInt64());
                     nReadSize = 24;
                     // 4 byte should be "Disk Start Number" but we not need it
                 }
             }
             if (dataSize > nReadSize)
                 aMemGrabber.skipBytes(dataSize - nReadSize);
+            isZip64 = true;
         }
         // Info-ZIP Unicode Path Extra Field - pointless as we expect UTF-8 in 
CEN already
         else if (nheaderID == 0x7075 && pCENFilenameToCheck) // ignore in 
recovery mode
@@ -1250,6 +1441,7 @@ void ZipFile::readExtraFields(MemoryByteGrabber& 
aMemGrabber, sal_Int16 nExtraLe
         }
         nExtraLen -= dataSize + 4;
     }
+    return isZip64;
 }
 
 // PK34: Local file header
@@ -1311,7 +1503,8 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> data, 
sal_Int64 dataOffset, s
         MemoryByteGrabber aMemGrabberExtra(aExtraBuffer);
         if (aEntry.nExtraLen > 0)
         {
-            readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, 
nCompressedSize, nullptr, nullptr);
+            ::std::optional<sal_uInt64> oOffset64;
+            readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, 
nCompressedSize, oOffset64, nullptr);
         }
     }
 
diff --git a/sc/qa/unit/subsequent_filters_test4.cxx 
b/sc/qa/unit/subsequent_filters_test4.cxx
index f227abb1942f..634973563ba8 100644
--- a/sc/qa/unit/subsequent_filters_test4.cxx
+++ b/sc/qa/unit/subsequent_filters_test4.cxx
@@ -1955,7 +1955,14 @@ CPPUNIT_TEST_FIXTURE(ScFiltersTest4, 
testSortWithFormattingXLS)
 // just needs to not crash on recalc
 CPPUNIT_TEST_FIXTURE(ScFiltersTest4, testForcepoint107)
 {
-    createScDoc("xlsx/forcepoint107.xlsx");
+    // It expectedly fails to load normally
+    CPPUNIT_ASSERT_ASSERTION_FAIL(createScDoc("xlsx/forcepoint107.xlsx"));
+
+    // importing it must succeed with RepairPackage set to true.
+    uno::Sequence<beans::PropertyValue> aParams
+        = { comphelper::makePropertyValue(u"RepairPackage"_ustr, true) };
+    loadWithParams(createFileURL(u"xlsx/forcepoint107.xlsx"), aParams);
+
     ScDocShell* pDocSh = getScDocShell();
     pDocSh->DoHardRecalc();
 }
diff --git a/sd/qa/unit/data/pptx/pass/ofz35597-1.pptx 
b/sd/qa/unit/data/pptx/fail/ofz35597-1.pptx
similarity index 100%
rename from sd/qa/unit/data/pptx/pass/ofz35597-1.pptx
rename to sd/qa/unit/data/pptx/fail/ofz35597-1.pptx
diff --git a/sd/qa/unit/data/pptx/pass/ofz46160-1.pptx 
b/sd/qa/unit/data/pptx/fail/ofz46160-1.pptx
similarity index 100%
rename from sd/qa/unit/data/pptx/pass/ofz46160-1.pptx
rename to sd/qa/unit/data/pptx/fail/ofz46160-1.pptx
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index d4325fe267ce..63b89fcbbcb8 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -1971,8 +1971,8 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf113143)
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint71)
 {
     // I just care it doesn't crash
-    aMediaDescriptor[u"FilterName"_ustr] <<= u"writer_pdf_Export"_ustr;
-    saveAsPDF(u"forcepoint71.key");
+    // numerous Zip errors are detected now and libetonyek cannot RepairPackage
+    CPPUNIT_ASSERT_ASSERTION_FAIL(loadFromFile(u"forcepoint71.key"));
 }
 
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testForcePoint80)

Reply via email to