package/inc/ZipFile.hxx                                       |    5 
 package/qa/cppunit/data/vaidator-fine-libreoffice-no-open.odt |binary
 package/qa/cppunit/test_zippackage.cxx                        |   36 ++++
 package/source/zipapi/ZipFile.cxx                             |   81 ++++++++--
 4 files changed, 106 insertions(+), 16 deletions(-)

New commits:
commit 6ffcf0e0e15bf10fa7243d32b1294931bea73340
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Fri Apr 4 17:46:02 2025 +0200
Commit:     Adolfo Jayme Barrientos <fit...@ubuntu.com>
CommitDate: Wed Apr 9 07:33:57 2025 +0200

    tdf#166040 package: ZipFile::recover() find DD without PK78 signature
    
    The PK78 signature is optional, and if the sizes in the local file
    header are 0 then without the DD the recovery won't do anything useful.
    
    If the latest local file header has the DD bit set in flags, then try to
    find the DD before the next thing that has a PK?? signature.
    
    Change-Id: I149a87805f7bcb6216fc909c20ece600ce77956d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/183722
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    Tested-by: Jenkins
    (cherry picked from commit 91b297730b1497850ca81d55288b65ab1712e468)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/183732
    Reviewed-by: Adolfo Jayme Barrientos <fit...@ubuntu.com>

diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx
index b7612061b11c..1d5683349193 100644
--- a/package/inc/ZipFile.hxx
+++ b/package/inc/ZipFile.hxx
@@ -97,7 +97,10 @@ private:
     sal_uInt64 readLOC_Impl(ZipEntry &rEntry, std::vector<sal_Int8>& 
rNameBuffer, std::vector<sal_Int8>& rExtraBuffer);
     sal_Int32 readCEN();
     std::tuple<sal_Int64, sal_Int64, sal_Int64> findCentralDirectory();
-    void HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, 
sal_Int64 totalSize);
+    bool TryDDImpl(sal_Int64 dataOffset, sal_Int32 nCRC32,
+        sal_Int64 nCompressedSize, sal_Int64 nSize);
+    bool TryDDEndAt(std::span<const sal_Int8> data, sal_Int64 dataOffset);
+    bool HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, 
sal_Int64 totalSize);
     void HandlePK78(std::span<const sal_Int8> data, sal_Int64 dataOffset);
     void recover();
     static bool readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 
nExtraLen,
diff --git a/package/qa/cppunit/data/vaidator-fine-libreoffice-no-open.odt 
b/package/qa/cppunit/data/vaidator-fine-libreoffice-no-open.odt
new file mode 100644
index 000000000000..c1887ed636ed
Binary files /dev/null and 
b/package/qa/cppunit/data/vaidator-fine-libreoffice-no-open.odt differ
diff --git a/package/qa/cppunit/test_zippackage.cxx 
b/package/qa/cppunit/test_zippackage.cxx
index 354429576032..f296802e4524 100644
--- a/package/qa/cppunit/test_zippackage.cxx
+++ b/package/qa/cppunit/test_zippackage.cxx
@@ -10,6 +10,8 @@
 #include <unotest/bootstrapfixturebase.hxx>
 
 #include <com/sun/star/beans/NamedValue.hpp>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/container/XHierarchicalNameAccess.hpp>
 #include <com/sun/star/embed/StorageFormats.hpp>
 #include <com/sun/star/packages/zip/ZipIOException.hpp>
 
@@ -447,6 +449,40 @@ CPPUNIT_TEST_FIXTURE(ZipPackageTest, testTdf163818)
                                                                            
m_xContext);
 }
 
+CPPUNIT_TEST_FIXTURE(ZipPackageTest, testDataDescriptorDirectory)
+{
+    auto const url(m_directories.getURLFromSrc(
+        u"/package/qa/cppunit/data/vaidator-fine-libreoffice-no-open.odt"));
+    uno::Sequence<uno::Any> const args{
+        uno::Any(url),
+        uno::Any(beans::NamedValue("StorageFormat", 
uno::Any(embed::StorageFormats::PACKAGE)))
+    };
+
+    // don't load zip file with DD on directory
+    
CPPUNIT_ASSERT_THROW(m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
+                             ZipPackage, args, m_xContext),
+                         css::packages::zip::ZipIOException);
+
+    // recovery should work
+    uno::Sequence<uno::Any> const args2{
+        uno::Any(url), uno::Any(beans::NamedValue(u"RepairPackage"_ustr, 
uno::Any(true))),
+        uno::Any(beans::NamedValue("StorageFormat", 
uno::Any(embed::StorageFormats::PACKAGE)))
+    };
+
+    uno::Reference<container::XHierarchicalNameAccess> xZip{
+        
m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext(ZipPackage,
 args2,
+                                                                               
m_xContext),
+        uno::UNO_QUERY
+    };
+
+    CPPUNIT_ASSERT(xZip.is());
+    uno::Reference<beans::XPropertySet> xStream{ 
xZip->getByHierarchicalName(u"manifest.rdf"_ustr),
+                                                 uno::UNO_QUERY };
+    CPPUNIT_ASSERT(xStream.is());
+    // check that the size was recovered from data descriptor
+    CPPUNIT_ASSERT_EQUAL(sal_Int64(899), 
xStream->getPropertyValue(u"Size"_ustr).get<sal_Int64>());
+}
+
 //CPPUNIT_TEST_SUITE_REGISTRATION(...);
 //CPPUNIT_PLUGIN_IMPLEMENT();
 
diff --git a/package/source/zipapi/ZipFile.cxx 
b/package/source/zipapi/ZipFile.cxx
index 24241e66af2e..46c8ad11ee0d 100644
--- a/package/source/zipapi/ZipFile.cxx
+++ b/package/source/zipapi/ZipFile.cxx
@@ -1610,7 +1610,7 @@ bool ZipFile::readExtraFields(MemoryByteGrabber& 
aMemGrabber, sal_Int16 nExtraLe
 }
 
 // PK34: Local file header
-void ZipFile::HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, 
sal_Int64 totalSize)
+bool ZipFile::HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, 
sal_Int64 totalSize)
 {
     ZipEntry aEntry;
     Sequence<sal_Int8> aTmpBuffer(data.data() + 4, 26);
@@ -1619,11 +1619,11 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> 
data, sal_Int64 dataOffset, s
     aEntry.nVersion = aMemGrabber.ReadInt16();
     aEntry.nFlag = aMemGrabber.ReadInt16();
     if ((aEntry.nFlag & 1) == 1)
-        return;
+        return false;
 
     aEntry.nMethod = aMemGrabber.ReadInt16();
     if (aEntry.nMethod != STORED && aEntry.nMethod != DEFLATED)
-        return;
+        return false;
 
     aEntry.nTime = aMemGrabber.ReadInt32();
     aEntry.nCrc = aMemGrabber.ReadInt32();
@@ -1635,7 +1635,7 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> data, 
sal_Int64 dataOffset, s
     const sal_Int32 nDescrLength = (aEntry.nMethod == DEFLATED && 
(aEntry.nFlag & 8)) ? 16 : 0;
     const sal_Int64 nBlockHeaderLength = aEntry.nPathLen + aEntry.nExtraLen + 
30 + nDescrLength;
     if (aEntry.nPathLen < 0 || aEntry.nExtraLen < 0 || dataOffset + 
nBlockHeaderLength > totalSize)
-        return;
+        return false;
 
     // read always in UTF8, some tools seem not to set UTF8 bit
     if (o3tl::make_unsigned(30 + aEntry.nPathLen) <= data.size())
@@ -1677,7 +1677,7 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> data, 
sal_Int64 dataOffset, s
     sal_Int64 nBlockLength = nDataSize + nBlockHeaderLength;
 
     if (dataOffset + nBlockLength > totalSize)
-        return;
+        return false;
 
     aEntry.nCompressedSize = nCompressedSize;
     aEntry.nSize = nSize;
@@ -1697,12 +1697,12 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> 
data, sal_Int64 dataOffset, s
                         [path = OUString(aEntry.sPath + "/")](const auto& r)
                         { return r.first.startsWith(path); })
                != aEntries.end())
-        return;
+        return false;
 
     auto const lowerPath(aEntry.sPath.toAsciiLowerCase());
     if (m_EntriesInsensitive.find(lowerPath) != m_EntriesInsensitive.end())
     {   // this is required for OOXML, but not for ODF
-        return;
+        return false;
     }
     m_EntriesInsensitive.insert(lowerPath);
     aEntries.emplace(aEntry.sPath, aEntry);
@@ -1720,6 +1720,7 @@ void ZipFile::HandlePK34(std::span<const sal_Int8> data, 
sal_Int64 dataOffset, s
                 aEntries.erase(it);
         }
     }
+    return (aEntry.nFlag & 8) && (aEntry.nCompressedSize == 0);
 }
 
 // PK78: Data descriptor
@@ -1751,7 +1752,12 @@ void ZipFile::HandlePK78(std::span<const sal_Int8> data, 
sal_Int64 dataOffset)
         nCompressedSize = aMemGrabber.ReadUInt64();
         nSize = aMemGrabber.ReadUInt64();
     }
+    TryDDImpl(dataOffset, nCRC32, nCompressedSize, nSize);
+}
 
+bool ZipFile::TryDDImpl(sal_Int64 const dataOffset, sal_Int32 const nCRC32,
+        sal_Int64 const nCompressedSize, sal_Int64 const nSize)
+{
     for (auto& rEntry : aEntries)
     {
         // this is a broken package, accept this block not only for DEFLATED 
streams
@@ -1777,6 +1783,7 @@ void ZipFile::HandlePK78(std::span<const sal_Int8> data, 
sal_Int64 dataOffset)
                 rEntry.second.nCrc = nCRC32;
                 rEntry.second.nCompressedSize = nCompressedSize;
                 rEntry.second.nSize = nSize;
+                return true;
             }
         }
 #if 0
@@ -1790,6 +1797,32 @@ void ZipFile::HandlePK78(std::span<const sal_Int8> data, 
sal_Int64 dataOffset)
         }
 #endif
     }
+    return false;
+}
+
+bool ZipFile::TryDDEndAt(std::span<const sal_Int8> const data, sal_Int64 const 
dataOffset)
+{
+    assert(!aEntries.empty()); // HandlePK34 must ensure this
+
+    Sequence<sal_Int8> const buf32{data.data() + 8, 12};
+    MemoryByteGrabber mbg32{buf32};
+    sal_uInt32 const nCrc32{mbg32.ReadUInt32()};
+    sal_uInt32 const nCompressedSize32{mbg32.ReadUInt32()};
+    sal_uInt32 const nSize32{mbg32.ReadUInt32()};
+
+    if (TryDDImpl(dataOffset + 8, nCrc32, nCompressedSize32, nSize32))
+    {
+        return true;
+    }
+
+    // then try if Zip64 crc/sizes are plausible
+    Sequence<sal_Int8> const buf64{data.data(), 20};
+    MemoryByteGrabber mbg64{buf64};
+    sal_uInt32 const nCrc64{mbg64.ReadUInt32()};
+    sal_uInt64 const nCompressedSize64{mbg64.ReadUInt64()};
+    sal_uInt64 const nSize64{mbg64.ReadUInt64()};
+
+    return TryDDImpl(dataOffset, nCrc64, nCompressedSize64, nSize64);
 }
 
 void ZipFile::recover()
@@ -1807,6 +1840,7 @@ void ZipFile::recover()
 
         aGrabber.seek( 0 );
 
+        bool findDD{false};
         sal_Int32 nRead;
         for( sal_Int64 nGenPos = 0; (nRead = aGrabber.readBytes( 
aBuffer.data(), nToRead )) && nRead > 16; )
         {
@@ -1820,15 +1854,32 @@ void ZipFile::recover()
                 || ( nBufSize < nToRead && nPos < nBufSize - 16 ) )
 
             {
-                if ( nPos < nBufSize - 30 && pBuffer[nPos] == 'P' && 
pBuffer[nPos+1] == 'K' && pBuffer[nPos+2] == 3 && pBuffer[nPos+3] == 4 )
-                {
-                    HandlePK34(std::span(pBuffer + nPos, nBufSize - nPos), 
nGenPos + nPos, nLength);
-                    nPos += 4;
-                }
-                else if (pBuffer[nPos] == 'P' && pBuffer[nPos+1] == 'K' && 
pBuffer[nPos+2] == 7 && pBuffer[nPos+3] == 8 )
+                if (pBuffer[nPos] == 'P' && pBuffer[nPos+1] == 'K')
                 {
-                    HandlePK78(std::span(pBuffer + nPos, nBufSize - nPos), 
nGenPos + nPos);
-                    nPos += 4;
+                    if (pBuffer[nPos+2] == 7 && pBuffer[nPos+3] == 8)
+                    {
+                        findDD = false;
+                        HandlePK78(std::span(pBuffer + nPos, nBufSize - nPos), 
nGenPos + nPos);
+                        nPos += 4;
+                    }
+                    else
+                    {
+                        if (findDD && 30 < nGenPos+nPos
+                            && TryDDEndAt(std::span(pBuffer + nPos - 20, 20), 
nGenPos + nPos - 20))
+                        {
+                            findDD = false;
+                        }
+                        if (nPos < nBufSize - 30
+                            && pBuffer[nPos+2] == 3 && pBuffer[nPos+3] == 4)
+                        {
+                            findDD = HandlePK34(std::span(pBuffer + nPos, 
nBufSize - nPos), nGenPos + nPos, nLength);
+                            nPos += 4;
+                        }
+                        else
+                        {
+                            ++nPos;
+                        }
+                    }
                 }
                 else
                     nPos++;

Reply via email to