package/inc/ZipFile.hxx | 4 package/source/zipapi/ZipFile.cxx | 369 ++++++++++++++++++-------------------- 2 files changed, 184 insertions(+), 189 deletions(-)
New commits: commit a53f0dc811a115cd68a5c297a68eeb5a9d3bb5ef Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Fri Feb 2 01:07:45 2024 +0600 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Fri Feb 2 07:27:29 2024 +0100 Extract Local file header and Data descriptor handling into own functions Change-Id: I16405f13298934945cc1d5d4a50d403b37c3234e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162912 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx index 2d42ed403136..93d95f66ea97 100644 --- a/package/inc/ZipFile.hxx +++ b/package/inc/ZipFile.hxx @@ -29,6 +29,8 @@ #include "HashMaps.hxx" #include "EncryptionData.hxx" +#include <span> + class MemoryByteGrabber; namespace com::sun::star { namespace uno { class XComponentContext; } @@ -83,6 +85,8 @@ class ZipFile void readLOC( ZipEntry &rEntry ); sal_Int32 readCEN(); sal_Int32 findEND(); + void HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, sal_Int64 totalSize); + void HandlePK78(std::span<const sal_Int8> data, sal_Int64 dataOffset); void recover(); static void readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, sal_uInt64& nSize, sal_uInt64& nCompressedSize, diff --git a/package/source/zipapi/ZipFile.cxx b/package/source/zipapi/ZipFile.cxx index 71fd66f08196..78823c83476d 100644 --- a/package/source/zipapi/ZipFile.cxx +++ b/package/source/zipapi/ZipFile.cxx @@ -1191,16 +1191,191 @@ void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLe } } +// PK34: Local file header +void ZipFile::HandlePK34(std::span<const sal_Int8> data, sal_Int64 dataOffset, sal_Int64 totalSize) +{ + ZipEntry aEntry; + Sequence<sal_Int8> aTmpBuffer(data.data() + 4, 26); + MemoryByteGrabber aMemGrabber(aTmpBuffer); + + aEntry.nVersion = aMemGrabber.ReadInt16(); + aEntry.nFlag = aMemGrabber.ReadInt16(); + if ((aEntry.nFlag & 1) == 1) + return; + + aEntry.nMethod = aMemGrabber.ReadInt16(); + if (aEntry.nMethod != STORED && aEntry.nMethod != DEFLATED) + return; + + aEntry.nTime = aMemGrabber.ReadInt32(); + aEntry.nCrc = aMemGrabber.ReadInt32(); + sal_uInt64 nCompressedSize = aMemGrabber.ReadUInt32(); + sal_uInt64 nSize = aMemGrabber.ReadUInt32(); + aEntry.nPathLen = aMemGrabber.ReadInt16(); + aEntry.nExtraLen = aMemGrabber.ReadInt16(); + + const sal_Int32 nDescrLength = (aEntry.nMethod == DEFLATED && (aEntry.nFlag & 8)) ? 16 : 0; + const sal_Int64 nBlockHeaderLength = aEntry.nPathLen + aEntry.nExtraLen + 30 + nDescrLength; + if (aEntry.nPathLen < 0 || aEntry.nExtraLen < 0 || dataOffset + nBlockHeaderLength > totalSize) + return; + + // read always in UTF8, some tools seem not to set UTF8 bit + if (o3tl::make_unsigned(30 + aEntry.nPathLen) <= data.size()) + aEntry.sPath = OUString(reinterpret_cast<char const*>(data.data() + 30), aEntry.nPathLen, + RTL_TEXTENCODING_UTF8); + else + { + Sequence<sal_Int8> aFileName; + aGrabber.seek(dataOffset + 30); + aGrabber.readBytes(aFileName, aEntry.nPathLen); + aEntry.sPath = OUString(reinterpret_cast<const char*>(aFileName.getConstArray()), + aFileName.getLength(), RTL_TEXTENCODING_UTF8); + aEntry.nPathLen = static_cast<sal_Int16>(aFileName.getLength()); + } + aEntry.sPath = aEntry.sPath.replace('\', '/'); + + // read 64bit header + if (aEntry.nExtraLen > 0) + { + Sequence<sal_Int8> aExtraBuffer; + if (o3tl::make_unsigned(30 + aEntry.nPathLen) + aEntry.nExtraLen <= data.size()) + { + aExtraBuffer = Sequence<sal_Int8>(data.data() + 30 + aEntry.nPathLen, aEntry.nExtraLen); + } + else + { + aGrabber.seek(dataOffset + 30 + aEntry.nExtraLen); + aGrabber.readBytes(aExtraBuffer, aEntry.nExtraLen); + } + MemoryByteGrabber aMemGrabberExtra(aExtraBuffer); + if (aEntry.nExtraLen > 0) + { + readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, nCompressedSize, nullptr); + } + } + + sal_Int64 nDataSize = (aEntry.nMethod == DEFLATED) ? nCompressedSize : nSize; + sal_Int64 nBlockLength = nDataSize + nBlockHeaderLength; + + if (dataOffset + nBlockLength > totalSize) + return; + + aEntry.nCompressedSize = nCompressedSize; + aEntry.nSize = nSize; + + aEntry.nOffset = dataOffset + 30 + aEntry.nPathLen + aEntry.nExtraLen; + + if ((aEntry.nSize || aEntry.nCompressedSize) && !checkSizeAndCRC(aEntry)) + { + aEntry.nCrc = 0; + aEntry.nCompressedSize = 0; + aEntry.nSize = 0; + } + + // Do not add this entry, if it is empty and is a directory of an already existing entry + if (aEntry.nSize == 0 && aEntry.nCompressedSize == 0 + && std::find_if(aEntries.begin(), aEntries.end(), + [path = OUString(aEntry.sPath + "/")](const auto& r) + { return r.first.startsWith(path); }) + != aEntries.end()) + return; + + aEntries.emplace(aEntry.sPath, aEntry); + + // Drop any "directory" entry corresponding to this one's path; since we don't use + // central directory, we don't see external file attributes, so sanitize here + sal_Int32 i = 0; + for (OUString subdir = aEntry.sPath.getToken(0, '/', i); i >= 0; + subdir += OUString::Concat("/") + o3tl::getToken(aEntry.sPath, 0, '/', i)) + { + if (auto it = aEntries.find(subdir); it != aEntries.end()) + { + // if not empty, let it fail later in ZipPackage::getZipFileContents + if (it->second.nSize == 0 && it->second.nCompressedSize == 0) + aEntries.erase(it); + } + } +} + +// PK78: Data descriptor +void ZipFile::HandlePK78(std::span<const sal_Int8> data, sal_Int64 dataOffset) +{ + sal_Int64 nCompressedSize, nSize; + Sequence<sal_Int8> aTmpBuffer(data.data() + 4, 12 + 8 + 4); + MemoryByteGrabber aMemGrabber(aTmpBuffer); + sal_Int32 nCRC32 = aMemGrabber.ReadInt32(); + + // FIXME64: find a better way to recognize if Zip64 mode is used + // Now we check if the memory at +16 byte seems to be a signature + // if not, then probably Zip64 mode is used here, except + // if memory at +24 byte seems not to be a signature. + // Normally Data Descriptor should followed by the next Local File header + // that should start with PK34, except for the last file, then it may + // followed by Central directory that start with PK12, or + // followed by "archive decryption header" that don't have a signature. + if ((data[16] == 'P' && data[17] == 'K' && data[19] == data[18] + 1 + && (data[18] == 3 || data[18] == 1)) + || !(data[24] == 'P' && data[25] == 'K' && data[27] == data[26] + 1 + && (data[26] == 3 || data[26] == 1))) + { + nCompressedSize = aMemGrabber.ReadUInt32(); + nSize = aMemGrabber.ReadUInt32(); + } + else + { + nCompressedSize = aMemGrabber.ReadUInt64(); + nSize = aMemGrabber.ReadUInt64(); + } + + for (auto& rEntry : aEntries) + { + // this is a broken package, accept this block not only for DEFLATED streams + if ((rEntry.second.nFlag & 8) == 0) + continue; + sal_Int64 nStreamOffset = dataOffset - nCompressedSize; + if (nStreamOffset == rEntry.second.nOffset + && nCompressedSize > rEntry.second.nCompressedSize) + { + // only DEFLATED blocks need to be checked + bool bAcceptBlock = (rEntry.second.nMethod == STORED && nCompressedSize == nSize); + + if (!bAcceptBlock) + { + sal_Int64 nRealSize = 0; + sal_Int32 nRealCRC = 0; + getSizeAndCRC(nStreamOffset, nCompressedSize, &nRealSize, &nRealCRC); + bAcceptBlock = (nRealSize == nSize && nRealCRC == nCRC32); + } + + if (bAcceptBlock) + { + rEntry.second.nCrc = nCRC32; + rEntry.second.nCompressedSize = nCompressedSize; + rEntry.second.nSize = nSize; + } + } +#if 0 +// for now ignore clearly broken streams + else if( !rEntry.second.nCompressedSize ) + { + rEntry.second.nCrc = nCRC32; + sal_Int32 nRealStreamSize = dataOffset - rEntry.second.nOffset; + rEntry.second.nCompressedSize = nRealStreamSize; + rEntry.second.nSize = nSize; + } +#endif + } +} + void ZipFile::recover() { ::osl::MutexGuard aGuard( m_aMutexHolder->GetMutex() ); - sal_Int64 nLength; Sequence < sal_Int8 > aBuffer; try { - nLength = aGrabber.getLength(); + const sal_Int64 nLength = aGrabber.getLength(); if (nLength < ENDHDR) return; @@ -1210,7 +1385,7 @@ void ZipFile::recover() for( sal_Int64 nGenPos = 0; aGrabber.readBytes( aBuffer, nToRead ) && aBuffer.getLength() > 16; ) { const sal_Int8 *pBuffer = aBuffer.getConstArray(); - sal_Int32 nBufSize = aBuffer.getLength(); + const sal_Int32 nBufSize = aBuffer.getLength(); sal_Int64 nPos = 0; // the buffer should contain at least one header, @@ -1221,196 +1396,12 @@ void ZipFile::recover() { if ( nPos < nBufSize - 30 && pBuffer[nPos] == 'P' && pBuffer[nPos+1] == 'K' && pBuffer[nPos+2] == 3 && pBuffer[nPos+3] == 4 ) { - //PK34: Local file header - ZipEntry aEntry; - Sequence<sal_Int8> aTmpBuffer(&(pBuffer[nPos+4]), 26); - MemoryByteGrabber aMemGrabber(aTmpBuffer); - - aEntry.nVersion = aMemGrabber.ReadInt16(); - aEntry.nFlag = aMemGrabber.ReadInt16(); - - if ( ( aEntry.nFlag & 1 ) != 1 ) - { - aEntry.nMethod = aMemGrabber.ReadInt16(); - - if ( aEntry.nMethod == STORED || aEntry.nMethod == DEFLATED ) - { - aEntry.nTime = aMemGrabber.ReadInt32(); - aEntry.nCrc = aMemGrabber.ReadInt32(); - sal_uInt64 nCompressedSize = aMemGrabber.ReadUInt32(); - sal_uInt64 nSize = aMemGrabber.ReadUInt32(); - aEntry.nPathLen = aMemGrabber.ReadInt16(); - aEntry.nExtraLen = aMemGrabber.ReadInt16(); - - sal_Int32 nDescrLength = - ( aEntry.nMethod == DEFLATED && ( aEntry.nFlag & 8 ) ) ? 16 : 0; - - sal_Int64 nBlockHeaderLength = aEntry.nPathLen + aEntry.nExtraLen + 30 + nDescrLength; - if ( aEntry.nPathLen >= 0 && aEntry.nExtraLen >= 0 - && ( nGenPos + nPos + nBlockHeaderLength ) <= nLength ) - { - // read always in UTF8, some tools seem not to set UTF8 bit - if( nPos + 30 + aEntry.nPathLen <= nBufSize ) - aEntry.sPath = OUString ( reinterpret_cast<char const *>(&pBuffer[nPos + 30]), - aEntry.nPathLen, - RTL_TEXTENCODING_UTF8 ); - else - { - Sequence < sal_Int8 > aFileName; - aGrabber.seek( nGenPos + nPos + 30 ); - aGrabber.readBytes( aFileName, aEntry.nPathLen ); - aEntry.sPath = OUString ( reinterpret_cast<const char *>(aFileName.getConstArray()), - aFileName.getLength(), - RTL_TEXTENCODING_UTF8 ); - aEntry.nPathLen = static_cast< sal_Int16 >(aFileName.getLength()); - } - aEntry.sPath = aEntry.sPath.replace('\', '/'); - - // read 64bit header - if (aEntry.nExtraLen > 0) - { - Sequence<sal_Int8> aExtraBuffer; - if (nPos + 30 + aEntry.nPathLen + aEntry.nExtraLen <= nBufSize) - { - aExtraBuffer = Sequence<sal_Int8>( - &(pBuffer[nPos + 30 + aEntry.nPathLen]), - aEntry.nExtraLen); - } - else - { - aGrabber.seek(nGenPos + nPos + 30 + aEntry.nExtraLen); - aGrabber.readBytes(aExtraBuffer, aEntry.nExtraLen); - } - MemoryByteGrabber aMemGrabberExtra(aExtraBuffer); - if (aEntry.nExtraLen > 0) - { - readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, - nCompressedSize, nullptr); - } - } - - sal_Int64 nDataSize = ( aEntry.nMethod == DEFLATED ) ? nCompressedSize : nSize; - sal_Int64 nBlockLength = nDataSize + nBlockHeaderLength; - - if (( nGenPos + nPos + nBlockLength ) <= nLength ) - { - aEntry.nCompressedSize = nCompressedSize; - aEntry.nSize = nSize; - - aEntry.nOffset = nGenPos + nPos + 30 + aEntry.nPathLen + aEntry.nExtraLen; - - if ( ( aEntry.nSize || aEntry.nCompressedSize ) && !checkSizeAndCRC( aEntry ) ) - { - aEntry.nCrc = 0; - aEntry.nCompressedSize = 0; - aEntry.nSize = 0; - } - - // Do not add this entry, if it is empty and is a directory of - // an already existing entry - if (aEntry.nSize == 0 && aEntry.nCompressedSize == 0 - && std::find_if( - aEntries.begin(), aEntries.end(), - [path = OUString(aEntry.sPath + "/")](const auto& r) - { return r.first.startsWith(path); }) - != aEntries.end()) - continue; - - aEntries.emplace( aEntry.sPath, aEntry ); - - // Drop any "directory" entry corresponding to this one's path; - // since we don't use central directory, we don't see external - // file attributes, so sanitize here - sal_Int32 i = 0; - for (OUString subdir = aEntry.sPath.getToken(0, '/', i); i >= 0; - subdir += OUString::Concat("/") - + o3tl::getToken(aEntry.sPath, 0, '/', i)) - { - if (auto it = aEntries.find(subdir); it != aEntries.end()) - { - // if not empty, let it fail later in ZipPackage::getZipFileContents - if (it->second.nSize == 0 && it->second.nCompressedSize == 0) - aEntries.erase(it); - } - } - } - } - } - } - + HandlePK34(std::span(pBuffer + nPos, nBufSize - nPos), nGenPos + nPos, nLength); nPos += 4; } else if (pBuffer[nPos] == 'P' && pBuffer[nPos+1] == 'K' && pBuffer[nPos+2] == 7 && pBuffer[nPos+3] == 8 ) { - //PK78: Data descriptor - sal_Int64 nCompressedSize, nSize; - Sequence<sal_Int8> aTmpBuffer(&(pBuffer[nPos + 4]), 12 + 8 + 4); - MemoryByteGrabber aMemGrabber(aTmpBuffer); - sal_Int32 nCRC32 = aMemGrabber.ReadInt32(); - - // FIXME64: find a better way to recognize if Zip64 mode is used - // Now we check if the memory at +16 byte seems to be a signature - // if not, then probably Zip64 mode is used here, except - // if memory at +24 byte seems not to be a signature. - // Normally Data Descriptor should followed by the next Local File header - // that should start with PK34, except for the last file, then it may - // followed by Central directory that start with PK12, or - // followed by "archive decryption header" that don't have a signature. - if ((pBuffer[nPos + 16] == 'P' && pBuffer[nPos + 17] == 'K' - && pBuffer[nPos + 19] == pBuffer[nPos + 18] + 1 - && (pBuffer[nPos + 18] == 3 || pBuffer[nPos + 18] == 1)) - || !(pBuffer[nPos + 24] == 'P' && pBuffer[nPos + 25] == 'K' - && pBuffer[nPos + 27] == pBuffer[nPos + 26] + 1 - && (pBuffer[nPos + 26] == 3 || pBuffer[nPos + 26] == 1))) - { - nCompressedSize = aMemGrabber.ReadUInt32(); - nSize = aMemGrabber.ReadUInt32(); - } - else - { - nCompressedSize = aMemGrabber.ReadUInt64(); - nSize = aMemGrabber.ReadUInt64(); - } - - for( auto& rEntry : aEntries ) - { - // this is a broken package, accept this block not only for DEFLATED streams - if( rEntry.second.nFlag & 8 ) - { - sal_Int64 nStreamOffset = nGenPos + nPos - nCompressedSize; - if ( nStreamOffset == rEntry.second.nOffset && nCompressedSize > rEntry.second.nCompressedSize ) - { - // only DEFLATED blocks need to be checked - bool bAcceptBlock = ( rEntry.second.nMethod == STORED && nCompressedSize == nSize ); - - if ( !bAcceptBlock ) - { - sal_Int64 nRealSize = 0; - sal_Int32 nRealCRC = 0; - getSizeAndCRC( nStreamOffset, nCompressedSize, &nRealSize, &nRealCRC ); - bAcceptBlock = ( nRealSize == nSize && nRealCRC == nCRC32 ); - } - - if ( bAcceptBlock ) - { - rEntry.second.nCrc = nCRC32; - rEntry.second.nCompressedSize = nCompressedSize; - rEntry.second.nSize = nSize; - } - } -#if 0 -// for now ignore clearly broken streams - else if( !rEntry.second.nCompressedSize ) - { - rEntry.second.nCrc = nCRC32; - sal_Int32 nRealStreamSize = nGenPos + nPos - rEntry.second.nOffset; - rEntry.second.nCompressedSize = nRealStreamSize; - rEntry.second.nSize = nSize; - } -#endif - } - } - + HandlePK78(std::span(pBuffer + nPos, nBufSize - nPos), nGenPos + nPos); nPos += 4; } else