Serhiy Storchaka <storch...@gmail.com> added the comment:
Here is the patch which adds support for lzma in zipfile. Later I'll
move `lzma_props_encode` and `lzma_props_decode` to lzma module.
----------
Added file: http://bugs.python.org/file25430/lzma_in_zip.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14366>
_______________________________________
diff -r 7bdac82c16ab Doc/library/zipfile.rst
--- a/Doc/library/zipfile.rst Tue May 01 09:00:59 2012 +0200
+++ b/Doc/library/zipfile.rst Tue May 01 11:29:44 2012 +0300
@@ -97,12 +97,20 @@
.. versionadded:: 3.3
+.. data:: ZIP_LZMA
+
+ The numeric constant for the LZMA compression method. This requires the
+ lzma module.
+
+ .. versionadded:: 3.3
+
.. note::
The ZIP file format specification has included support for bzip2
compression
- since 2001. However, some tools (including older Python releases) do not
- support it, and may either refuse to process the ZIP file altogether, or
- fail to extract individual files.
+ since 2001, and for LZMA compression since 2006. However, some tools
+ (including older Python releases) do not support these compression
+ methods, and may either refuse to process the ZIP file altogether,
+ or fail to extract individual files.
.. seealso::
@@ -133,11 +141,11 @@
adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created.
*compression* is the ZIP compression method to use when writing the archive,
- and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`; or
- :const:`ZIP_DEFLATED`; unrecognized
- values will cause :exc:`RuntimeError` to be raised. If
:const:`ZIP_DEFLATED` or
- :const:`ZIP_BZIP2` is specified but the corresponded module
- (:mod:`zlib` or :mod:`bz2`) is not available, :exc:`RuntimeError`
+ and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
+ :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
+ values will cause :exc:`RuntimeError` to be raised. If
:const:`ZIP_DEFLATED`,
+ :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified but the corresponded
module
+ (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not available,
:exc:`RuntimeError`
is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is
``True`` zipfile will create ZIP files that use the ZIP64 extensions when
the zipfile is larger than 2 GB. If it is false (the default)
:mod:`zipfile`
@@ -161,7 +169,7 @@
Added the ability to use :class:`ZipFile` as a context manager.
.. versionchanged:: 3.3
- Added support for :mod:`bzip2` compression.
+ Added support for :mod:`bzip2` and :mod:`lzma` compression.
.. method:: ZipFile.close()
diff -r 7bdac82c16ab Lib/test/support.py
--- a/Lib/test/support.py Tue May 01 09:00:59 2012 +0200
+++ b/Lib/test/support.py Tue May 01 11:29:44 2012 +0300
@@ -45,6 +45,11 @@
except ImportError:
bz2 = None
+try:
+ import lzma
+except ImportError:
+ lzma = None
+
__all__ = [
"Error", "TestFailed", "ResourceDenied", "import_module",
"verbose", "use_resources", "max_memuse", "record_original_stdout",
@@ -62,7 +67,7 @@
"get_attribute", "swap_item", "swap_attr", "requires_IEEE_754",
"TestHandler", "Matcher", "can_symlink", "skip_unless_symlink",
"import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast",
- "anticipate_failure", "run_with_tz", "requires_bz2"
+ "anticipate_failure", "run_with_tz", "requires_bz2", "requires_lzma"
]
class Error(Exception):
@@ -513,6 +518,8 @@
requires_bz2 = unittest.skipUnless(bz2, 'requires bz2')
+requires_lzma = unittest.skipUnless(lzma, 'requires lzma')
+
is_jython = sys.platform.startswith('java')
# Filename used for testing
diff -r 7bdac82c16ab Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py Tue May 01 09:00:59 2012 +0200
+++ b/Lib/test/test_zipfile.py Tue May 01 11:29:44 2012 +0300
@@ -13,7 +13,7 @@
from random import randint, random
from unittest import skipUnless
-from test.support import TESTFN, run_unittest, findfile, unlink,
requires_zlib, requires_bz2
+from test.support import TESTFN, run_unittest, findfile, unlink,
requires_zlib, requires_bz2, requires_lzma
TESTFN2 = TESTFN + "2"
TESTFNDIR = TESTFN + "d"
@@ -361,6 +361,55 @@
self.assertEqual(openobj.read(1), b'1')
self.assertEqual(openobj.read(1), b'2')
+ @requires_lzma
+ def test_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_open_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_open_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_random_open_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_random_open_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readline_read_lzma(self):
+ # Issue #7610: calls to readline() interleaved with calls to read().
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_readline_read_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readline_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_readline_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readlines_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_readlines_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_iterlines_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_iterlines_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_low_compression_lzma(self):
+ """Check for cases where compressed data is larger than original."""
+ # Create the ZIP archive
+ with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_LZMA) as zipfp:
+ zipfp.writestr("strfile", '12')
+
+ # Get an open object for strfile
+ with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_LZMA) as zipfp:
+ with zipfp.open("strfile") as openobj:
+ self.assertEqual(openobj.read(1), b'1')
+ self.assertEqual(openobj.read(1), b'2')
+
def test_absolute_arcnames(self):
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
zipfp.write(TESTFN, "/absolute")
@@ -508,6 +557,13 @@
info = zipfp.getinfo('b.txt')
self.assertEqual(info.compress_type, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_writestr_compression_lzma(self):
+ zipfp = zipfile.ZipFile(TESTFN2, "w")
+ zipfp.writestr("b.txt", "hello world", compress_type=zipfile.ZIP_LZMA)
+ info = zipfp.getinfo('b.txt')
+ self.assertEqual(info.compress_type, zipfile.ZIP_LZMA)
+
def zip_test_writestr_permissions(self, f, compression):
# Make sure that writestr creates files with mode 0600,
# when it is passed a name rather than a ZipInfo instance.
@@ -686,6 +742,11 @@
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_test(f, zipfile.ZIP_LZMA)
+
def test_absolute_arcnames(self):
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED,
allowZip64=True) as zipfp:
@@ -826,6 +887,16 @@
b'\x00 \x80\x80\x81\x00\x00\x00\x00afilePK'
b'\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00\x00[\x00'
b'\x00\x00\x00\x00'),
+ zipfile.ZIP_LZMA: (
+ b'PK\x03\x04\x14\x03\x00\x00\x0e\x00nu\x0c=FA'
+ b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af'
+ b'ile\t\x04\x05\x00]\x00\x00\x00\x04\x004\x19I'
+ b'\xee\x8d\xe9\x17\x89:3`\tq!.8\x00PK'
+ b'\x01\x02\x14\x03\x14\x03\x00\x00\x0e\x00nu\x0c=FA'
+ b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00 \x80\x80\x81\x00\x00\x00\x00afil'
+ b'ePK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00'
+ b'\x00>\x00\x00\x00\x00\x00'),
}
def test_unicode_filenames(self):
@@ -1094,6 +1165,10 @@
def test_testzip_with_bad_crc_bzip2(self):
self.check_testzip_with_bad_crc(zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_testzip_with_bad_crc_lzma(self):
+ self.check_testzip_with_bad_crc(zipfile.ZIP_LZMA)
+
def check_read_with_bad_crc(self, compression):
"""Tests that files with bad CRCs raise a BadZipFile exception when
read."""
zipdata = self.zips_with_bad_crc[compression]
@@ -1126,6 +1201,10 @@
def test_read_with_bad_crc_bzip2(self):
self.check_read_with_bad_crc(zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_read_with_bad_crc_lzma(self):
+ self.check_read_with_bad_crc(zipfile.ZIP_LZMA)
+
def check_read_return_size(self, compression):
# Issue #9837: ZipExtFile.read() shouldn't return more bytes
# than requested.
@@ -1150,6 +1229,10 @@
def test_read_return_size_bzip2(self):
self.check_read_return_size(zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_read_return_size_lzma(self):
+ self.check_read_return_size(zipfile.ZIP_LZMA)
+
def test_empty_zipfile(self):
# Check that creating a file in 'w' or 'a' mode and closing without
# adding any files to the archives creates a valid empty ZIP file
@@ -1296,6 +1379,11 @@
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_test(f, zipfile.ZIP_LZMA)
+
def zip_open_test(self, f, compression):
self.make_test_archive(f, compression)
@@ -1341,6 +1429,11 @@
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_open_test(f, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_open_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_open_test(f, zipfile.ZIP_LZMA)
+
def zip_random_open_test(self, f, compression):
self.make_test_archive(f, compression)
@@ -1374,6 +1467,11 @@
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_random_open_test(f, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_random_open_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.zip_random_open_test(f, zipfile.ZIP_LZMA)
+
@requires_zlib
class TestsWithMultipleOpens(unittest.TestCase):
@@ -1618,6 +1716,31 @@
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.iterlines_test(f, zipfile.ZIP_BZIP2)
+ @requires_lzma
+ def test_read_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.read_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readline_read_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.readline_read_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readline_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.readline_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_readlines_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.readlines_test(f, zipfile.ZIP_LZMA)
+
+ @requires_lzma
+ def test_iterlines_lzma(self):
+ for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
+ self.iterlines_test(f, zipfile.ZIP_LZMA)
+
def tearDown(self):
for sep, fn in self.arcfiles.items():
os.remove(fn)
diff -r 7bdac82c16ab Lib/zipfile.py
--- a/Lib/zipfile.py Tue May 01 09:00:59 2012 +0200
+++ b/Lib/zipfile.py Tue May 01 11:29:44 2012 +0300
@@ -27,8 +27,13 @@
except ImportError:
bz2 = None
+try:
+ import lzma # We may need its compression method
+except ImportError:
+ lzma = None
+
__all__ = ["BadZipFile", "BadZipfile", "error",
- "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
+ "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
"is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
class BadZipFile(Exception):
@@ -52,11 +57,13 @@
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
+ZIP_LZMA = 14
# Other ZIP compression methods not supported
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
+LZMA_VERSION = 63
# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those
used
@@ -365,6 +372,8 @@
if self.compress_type == ZIP_BZIP2:
min_version = max(BZIP2_VERSION, min_version)
+ elif self.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
self.extract_version = max(min_version, self.extract_version)
self.create_version = max(min_version, self.create_version)
@@ -478,6 +487,79 @@
return c
+def lzma_props_encode(specs):
+ assert specs['id'] == lzma.FILTER_LZMA1
+ prop = (specs['pb'] * 5 + specs['lp']) * 9 + specs['lc']
+ return struct.pack('<BBHBI', 9, 4, 5, prop, specs['dict_size'])
+
+
+def lzma_props_decode(data):
+ if len(data) != 5:
+ raise RuntimeError("This version of lzma is not supported")
+
+ prop, dict_size = struct.unpack('<BI', data)
+ return {
+ 'id': lzma.FILTER_LZMA1,
+ 'pb': prop // 45,
+ 'lp': prop // 9 % 5,
+ 'lc': prop % 9,
+ 'dict_size': dict_size,
+ }
+
+
+class LZMACompressor():
+
+ def __init__(self):
+ self._comp = None
+
+ def _init(self):
+ specs = {
+ 'id': lzma.FILTER_LZMA1,
+ 'pb': 2,
+ 'lp': 0,
+ 'lc': 3,
+ 'dict_size': 8 << 20,
+ }
+ self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[specs])
+ return lzma_props_encode(specs)
+
+ def compress(self, data):
+ if self._comp is None:
+ return self._init() + self._comp.compress(data)
+ return self._comp.compress(data)
+
+ def flush(self):
+ if self._comp is None:
+ return self._init() + self._comp.flush()
+ return self._comp.flush()
+
+
+class LZMADecompressor():
+
+ def __init__(self):
+ self._decomp = None
+ self._unconsumed = b''
+ self.eof = False
+
+ def decompress(self, data):
+ if self._decomp is None:
+ self._unconsumed += data
+ if len(self._unconsumed) <= 4:
+ return b''
+ psize, = struct.unpack('<H', self._unconsumed[2:4])
+ if len(self._unconsumed) <= 4 + psize:
+ return b''
+
+ self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
+ filters=[lzma_props_decode(self._unconsumed[4:4 + psize])])
+ data = self._unconsumed[4 + psize:]
+ del self._unconsumed
+
+ result = self._decomp.decompress(data)
+ self.eof = self._decomp.eof
+ return result
+
+
def _check_compression(compression):
if compression == ZIP_STORED:
pass
@@ -489,6 +571,10 @@
if not bz2:
raise RuntimeError(
"Compression requires the (missing) bz2 module")
+ elif compression == ZIP_LZMA:
+ if not lzma:
+ raise RuntimeError(
+ "Compression requires the (missing) lzma module")
else:
raise RuntimeError("That compression method is not supported")
@@ -499,6 +585,8 @@
zlib.DEFLATED, -15)
elif compress_type == ZIP_BZIP2:
return bz2.BZ2Compressor()
+ elif compress_type == ZIP_LZMA:
+ return LZMACompressor()
else:
return None
@@ -510,6 +598,8 @@
return zlib.decompressobj(-15)
elif compress_type == ZIP_BZIP2:
return bz2.BZ2Decompressor()
+ elif compress_type == ZIP_LZMA:
+ return LZMADecompressor()
else:
unknown_compressors = {
1: 'shrink',
@@ -520,7 +610,6 @@
6: 'implode',
9: 'enhanced deflate',
10: 'implode',
- 14: 'lzma',
}
descr = unknown_compressors.get(compress_type)
if descr:
@@ -779,8 +868,8 @@
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
- compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
- ZIP_BZIP2 (requires bz2).
+ compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
+ ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary.
@@ -1215,6 +1304,9 @@
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
self._writecheck(zinfo)
self._didModify = True
@@ -1287,6 +1379,9 @@
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
self._writecheck(zinfo)
self._didModify = True
@@ -1355,6 +1450,8 @@
if zinfo.compress_type == ZIP_BZIP2:
min_version = max(BZIP2_VERSION, min_version)
+ elif zinfo.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
extract_version = max(min_version, zinfo.extract_version)
create_version = max(min_version, zinfo.create_version)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com