Hi,
> stable-pu: package diffoscope/240+deb12u1
After a tip from Fay Stegerman, I've found a way to simplify the
proposed changes to the testsuite. A full, updated debdiff is
attached.
Regards,
--
,''`.
: :' : Chris Lamb
`. `'` la...@debian.org / chris-lamb.co.uk
`-
diff --git a/debian/changelog b/debian/changelog
index fdd84fde..6a35a229 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+diffoscope (240+deb12u1) stable; urgency=medium
+
+ [ Chris Lamb ]
+ * Backport a patch by FC (Fay) Stegerman to fix a FTBFS caused by a
+ zipbomb-related security fix uploaded in python3.11 3.11.2-6+deb12u2
+ (#1070133). diffoscope's testsuite deliberately uses a .zip file that
+ has overlapping entries. (Closes: #1078883)
+ * Do not call marshal.loads on precompiled Python bytecode as it is
+ inherently unsafe. Replace, for now, with a brief textual summary of
+ the 'code' section of .pyc files instead.
+ <https://salsa.debian.org/reproducible-builds/diffoscope/-/issues/371>
+
+ -- Chris Lamb <la...@debian.org> Mon, 26 Aug 2024 11:43:37 +0100
+
diffoscope (240) unstable; urgency=medium
[ Holger Levsen ]
diff --git a/diffoscope/comparators/python.py b/diffoscope/comparators/python.py
index fcf89481..83a353d1 100644
--- a/diffoscope/comparators/python.py
+++ b/diffoscope/comparators/python.py
@@ -18,13 +18,11 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import binascii
-import dis
import io
-import marshal
+import os
import re
import struct
import time
-import types
from diffoscope.difference import Difference
@@ -78,55 +76,10 @@ def parse_pyc(f):
filesz = struct.unpack("<L", filesz)
yield f"files sz: {filesz[0]}"
- code = marshal.load(f)
- yield from show_code(code)
-
-
-def show_code(code, indent=""):
- yield f"{indent}code"
-
- indent += " "
-
- for x in ("argcount", "nlocals", "stacksize", "flags"):
- yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
- yield from show_hex("code", code.co_code, indent=indent)
- s = io.StringIO()
- dis.disassemble(code, file=s)
- for x in s.getvalue().splitlines():
- yield "{}{}".format(indent, re_memory_address.sub("", x))
-
- yield f"{indent}consts"
- for const in code.co_consts:
- if isinstance(const, types.CodeType):
- yield from show_code(const, f"{indent} ")
- else:
- yield f" {indent}{const!r}"
-
- for x in (
- "names",
- "varnames",
- "freevars",
- "cellvars",
- "filename",
- "name",
- "firstlineno",
- ):
- yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
- yield from show_hex("lnotab", code.co_lnotab, indent=indent)
-
-
-def show_hex(label, val, indent):
- val = hexlify(val)
-
- if len(val) < 60:
- yield f"{indent}{label} {val}"
- return
-
- yield f"{indent}{label}"
- for i in range(0, len(val), 60):
- yield "{} {}".format(indent, val[i : i + 60])
+ start = f.tell()
+ f.seek(0, os.SEEK_END)
+ size = f.tell() - start
+ yield f"code: starts at offset {start} (size: {size} bytes)"
def hexlify(val):
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index 6c38b776..7d720362 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -266,10 +266,13 @@ class MozillaZipContainer(ZipContainer):
# This is gross: Monkeypatch zipfile._EndRecData to work with
# Mozilla-optimized ZIPs
_orig_EndRecData = zipfile._EndRecData
+ eocd_offset = None
def _EndRecData(fh):
endrec = _orig_EndRecData(fh)
if endrec:
+ nonlocal eocd_offset
+ eocd_offset = endrec[zipfile._ECD_LOCATION]
endrec[zipfile._ECD_LOCATION] = (
endrec[zipfile._ECD_OFFSET] + endrec[zipfile._ECD_SIZE]
)
@@ -278,6 +281,17 @@ class MozillaZipContainer(ZipContainer):
zipfile._EndRecData = _EndRecData
result = super(MozillaZipContainer, self).open_archive()
zipfile._EndRecData = _orig_EndRecData
+ # fix _end_offset after https://github.com/python/cpython/pull/110016
+ # added a check that fails because the central directory comes before
+ # the entries in these files
+ zinfos = sorted(
+ result.filelist,
+ key=lambda zinfo: zinfo.header_offset,
+ reverse=True,
+ )
+ if zinfos:
+ if hasattr(zinfos[0], "_end_offset"):
+ zinfos[0]._end_offset = eocd_offset
return result
diff --git a/tests/comparators/test_python.py b/tests/comparators/test_python.py
index 7ffc8e7c..95f2485b 100644
--- a/tests/comparators/test_python.py
+++ b/tests/comparators/test_python.py
@@ -17,16 +17,12 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import pytest
-import sys
from diffoscope.comparators.python import PycFile
from ..utils.data import assert_diff_startswith, load_fixture
from ..utils.nonexisting import assert_non_existing
-from ..utils.tools import (
- skipif,
- skip_unless_file_version_is_at_least,
-)
+from ..utils.tools import skip_unless_file_version_is_at_least
pyc1 = load_fixture("test1.pyc-renamed")
pyc2 = load_fixture("test2.pyc-renamed")
@@ -38,7 +34,6 @@ def test_identification(pyc1, pyc2):
assert isinstance(pyc2, PycFile)
-@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+")
def test_no_differences(pyc1):
# Disassembling bytecode prior to Python 3.10 is stable when applied to
# itself, otherwise various memory offsets (or memory addresses?) are
@@ -52,15 +47,8 @@ def differences(pyc1, pyc2):
@skip_unless_file_version_is_at_least("5.39")
-@skipif(
- sys.version_info[:2] not in {(3, 9), (3, 10)},
- reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
-)
def test_diff(differences):
- assert_diff_startswith(
- differences[0],
- "pyc_expected_diff",
- )
+ assert_diff_startswith(differences[0], "pyc_expected_diff")
def test_compare_non_existing(monkeypatch, pyc1):
diff --git a/tests/data/pyc_expected_diff b/tests/data/pyc_expected_diff
index b6ecfc43..b7834546 100644
--- a/tests/data/pyc_expected_diff
+++ b/tests/data/pyc_expected_diff
@@ -1,11 +1,6 @@
-@@ -1,9 +1,9 @@
- magic: 0x610d0d0a
--moddate: 0xbd103561 (Sun Sep 5 18:47:25 2021 UTC)
-+moddate: 0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
- files sz: 14217
- code
- argcount : 0
- nlocals : 0
- stacksize : 3
- flags : 64
- code
+@@ -1,4 +1,4 @@
+ magic: 0xcb0d0d0a
+-moddate: 0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
++moddate: 0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
+ files sz: 13
+ code: starts at offset 16 (size: 121 bytes)
diff --git a/tests/data/test1.pyc-renamed b/tests/data/test1.pyc-renamed
index 5368eeca..2bdf21d2 100644
Binary files a/tests/data/test1.pyc-renamed and b/tests/data/test1.pyc-renamed
differ
diff --git a/tests/data/test2.pyc-renamed b/tests/data/test2.pyc-renamed
index 32238d4f..0a051bc6 100644
Binary files a/tests/data/test2.pyc-renamed and b/tests/data/test2.pyc-renamed
differ