Hi,

> stable-pu: package diffoscope/240+deb12u1

After a tip from Fay Stegerman, I've found a way to simplify the
proposed changes to the testsuite. A full, updated debdiff is
attached.


Regards,

-- 
      ,''`.
     : :'  :     Chris Lamb
     `. `'`      la...@debian.org / chris-lamb.co.uk
       `-
diff --git a/debian/changelog b/debian/changelog
index fdd84fde..6a35a229 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+diffoscope (240+deb12u1) stable; urgency=medium
+
+  [ Chris Lamb ]
+  * Backport a patch by FC (Fay) Stegerman to fix a FTBFS caused by a
+    zipbomb-related security fix uploaded in python3.11 3.11.2-6+deb12u2
+    (#1070133). diffoscope's testsuite deliberately uses a .zip file that
+    has overlapping entries. (Closes: #1078883)
+  * Do not call marshal.loads on precompiled Python bytecode as it is
+    inherently unsafe. Replace, for now, with a brief textual summary of
+    the 'code' section of .pyc files instead.
+    <https://salsa.debian.org/reproducible-builds/diffoscope/-/issues/371>
+
+ -- Chris Lamb <la...@debian.org>  Mon, 26 Aug 2024 11:43:37 +0100
+
 diffoscope (240) unstable; urgency=medium
 
   [ Holger Levsen ]
diff --git a/diffoscope/comparators/python.py b/diffoscope/comparators/python.py
index fcf89481..83a353d1 100644
--- a/diffoscope/comparators/python.py
+++ b/diffoscope/comparators/python.py
@@ -18,13 +18,11 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import binascii
-import dis
 import io
-import marshal
+import os
 import re
 import struct
 import time
-import types
 
 from diffoscope.difference import Difference
 
@@ -78,55 +76,10 @@ def parse_pyc(f):
     filesz = struct.unpack("<L", filesz)
     yield f"files sz: {filesz[0]}"
 
-    code = marshal.load(f)
-    yield from show_code(code)
-
-
-def show_code(code, indent=""):
-    yield f"{indent}code"
-
-    indent += "   "
-
-    for x in ("argcount", "nlocals", "stacksize", "flags"):
-        yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
-    yield from show_hex("code", code.co_code, indent=indent)
-    s = io.StringIO()
-    dis.disassemble(code, file=s)
-    for x in s.getvalue().splitlines():
-        yield "{}{}".format(indent, re_memory_address.sub("", x))
-
-    yield f"{indent}consts"
-    for const in code.co_consts:
-        if isinstance(const, types.CodeType):
-            yield from show_code(const, f"{indent}   ")
-        else:
-            yield f"   {indent}{const!r}"
-
-    for x in (
-        "names",
-        "varnames",
-        "freevars",
-        "cellvars",
-        "filename",
-        "name",
-        "firstlineno",
-    ):
-        yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
-    yield from show_hex("lnotab", code.co_lnotab, indent=indent)
-
-
-def show_hex(label, val, indent):
-    val = hexlify(val)
-
-    if len(val) < 60:
-        yield f"{indent}{label} {val}"
-        return
-
-    yield f"{indent}{label}"
-    for i in range(0, len(val), 60):
-        yield "{}   {}".format(indent, val[i : i + 60])
+    start = f.tell()
+    f.seek(0, os.SEEK_END)
+    size = f.tell() - start
+    yield f"code:     starts at offset {start} (size: {size} bytes)"
 
 
 def hexlify(val):
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index 6c38b776..7d720362 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -266,10 +266,13 @@ class MozillaZipContainer(ZipContainer):
         # This is gross: Monkeypatch zipfile._EndRecData to work with
         # Mozilla-optimized ZIPs
         _orig_EndRecData = zipfile._EndRecData
+        eocd_offset = None
 
         def _EndRecData(fh):
             endrec = _orig_EndRecData(fh)
             if endrec:
+                nonlocal eocd_offset
+                eocd_offset = endrec[zipfile._ECD_LOCATION]
                 endrec[zipfile._ECD_LOCATION] = (
                     endrec[zipfile._ECD_OFFSET] + endrec[zipfile._ECD_SIZE]
                 )
@@ -278,6 +281,17 @@ class MozillaZipContainer(ZipContainer):
         zipfile._EndRecData = _EndRecData
         result = super(MozillaZipContainer, self).open_archive()
         zipfile._EndRecData = _orig_EndRecData
+        # fix _end_offset after https://github.com/python/cpython/pull/110016
+        # added a check that fails because the central directory comes before
+        # the entries in these files
+        zinfos = sorted(
+            result.filelist,
+            key=lambda zinfo: zinfo.header_offset,
+            reverse=True,
+        )
+        if zinfos:
+            if hasattr(zinfos[0], "_end_offset"):
+                zinfos[0]._end_offset = eocd_offset
         return result
 
 
diff --git a/tests/comparators/test_python.py b/tests/comparators/test_python.py
index 7ffc8e7c..95f2485b 100644
--- a/tests/comparators/test_python.py
+++ b/tests/comparators/test_python.py
@@ -17,16 +17,12 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import pytest
-import sys
 
 from diffoscope.comparators.python import PycFile
 
 from ..utils.data import assert_diff_startswith, load_fixture
 from ..utils.nonexisting import assert_non_existing
-from ..utils.tools import (
-    skipif,
-    skip_unless_file_version_is_at_least,
-)
+from ..utils.tools import skip_unless_file_version_is_at_least
 
 pyc1 = load_fixture("test1.pyc-renamed")
 pyc2 = load_fixture("test2.pyc-renamed")
@@ -38,7 +34,6 @@ def test_identification(pyc1, pyc2):
     assert isinstance(pyc2, PycFile)
 
 
-@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+")
 def test_no_differences(pyc1):
     # Disassembling bytecode prior to Python 3.10 is stable when applied to
     # itself, otherwise various memory offsets (or memory addresses?) are
@@ -52,15 +47,8 @@ def differences(pyc1, pyc2):
 
 
 @skip_unless_file_version_is_at_least("5.39")
-@skipif(
-    sys.version_info[:2] not in {(3, 9), (3, 10)},
-    reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
-)
 def test_diff(differences):
-    assert_diff_startswith(
-        differences[0],
-        "pyc_expected_diff",
-    )
+    assert_diff_startswith(differences[0], "pyc_expected_diff")
 
 
 def test_compare_non_existing(monkeypatch, pyc1):
diff --git a/tests/data/pyc_expected_diff b/tests/data/pyc_expected_diff
index b6ecfc43..b7834546 100644
--- a/tests/data/pyc_expected_diff
+++ b/tests/data/pyc_expected_diff
@@ -1,11 +1,6 @@
-@@ -1,9 +1,9 @@
- magic:    0x610d0d0a
--moddate:  0xbd103561 (Sun Sep  5 18:47:25 2021 UTC)
-+moddate:  0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
- files sz: 14217
- code
-    argcount  : 0
-    nlocals   : 0
-    stacksize : 3
-    flags     : 64
-    code
+@@ -1,4 +1,4 @@
+ magic:    0xcb0d0d0a
+-moddate:  0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
++moddate:  0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
+ files sz: 13
+ code:     starts at offset 16 (size: 121 bytes)
diff --git a/tests/data/test1.pyc-renamed b/tests/data/test1.pyc-renamed
index 5368eeca..2bdf21d2 100644
Binary files a/tests/data/test1.pyc-renamed and b/tests/data/test1.pyc-renamed differ
diff --git a/tests/data/test2.pyc-renamed b/tests/data/test2.pyc-renamed
index 32238d4f..0a051bc6 100644
Binary files a/tests/data/test2.pyc-renamed and b/tests/data/test2.pyc-renamed differ

Reply via email to