Hi Christopher, Elena, Christoph,

I have pushed a small refinement to the patch on salsa. The changes are:

- Cache moved to `__init__`: bytes from file-like sources are now stored at 
construction time, not lazily on first save. This closes the window where a 
second save could fail if the handle was closed before any save occurred.
- Format conversion fix: replaced `img.format or "png"` with `self.format` in 
the fallback path, preventing silent JPEG/GIF-to-PNG conversion.
- `except Exception` narrowed to `except OSError` on `fp.close()`.

Both the original and refined patch pass all 10 tests; unpatched 3.1.5 fails on 
the two BytesIO double-save cases as expected.

Also, Alexandre Detiste has granted me upload rights for this package, so I can 
upload directly if everyone is happy with the patch.

Cheers,
Josenilson
"""
Test suite for Bug #1132566 - openpyxl crash when saving workbook
multiple times with embedded images.

Strategy: monkey-patch the real openpyxl.drawing.image.Image._data
so isinstance() checks in the writer still pass correctly.

Tests three implementations:
  1. ORIGINAL  - openpyxl 3.1.5 (buggy)
  2. PATCH     - Josenilson's patch
  3. PROPOSED  - Claude's suggested fix (cache bytes in __init__)
"""

import sys
import os
import traceback
from io import BytesIO
from PIL import Image as PILImage
import openpyxl
import openpyxl.drawing.image as img_module

# ─────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────
PASS = "\033[92m✔ PASS\033[0m"
FAIL = "\033[91m✘ FAIL\033[0m"

results = []

def record(impl, name, ok, detail=""):
    tag = PASS if ok else FAIL
    print(f"  [{tag}] [{impl}] {name}" + (f" — {detail}" if detail else ""))
    results.append((impl, name, ok))

def make_bytesio_png():
    buf = BytesIO()
    img = PILImage.new("RGB", (50, 50), color=(0, 200, 0))
    img.save(buf, format="PNG")
    buf.seek(0)
    return buf

def make_workbook_with_image_path():
    wb = openpyxl.Workbook()
    ws = wb.active
    ws["A1"] = "Hello"
    img = openpyxl.drawing.image.Image("/tmp/test_image.png")
    ws.add_image(img, "B2")
    return wb

def make_workbook_with_image_bytesio(buf):
    wb = openpyxl.Workbook()
    ws = wb.active
    ws["A1"] = "Hello"
    img = openpyxl.drawing.image.Image(buf)
    ws.add_image(img, "B2")
    return wb

# ─────────────────────────────────────────────────────────────
# The three _data() implementations
# ─────────────────────────────────────────────────────────────

def _data_original(self):
    """Unmodified openpyxl 3.1.5"""
    from openpyxl.drawing.image import _import_image
    img = _import_image(self.ref)
    if self.format in ['gif', 'jpeg', 'png']:
        img.fp.seek(0)
        fp = img.fp
    else:
        fp = BytesIO()
        img.save(fp, format="png")
        fp.seek(0)
    data = fp.read()
    fp.close()
    return data


def _data_patched(self):
    """Josenilson's patch"""
    from openpyxl.drawing.image import _import_image
    ref = self.ref
    if isinstance(ref, str):
        img = _import_image(ref)
    elif hasattr(ref, "filename") and ref.filename:
        img = _import_image(ref.filename)
    elif hasattr(ref, "read"):
        if getattr(ref, "closed", False):
            if not hasattr(self, "_cached_data"):
                raise ValueError("Image file handle is closed and no cache available.")
            ref = BytesIO(self._cached_data)
        else:
            ref.seek(0)
            self._cached_data = ref.read()
            ref = BytesIO(self._cached_data)
        img = _import_image(ref)
    else:
        img = _import_image(ref)

    if self.format in ["gif", "jpeg", "png"]:
        if getattr(img, "fp", None) and not img.fp.closed:
            img.fp.seek(0)
            fp = img.fp
        else:
            tmp = BytesIO()
            img.save(tmp, format=img.format or "png")
            tmp.seek(0)
            fp = tmp
    else:
        fp = BytesIO()
        img.save(fp, format="png")
        fp.seek(0)

    data = fp.read()
    try:
        fp.close()
    except Exception:
        pass
    return data


def _init_proposed(self, img):
    """Proposed __init__ with early cache"""
    from openpyxl.drawing.image import _import_image
    self.ref = img
    mark_to_close = isinstance(img, str)
    image = _import_image(img)
    self.width, self.height = image.size
    try:
        self.format = image.format.lower()
    except AttributeError:
        self.format = "png"

    # Cache bytes immediately when a file-like is given
    if hasattr(img, "read"):
        img.seek(0)
        self._cached_data = img.read()

    if mark_to_close:
        image.close()


def _data_proposed(self):
    """Proposed _data() that uses cached bytes when available"""
    from openpyxl.drawing.image import _import_image

    if hasattr(self, "_cached_data"):
        return self._cached_data

    # Path-based: re-open safely every time
    img = _import_image(self.ref)
    if self.format in ["gif", "jpeg", "png"]:
        if getattr(img, "fp", None) and not img.fp.closed:
            img.fp.seek(0)
            fp = img.fp
        else:
            tmp = BytesIO()
            img.save(tmp, format=img.format or self.format or "png")
            tmp.seek(0)
            fp = tmp
    else:
        fp = BytesIO()
        img.save(fp, format="png")
        fp.seek(0)

    data = fp.read()
    try:
        fp.close()
    except OSError:
        pass
    return data


# ─────────────────────────────────────────────────────────────
# Test runner
# ─────────────────────────────────────────────────────────────
def run_tests(label, data_fn, init_fn=None):
    """
    Monkey-patch the real Image class, run all tests, then restore.
    This keeps isinstance() checks working inside openpyxl's writer.
    """
    print(f"\n{'─'*57}")
    print(f"  Testing: {label}")
    print(f"{'─'*57}")

    RealImage = img_module.Image
    original_data = RealImage._data
    original_init = RealImage.__init__

    RealImage._data = data_fn
    if init_fn:
        RealImage.__init__ = init_fn

    try:
        # ── Test 1: Single save (path) ────────────────────────
        try:
            wb = make_workbook_with_image_path()
            out = BytesIO(); wb.save(out)
            assert out.tell() > 0
            record(label, "Single save (path)", True)
        except Exception as e:
            record(label, "Single save (path)", False, str(e))

        # ── Test 2: Double save (path) ← original bug ─────────
        try:
            wb = make_workbook_with_image_path()
            out1 = BytesIO(); wb.save(out1)
            out2 = BytesIO(); wb.save(out2)
            assert out1.tell() > 0 and out2.tell() > 0
            record(label, "Double save (path)", True)
        except Exception as e:
            record(label, "Double save (path)", False, str(e))

        # ── Test 3: Triple save (path) ────────────────────────
        try:
            wb = make_workbook_with_image_path()
            outs = [BytesIO() for _ in range(3)]
            for o in outs: wb.save(o)
            assert all(o.tell() > 0 for o in outs)
            record(label, "Triple save (path)", True)
        except Exception as e:
            record(label, "Triple save (path)", False, str(e))

        # ── Test 4: Single save (BytesIO) ─────────────────────
        try:
            buf = make_bytesio_png()
            wb = make_workbook_with_image_bytesio(buf)
            out = BytesIO(); wb.save(out)
            assert out.tell() > 0
            record(label, "Single save (BytesIO)", True)
        except Exception as e:
            record(label, "Single save (BytesIO)", False, str(e))

        # ── Test 5: Double save (BytesIO) ← core bug ──────────
        try:
            buf = make_bytesio_png()
            wb = make_workbook_with_image_bytesio(buf)
            out1 = BytesIO(); wb.save(out1)
            out2 = BytesIO(); wb.save(out2)
            assert out1.tell() > 0 and out2.tell() > 0
            record(label, "Double save (BytesIO)", True)
        except Exception as e:
            record(label, "Double save (BytesIO)", False, str(e))

        # ── Test 6: Double save (BytesIO explicitly closed) ───
        try:
            buf = make_bytesio_png()
            wb = make_workbook_with_image_bytesio(buf)
            out1 = BytesIO(); wb.save(out1)
            buf.close()
            out2 = BytesIO(); wb.save(out2)
            assert out2.tell() > 0
            record(label, "Double save (BytesIO closed after 1st save)", True)
        except Exception as e:
            record(label, "Double save (BytesIO closed after 1st save)", False, str(e))

        # ── Test 7: Image data integrity (path) ───────────────
        try:
            img_obj = RealImage("/tmp/test_image.png")
            data = img_obj._data()
            assert isinstance(data, bytes) and len(data) > 0
            PILImage.open(BytesIO(data)).verify()
            record(label, "Image data integrity (path)", True)
        except Exception as e:
            record(label, "Image data integrity (path)", False, str(e))

        # ── Test 8: Image data integrity (BytesIO) ────────────
        try:
            buf = make_bytesio_png()
            img_obj = RealImage(buf)
            data = img_obj._data()
            assert isinstance(data, bytes) and len(data) > 0
            PILImage.open(BytesIO(data)).verify()
            record(label, "Image data integrity (BytesIO)", True)
        except Exception as e:
            record(label, "Image data integrity (BytesIO)", False, str(e))

        # ── Test 9: JPEG format preserved ─────────────────────
        try:
            img_obj = RealImage("/tmp/test_image.jpg")
            assert img_obj.format == "jpeg", f"expected jpeg, got {img_obj.format}"
            data = img_obj._data()
            assert data[:2] == b'\xff\xd8', "Not a valid JPEG signature"
            record(label, "JPEG format preserved (no silent conversion)", True)
        except Exception as e:
            record(label, "JPEG format preserved (no silent conversion)", False, str(e))

        # ── Test 10: Output is valid xlsx (re-loadable) ────────
        try:
            wb = make_workbook_with_image_path()
            out = BytesIO(); wb.save(out)
            out.seek(0)
            wb2 = openpyxl.load_workbook(out)
            assert wb2.active["A1"].value == "Hello"
            record(label, "Output is valid xlsx (re-loadable)", True)
        except Exception as e:
            record(label, "Output is valid xlsx (re-loadable)", False, str(e))

    finally:
        RealImage._data = original_data
        RealImage.__init__ = original_init


# ─────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("\n" + "═"*57)
    print("  Bug #1132566 — openpyxl multi-save image test suite")
    print("═"*57)

    run_tests("ORIGINAL (3.1.5)",          _data_original)
    run_tests("PATCH    (Josenilson)",      _data_patched)
    run_tests("PROPOSED (cache __init__)",  _data_proposed, _init_proposed)

    print(f"\n{'═'*57}")
    print("  SUMMARY")
    print(f"{'═'*57}")

    by_impl = {}
    for impl, name, ok in results:
        by_impl.setdefault(impl, []).append((name, ok))

    for impl, tests in by_impl.items():
        passed = sum(1 for _, ok in tests if ok)
        total  = len(tests)
        bar    = "█" * passed + "░" * (total - passed)
        color  = "\033[92m" if passed == total else "\033[91m"
        print(f"  {color}{impl:<37} {bar} {passed}/{total}\033[0m")

    all_pass = all(ok for _, _, ok in results)
    print()
    if all_pass:
        print("  \033[92mAll tests passed!\033[0m")
    else:
        failed = [(i, n) for i, n, ok in results if not ok]
        print(f"  \033[91m{len(failed)} test(s) failed:\033[0m")
        for impl, name in failed:
            print(f"    • [{impl}] {name}")
    print()
-- 
debian-science-maintainers mailing list
[email protected]
https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/debian-science-maintainers

Reply via email to