Hi Christopher, Elena, Christoph,
I have pushed a small refinement to the patch on salsa. The changes are:
- Cache moved to `__init__`: bytes from file-like sources are now stored at
construction time, not lazily on first save. This closes the window where a
second save could fail if the handle was closed before any save occurred.
- Format conversion fix: replaced `img.format or "png"` with `self.format` in
the fallback path, preventing silent JPEG/GIF-to-PNG conversion.
- `except Exception` narrowed to `except OSError` on `fp.close()`.
Both the original and refined patch pass all 10 tests; unpatched 3.1.5 fails on
the two BytesIO double-save cases as expected.
Also, Alexandre Detiste has granted me upload rights for this package, so I can
upload directly if everyone is happy with the patch.
Cheers,
Josenilson
"""
Test suite for Bug #1132566 - openpyxl crash when saving workbook
multiple times with embedded images.
Strategy: monkey-patch the real openpyxl.drawing.image.Image._data
so isinstance() checks in the writer still pass correctly.
Tests three implementations:
1. ORIGINAL - openpyxl 3.1.5 (buggy)
2. PATCH - Josenilson's patch
3. PROPOSED - Claude's suggested fix (cache bytes in __init__)
"""
import sys
import os
import traceback
from io import BytesIO
from PIL import Image as PILImage
import openpyxl
import openpyxl.drawing.image as img_module
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
# Helpers
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
PASS = "\033[92mâ PASS\033[0m"
FAIL = "\033[91mâ FAIL\033[0m"
results = []
def record(impl, name, ok, detail=""):
tag = PASS if ok else FAIL
print(f" [{tag}] [{impl}] {name}" + (f" â {detail}" if detail else ""))
results.append((impl, name, ok))
def make_bytesio_png():
buf = BytesIO()
img = PILImage.new("RGB", (50, 50), color=(0, 200, 0))
img.save(buf, format="PNG")
buf.seek(0)
return buf
def make_workbook_with_image_path():
wb = openpyxl.Workbook()
ws = wb.active
ws["A1"] = "Hello"
img = openpyxl.drawing.image.Image("/tmp/test_image.png")
ws.add_image(img, "B2")
return wb
def make_workbook_with_image_bytesio(buf):
wb = openpyxl.Workbook()
ws = wb.active
ws["A1"] = "Hello"
img = openpyxl.drawing.image.Image(buf)
ws.add_image(img, "B2")
return wb
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
# The three _data() implementations
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
def _data_original(self):
"""Unmodified openpyxl 3.1.5"""
from openpyxl.drawing.image import _import_image
img = _import_image(self.ref)
if self.format in ['gif', 'jpeg', 'png']:
img.fp.seek(0)
fp = img.fp
else:
fp = BytesIO()
img.save(fp, format="png")
fp.seek(0)
data = fp.read()
fp.close()
return data
def _data_patched(self):
"""Josenilson's patch"""
from openpyxl.drawing.image import _import_image
ref = self.ref
if isinstance(ref, str):
img = _import_image(ref)
elif hasattr(ref, "filename") and ref.filename:
img = _import_image(ref.filename)
elif hasattr(ref, "read"):
if getattr(ref, "closed", False):
if not hasattr(self, "_cached_data"):
raise ValueError("Image file handle is closed and no cache available.")
ref = BytesIO(self._cached_data)
else:
ref.seek(0)
self._cached_data = ref.read()
ref = BytesIO(self._cached_data)
img = _import_image(ref)
else:
img = _import_image(ref)
if self.format in ["gif", "jpeg", "png"]:
if getattr(img, "fp", None) and not img.fp.closed:
img.fp.seek(0)
fp = img.fp
else:
tmp = BytesIO()
img.save(tmp, format=img.format or "png")
tmp.seek(0)
fp = tmp
else:
fp = BytesIO()
img.save(fp, format="png")
fp.seek(0)
data = fp.read()
try:
fp.close()
except Exception:
pass
return data
def _init_proposed(self, img):
"""Proposed __init__ with early cache"""
from openpyxl.drawing.image import _import_image
self.ref = img
mark_to_close = isinstance(img, str)
image = _import_image(img)
self.width, self.height = image.size
try:
self.format = image.format.lower()
except AttributeError:
self.format = "png"
# Cache bytes immediately when a file-like is given
if hasattr(img, "read"):
img.seek(0)
self._cached_data = img.read()
if mark_to_close:
image.close()
def _data_proposed(self):
"""Proposed _data() that uses cached bytes when available"""
from openpyxl.drawing.image import _import_image
if hasattr(self, "_cached_data"):
return self._cached_data
# Path-based: re-open safely every time
img = _import_image(self.ref)
if self.format in ["gif", "jpeg", "png"]:
if getattr(img, "fp", None) and not img.fp.closed:
img.fp.seek(0)
fp = img.fp
else:
tmp = BytesIO()
img.save(tmp, format=img.format or self.format or "png")
tmp.seek(0)
fp = tmp
else:
fp = BytesIO()
img.save(fp, format="png")
fp.seek(0)
data = fp.read()
try:
fp.close()
except OSError:
pass
return data
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
# Test runner
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
def run_tests(label, data_fn, init_fn=None):
"""
Monkey-patch the real Image class, run all tests, then restore.
This keeps isinstance() checks working inside openpyxl's writer.
"""
print(f"\n{'â'*57}")
print(f" Testing: {label}")
print(f"{'â'*57}")
RealImage = img_module.Image
original_data = RealImage._data
original_init = RealImage.__init__
RealImage._data = data_fn
if init_fn:
RealImage.__init__ = init_fn
try:
# ââ Test 1: Single save (path) ââââââââââââââââââââââââ
try:
wb = make_workbook_with_image_path()
out = BytesIO(); wb.save(out)
assert out.tell() > 0
record(label, "Single save (path)", True)
except Exception as e:
record(label, "Single save (path)", False, str(e))
# ââ Test 2: Double save (path) â original bug âââââââââ
try:
wb = make_workbook_with_image_path()
out1 = BytesIO(); wb.save(out1)
out2 = BytesIO(); wb.save(out2)
assert out1.tell() > 0 and out2.tell() > 0
record(label, "Double save (path)", True)
except Exception as e:
record(label, "Double save (path)", False, str(e))
# ââ Test 3: Triple save (path) ââââââââââââââââââââââââ
try:
wb = make_workbook_with_image_path()
outs = [BytesIO() for _ in range(3)]
for o in outs: wb.save(o)
assert all(o.tell() > 0 for o in outs)
record(label, "Triple save (path)", True)
except Exception as e:
record(label, "Triple save (path)", False, str(e))
# ââ Test 4: Single save (BytesIO) âââââââââââââââââââââ
try:
buf = make_bytesio_png()
wb = make_workbook_with_image_bytesio(buf)
out = BytesIO(); wb.save(out)
assert out.tell() > 0
record(label, "Single save (BytesIO)", True)
except Exception as e:
record(label, "Single save (BytesIO)", False, str(e))
# ââ Test 5: Double save (BytesIO) â core bug ââââââââââ
try:
buf = make_bytesio_png()
wb = make_workbook_with_image_bytesio(buf)
out1 = BytesIO(); wb.save(out1)
out2 = BytesIO(); wb.save(out2)
assert out1.tell() > 0 and out2.tell() > 0
record(label, "Double save (BytesIO)", True)
except Exception as e:
record(label, "Double save (BytesIO)", False, str(e))
# ââ Test 6: Double save (BytesIO explicitly closed) âââ
try:
buf = make_bytesio_png()
wb = make_workbook_with_image_bytesio(buf)
out1 = BytesIO(); wb.save(out1)
buf.close()
out2 = BytesIO(); wb.save(out2)
assert out2.tell() > 0
record(label, "Double save (BytesIO closed after 1st save)", True)
except Exception as e:
record(label, "Double save (BytesIO closed after 1st save)", False, str(e))
# ââ Test 7: Image data integrity (path) âââââââââââââââ
try:
img_obj = RealImage("/tmp/test_image.png")
data = img_obj._data()
assert isinstance(data, bytes) and len(data) > 0
PILImage.open(BytesIO(data)).verify()
record(label, "Image data integrity (path)", True)
except Exception as e:
record(label, "Image data integrity (path)", False, str(e))
# ââ Test 8: Image data integrity (BytesIO) ââââââââââââ
try:
buf = make_bytesio_png()
img_obj = RealImage(buf)
data = img_obj._data()
assert isinstance(data, bytes) and len(data) > 0
PILImage.open(BytesIO(data)).verify()
record(label, "Image data integrity (BytesIO)", True)
except Exception as e:
record(label, "Image data integrity (BytesIO)", False, str(e))
# ââ Test 9: JPEG format preserved âââââââââââââââââââââ
try:
img_obj = RealImage("/tmp/test_image.jpg")
assert img_obj.format == "jpeg", f"expected jpeg, got {img_obj.format}"
data = img_obj._data()
assert data[:2] == b'\xff\xd8', "Not a valid JPEG signature"
record(label, "JPEG format preserved (no silent conversion)", True)
except Exception as e:
record(label, "JPEG format preserved (no silent conversion)", False, str(e))
# ââ Test 10: Output is valid xlsx (re-loadable) ââââââââ
try:
wb = make_workbook_with_image_path()
out = BytesIO(); wb.save(out)
out.seek(0)
wb2 = openpyxl.load_workbook(out)
assert wb2.active["A1"].value == "Hello"
record(label, "Output is valid xlsx (re-loadable)", True)
except Exception as e:
record(label, "Output is valid xlsx (re-loadable)", False, str(e))
finally:
RealImage._data = original_data
RealImage.__init__ = original_init
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
# Main
# âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
if __name__ == "__main__":
print("\n" + "â"*57)
print(" Bug #1132566 â openpyxl multi-save image test suite")
print("â"*57)
run_tests("ORIGINAL (3.1.5)", _data_original)
run_tests("PATCH (Josenilson)", _data_patched)
run_tests("PROPOSED (cache __init__)", _data_proposed, _init_proposed)
print(f"\n{'â'*57}")
print(" SUMMARY")
print(f"{'â'*57}")
by_impl = {}
for impl, name, ok in results:
by_impl.setdefault(impl, []).append((name, ok))
for impl, tests in by_impl.items():
passed = sum(1 for _, ok in tests if ok)
total = len(tests)
bar = "â" * passed + "â" * (total - passed)
color = "\033[92m" if passed == total else "\033[91m"
print(f" {color}{impl:<37} {bar} {passed}/{total}\033[0m")
all_pass = all(ok for _, _, ok in results)
print()
if all_pass:
print(" \033[92mAll tests passed!\033[0m")
else:
failed = [(i, n) for i, n, ok in results if not ok]
print(f" \033[91m{len(failed)} test(s) failed:\033[0m")
for impl, name in failed:
print(f" ⢠[{impl}] {name}")
print()
--
debian-science-maintainers mailing list
[email protected]
https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/debian-science-maintainers