[Python-checkins] gh-130273: Fix traceback color output with unicode characters (GH-142529)

ambv Tue, 07 Apr 2026 06:05:42 -0700

https://github.com/python/cpython/commit/dfeb160bc35f0ba16800d07b85cb11598d1cd307
commit: dfeb160bc35f0ba16800d07b85cb11598d1cd307
branch: main
author: grayjk <[email protected]>
committer: ambv <[email protected]>
date: 2026-04-07T15:05:23+02:00
summary:


gh-130273: Fix traceback color output with unicode characters (GH-142529)

Account for the display width of Unicode characters so that colors and
underlining in traceback output are correct.

Co-authored-by: Łukasz Langa <[email protected]>
Co-authored-by: Victor Stinner <[email protected]>

files:
A Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
M Lib/_pyrepl/utils.py
M Lib/test/test_traceback.py
M Lib/traceback.py

diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py
index 7175d57a9e319e..d399b4cf53c82a 100644
--- a/Lib/_pyrepl/utils.py
+++ b/Lib/_pyrepl/utils.py
@@ -16,6 +16,7 @@
 from .types import CharBuffer, CharWidths
 from .trace import trace
 
+
 ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
 ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 5dc11253e0d5c8..909808825f055e 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -1790,6 +1790,7 @@ def f():
         ]
         self.assertEqual(result_lines, expected)
 
+
 class TestKeywordTypoSuggestions(unittest.TestCase):
     TYPO_CASES = [
         ("with block ad something:\n  pass", "and"),
@@ -5414,6 +5415,92 @@ def expected(t, m, fn, l, f, E, e, z, n):
         ]
         self.assertEqual(actual, expected(**colors))
 
+    def test_colorized_traceback_unicode(self):
+        try:
+            啊哈=1; 啊哈/0####
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z, n):
+            return [
+                f"    啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
+                f"            {e}~~~~{z}{E}^{z}{e}~{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
+        try:
+            ééééé/0
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z, n):
+            return [
+                f"    {E}ééééé{z}/0",
+                f"    {E}^^^^^{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
+    def test_colorized_syntax_error_ascii_display_width(self):
+        """Caret alignment for ASCII edge cases handled by _wlen.
+
+        The old ASCII fast track in _display_width returned the raw character
+        offset for ASCII strings, which is wrong for CTRL-Z (display width 2)
+        and ANSI escape sequences (display width 0).
+        """
+        E = colors["E"]
+        z = colors["z"]
+        t = colors["t"]
+        m = colors["m"]
+        fn = colors["fn"]
+        l = colors["l"]
+
+        def _make_syntax_error(text, offset, end_offset):
+            err = SyntaxError("invalid syntax")
+            err.filename = "<string>"
+            err.lineno = 1
+            err.end_lineno = 1
+            err.text = text
+            err.offset = offset
+            err.end_offset = end_offset
+            return err
+
+        # CTRL-Z (\x1a) is ASCII but displayed as ^Z (2 columns).
+        # Verify caret aligns when CTRL-Z precedes the error.
+        err = _make_syntax_error("a\x1a$\n", offset=3, end_offset=4)
+        exc = traceback.TracebackException.from_exception(err)
+        actual = "".join(exc.format(colorize=True))
+        # 'a' (1 col) + '\x1a' (2 cols) = 3 cols before '$'
+        self.assertIn(
+            f'  File {fn}"<string>"{z}, line {l}1{z}\n'
+            f'    a\x1a{E}${z}\n'
+            f'    {" " * 3}{E}^{z}\n'
+            f'{t}SyntaxError{z}: {m}invalid syntax{z}\n',
+            actual,
+        )
+
+        # CTRL-Z in the highlighted (error) region counts as 2 columns.
+        err = _make_syntax_error("$\x1a\n", offset=1, end_offset=3)
+        exc = traceback.TracebackException.from_exception(err)
+        actual = "".join(exc.format(colorize=True))
+        # '$' (1 col) + '\x1a' (2 cols) = 3 columns of carets
+        self.assertIn(
+            f'    {E}$\x1a{z}\n'
+            f'    {E}{"^" * 3}{z}\n',
+            actual,
+        )
+
+        # ANSI escape sequences are ASCII but take 0 display columns.
+        err = _make_syntax_error("a\x1b[1mb$\n", offset=7, end_offset=8)
+        exc = traceback.TracebackException.from_exception(err)
+        actual = "".join(exc.format(colorize=True))
+        # 'a' (1 col) + '\x1b[1m' (0 cols) + 'b' (1 col) = 2 before '$'
+        self.assertIn(
+            f'    a\x1b[1mb{E}${z}\n'
+            f'    {" " * 2}{E}^{z}\n',
+            actual,
+        )
 
 class TestLazyImportSuggestions(unittest.TestCase):
     """Test that lazy imports are not reified when computing AttributeError 
suggestions."""
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 1f9f151ebf5d39..343d0e5f108c35 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -1,9 +1,11 @@
 """Extract, format and print information about Python stack traces."""
 
 import collections.abc
+import functools
 import itertools
 import linecache
 import os
+import re
 import sys
 import textwrap
 import types
@@ -684,12 +686,12 @@ def output_line(lineno):
                         colorized_line_parts = []
                         colorized_carets_parts = []
 
-                        for color, group in 
itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda 
x: x[1]):
+                        for color, group in 
itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
                             caret_group = list(group)
-                            if color == "^":
+                            if "^" in color:
                                 
colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in 
caret_group) + theme.reset)
                                 
colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, 
caret in caret_group) + theme.reset)
-                            elif color == "~":
+                            elif "~" in color:
                                 colorized_line_parts.append(theme.error_range 
+ "".join(char for char, _ in caret_group) + theme.reset)
                                 
colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in 
caret_group) + theme.reset)
                             else:
@@ -971,7 +973,54 @@ def setup_positions(expr, force_valid=True):
 
     return None
 
-_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _zip_display_width(line, carets):
+    carets = iter(carets)
+    if line.isascii() and '\x1a' not in line:
+        for char in line:
+            yield char, next(carets, "")
+        return
+
+    import unicodedata
+    for char in unicodedata.iter_graphemes(line):
+        char = str(char)
+        char_width = _display_width(char)
+        yield char, "".join(itertools.islice(carets, char_width))
+
+
+@functools.cache
+def _str_width(c: str) -> int:
+    # copied from _pyrepl.utils to fix gh-130273
+
+    if ord(c) < 128:
+        return 1
+    import unicodedata
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
+    w = unicodedata.east_asian_width(c)
+    if w in ("N", "Na", "H", "A"):
+        return 1
+    return 2
+
+
+_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+
+
+def _wlen(s: str) -> int:
+    # copied from _pyrepl.utils to fix gh-130273
+
+    if len(s) == 1 and s != "\x1a":
+        return _str_width(s)
+    length = sum(_str_width(i) for i in s)
+    # remove lengths of any escape sequences
+    sequence = _ANSI_ESCAPE_SEQUENCE.findall(s)
+    ctrl_z_cnt = s.count("\x1a")
+    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
 
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
@@ -979,18 +1028,9 @@ def _display_width(line, offset=None):
     width output device. Supports wide unicode characters and emojis."""
 
     if offset is None:
-        offset = len(line)
-
-    # Fast track for ASCII-only strings
-    if line.isascii():
-        return offset
+        return _wlen(line)
 
-    import unicodedata
-
-    return sum(
-        2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
-        for char in line[:offset]
-    )
+    return _wlen(line[:offset])
 
 
 def _format_note(note, indent, theme):
diff --git 
a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst 
b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
new file mode 100644
index 00000000000000..2e0695334fd71e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
@@ -0,0 +1 @@
+Fix traceback color output with Unicode characters.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

[Python-checkins] gh-130273: Fix traceback color output with unicode characters (GH-142529)

Reply via email to