Pablo Galindo Salgado <pablog...@gmail.com> added the comment:
OK, I was able to reproduce the failure:
❯ gcc --version gcc (GCC) 10.1.0 Copyright (C) 2020 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. make distclean ./configure --with-address-sanitizer --with-undefined-behavior-sanitizer LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" make -j ❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof 0:00:00 load avg: 1.82 Run tests sequentially 0:00:00 load avg: 1.82 [1/1] test_eof test test_eof failed -- Traceback (most recent call last): File "/home/pablogsal/github/python/master/Lib/test/test_eof.py", line 54, in test_line_continuation_EOF_from_file_bpo2180 self.assertIn(b'unexpected EOF while parsing', err) AssertionError: b'unexpected EOF while parsing' not found in b'Parser/tokenizer.c:978:50: runtime error: pointer index expression with base 0x625000016900 overflowed to 0xbebebebebebee6be\n===================== ============================================\n==27549==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x606000027c51 at pc 0x5612210ca7d4 bp 0x7fffe6e9ff70 sp 0x7fffe6e9ff60\nREAD of size 1 at 0x60600 0027c51 thread T0\n #0 0x5612210ca7d3 in ascii_decode Objects/unicodeobject.c:4941\n #1 0x5612211c9f4a in unicode_decode_utf8 Objects/unicodeobject.c:4999\n #2 0x5612219201bd in byte_offset_to_characte r_offset Parser/pegen.c:148\n #3 0x5612219201bd in _PyPegen_raise_error_known_location Parser/pegen.c:412\n #4 0x561221920e4d in _PyPegen_raise_error Parser/pegen.c:373\n #5 0x561221924981 in tokenizer _error Parser/pegen.c:321\n #6 0x561221924981 in _PyPegen_fill_token Parser/pegen.c:638\n #7 0x56122192777f in _PyPegen_expect_token Parser/pegen.c:753\n #8 0x56122193817a in _tmp_15_rule Parser/parser .c:16184\n #9 0x5612219274f9 in _PyPegen_lookahead (/home/pablogsal/github/python/master/python+0x1c344f9)\n #10 0x56122199de2c in compound_stmt_rule 
Parser/parser.c:1860\n #11 0x5612219a65c2 in statem ent_rule Parser/parser.c:1224\n #12 0x5612219a65c2 in _loop1_11_rule Parser/parser.c:15954\n #13 0x5612219a65c2 in statements_rule Parser/parser.c:1183\n #14 0x5612219aa4b7 in file_rule Parser/parser.c :716\n #15 0x5612219aa4b7 in _PyPegen_parse Parser/parser.c:24401\n #16 0x56122192a768 in _PyPegen_run_parser Parser/pegen.c:1077\n #17 0x56122192b3ef in _PyPegen_run_parser_from_file_pointer Parser/pe gen.c:1137\n #18 0x5612213da3c6 in PyRun_FileExFlags Python/pythonrun.c:1057\n #19 0x5612213da72c in PyRun_SimpleFileExFlags Python/pythonrun.c:400\n #20 0x561220df0dbb in pymain_run_file Modules/main. c:369\n #21 0x561220df0dbb in pymain_run_python Modules/main.c:553\n #22 0x561220df3154 in Py_RunMain Modules/main.c:632\n #23 0x561220df3154 in pymain_main Modules/main.c:662\n #24 0x561220df3154 i n Py_BytesMain Modules/main.c:686\n #25 0x7f981bf9a001 in __libc_start_main (/usr/lib/libc.so.6+0x27001)\n #26 0x561220ded48d in _start (/home/pablogsal/github/python/master/python+0x10fa48d)\n\n0x6060000 27c51 is located 0 bytes to the right of 49-byte region [0x606000027c20,0x606000027c51)\nallocated by thread T0 here:\n #0 0x7f981ccce459 in __interceptor_malloc /build/gcc/src/gcc/libsanitizer/asan/asan_mal loc_linux.cpp:145\n #1 0x5612210dfa1d in PyUnicode_New Objects/unicodeobject.c:1437\n #2 0x56122121324b in _PyUnicode_Init Objects/unicodeobject.c:15535\n #3 0x5612213ae5c3 in pycore_init_types Python/ pylifecycle.c:599\n #4 0x5612213ae5c3 in pycore_interp_init Python/pylifecycle.c:724\n #5 0x5612213b8b4b in pyinit_config Python/pylifecycle.c:765\n #6 0x5612213b8b4b in pyinit_core Python/pylifecycle. 
c:926\n #7 0x5612213bab6c in Py_InitializeFromConfig Python/pylifecycle.c:1136\n #8 0x561220ded752 in pymain_init Modules/main.c:66\n #9 0x561220df310a in pymain_main Modules/main.c:653\n #10 0x5612 20df310a in Py_BytesMain Modules/main.c:686\n #11 0x7f981bf9a001 in __libc_start_main (/usr/lib/libc.so.6+0x27001)\n\nSUMMARY: AddressSanitizer: heap-buffer-overflow Objects/unicodeobject.c:4941 in ascii_dec ode\nShadow bytes around the buggy address:\n 0x0c0c7fffcf30: 00 00 00 00 00 00 00 00 fa fa fa fa 00 00 00 00\n 0x0c0c7fffcf40: 00 00 00 07 fa fa fa fa 00 00 00 00 00 00 00 00\n 0x0c0c7fffcf50: fa fa fa fa 0 0 00 00 00 00 00 00 05 fa fa fa fa\n 0x0c0c7fffcf60: 00 00 00 00 00 00 00 00 fa fa fa fa 00 00 00 00\n 0x0c0c7fffcf70: 00 00 00 00 fa fa fa fa 00 00 00 00 00 00 00 01\n=>0x0c0c7fffcf80: fa fa fa fa 00 00 00 0 0 00 00[01]fa fa fa fa fa\n 0x0c0c7fffcf90: 00 00 00 00 00 00 00 00 fa fa fa fa 00 00 00 00\n 0x0c0c7fffcfa0: 00 00 05 fa fa fa fa fa 00 00 00 00 00 00 00 fa\n 0x0c0c7fffcfb0: fa fa fa fa 00 00 00 00 00 00 0 0 00 fa fa fa fa\n 0x0c0c7fffcfc0: fd fd fd fd fd fd fd fd fa fa fa fa fd fd fd fd\n 0x0c0c7fffcfd0: fd fd fd fd fa fa fa fa 00 00 00 00 00 00 00 fa\nShadow byte legend (one shadow byte represents 8 applicati on bytes):\n Addressable: 00\n Partially addressable: 01 02 03 04 05 06 07 \n Heap left redzone: fa\n Freed heap region: fd\n Stack left redzone: f1\n Stack mid redzone: f 2\n Stack right redzone: f3\n Stack after return: f5\n Stack use after scope: f8\n Global redzone: f9\n Global init order: f6\n Poisoned by user: f7\n Container overflow: fc\n Array cookie: ac\n Intra object redzone: bb\n ASan internal: fe\n Left alloca redzone: ca\n Right alloca redzone: cb\n Shadow gap: cc\n==27549==ABORT ING\n' test_eof failed == Tests result: FAILURE == 1 test failed: test_eof Total duration: 359 ms Tests result: FAILURE ---------- With this patch diff --git a/Parser/pegen.c b/Parser/pegen.c index e29910bf86..a9f24ca5fa 100644 --- a/Parser/pegen.c +++ 
b/Parser/pegen.c
@@ -145,15 +145,15 @@ byte_offset_to_character_offset(PyObject *line, int col_offset)
     if (!str) {
         return 0;
     }
+    Py_ssize_t linesize = PyUnicode_GET_LENGTH(line);
+    if (col_offset > linesize) {
+        col_offset = (int)linesize;
+    }
     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
     if (!text) {
         return 0;
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(text);
-    str = PyUnicode_AsUTF8(text);
-    if (str != NULL && (int)strlen(str) == col_offset) {
-        size = strlen(str);
-    }
     Py_DECREF(text);
     return size;
 }
@@ -400,9 +400,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 
     if (!error_line) {
         Py_ssize_t size = p->tok->inp - p->tok->buf;
-        if (size && p->tok->buf[size-1] == '\n') {
-            size--;
-        }
         error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         if (!error_line) {
             goto error;

❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof
0:00:00 load avg: 1.39 Run tests sequentially
0:00:00 load avg: 1.39 [1/1] test_eof

== Tests result: SUCCESS ==

1 test OK.

Total duration: 500 ms
Tests result: SUCCESS

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue40958>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com