Pablo Galindo Salgado <pablog...@gmail.com> added the comment:

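OK, I was able to reproduce the test_eof failure with GCC 10.1.0 on a build configured with both the address sanitizer and the undefined-behavior sanitizer: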

❯ gcc --version
gcc (GCC) 10.1.0
Copyright (C) 2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

make distclean
./configure --with-address-sanitizer --with-undefined-behavior-sanitizer
LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0"  make -j

❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof
0:00:00 load avg: 1.82 Run tests sequentially
0:00:00 load avg: 1.82 [1/1] test_eof
test test_eof failed -- Traceback (most recent call last):
  File "/home/pablogsal/github/python/master/Lib/test/test_eof.py", line 54, in 
test_line_continuation_EOF_from_file_bpo2180
    self.assertIn(b'unexpected EOF while parsing', err)
AssertionError: b'unexpected EOF while parsing' not found in 
b'Parser/tokenizer.c:978:50: runtime error: pointer index expression with base 
0x625000016900 overflowed to 0xbebebebebebee6be\n=====================
============================================\n==27549==ERROR: AddressSanitizer: 
heap-buffer-overflow on address 0x606000027c51 at pc 0x5612210ca7d4 bp 
0x7fffe6e9ff70 sp 0x7fffe6e9ff60\nREAD of size 1 at 0x60600
0027c51 thread T0\n    #0 0x5612210ca7d3 in ascii_decode 
Objects/unicodeobject.c:4941\n    #1 0x5612211c9f4a in unicode_decode_utf8 
Objects/unicodeobject.c:4999\n    #2 0x5612219201bd in byte_offset_to_characte
r_offset Parser/pegen.c:148\n    #3 0x5612219201bd in 
_PyPegen_raise_error_known_location Parser/pegen.c:412\n    #4 0x561221920e4d 
in _PyPegen_raise_error Parser/pegen.c:373\n    #5 0x561221924981 in tokenizer
_error Parser/pegen.c:321\n    #6 0x561221924981 in _PyPegen_fill_token 
Parser/pegen.c:638\n    #7 0x56122192777f in _PyPegen_expect_token 
Parser/pegen.c:753\n    #8 0x56122193817a in _tmp_15_rule Parser/parser
.c:16184\n    #9 0x5612219274f9 in _PyPegen_lookahead 
(/home/pablogsal/github/python/master/python+0x1c344f9)\n    #10 0x56122199de2c 
in compound_stmt_rule Parser/parser.c:1860\n    #11 0x5612219a65c2 in statem
ent_rule Parser/parser.c:1224\n    #12 0x5612219a65c2 in _loop1_11_rule 
Parser/parser.c:15954\n    #13 0x5612219a65c2 in statements_rule 
Parser/parser.c:1183\n    #14 0x5612219aa4b7 in file_rule Parser/parser.c
:716\n    #15 0x5612219aa4b7 in _PyPegen_parse Parser/parser.c:24401\n    #16 
0x56122192a768 in _PyPegen_run_parser Parser/pegen.c:1077\n    #17 
0x56122192b3ef in _PyPegen_run_parser_from_file_pointer Parser/pe
gen.c:1137\n    #18 0x5612213da3c6 in PyRun_FileExFlags 
Python/pythonrun.c:1057\n    #19 0x5612213da72c in PyRun_SimpleFileExFlags 
Python/pythonrun.c:400\n    #20 0x561220df0dbb in pymain_run_file Modules/main.
c:369\n    #21 0x561220df0dbb in pymain_run_python Modules/main.c:553\n    #22 
0x561220df3154 in Py_RunMain Modules/main.c:632\n    #23 0x561220df3154 in 
pymain_main Modules/main.c:662\n    #24 0x561220df3154 i
n Py_BytesMain Modules/main.c:686\n    #25 0x7f981bf9a001 in __libc_start_main 
(/usr/lib/libc.so.6+0x27001)\n    #26 0x561220ded48d in _start 
(/home/pablogsal/github/python/master/python+0x10fa48d)\n\n0x6060000
27c51 is located 0 bytes to the right of 49-byte region 
[0x606000027c20,0x606000027c51)\nallocated by thread T0 here:\n    #0 
0x7f981ccce459 in __interceptor_malloc 
/build/gcc/src/gcc/libsanitizer/asan/asan_mal
loc_linux.cpp:145\n    #1 0x5612210dfa1d in PyUnicode_New 
Objects/unicodeobject.c:1437\n    #2 0x56122121324b in _PyUnicode_Init 
Objects/unicodeobject.c:15535\n    #3 0x5612213ae5c3 in pycore_init_types 
Python/
pylifecycle.c:599\n    #4 0x5612213ae5c3 in pycore_interp_init 
Python/pylifecycle.c:724\n    #5 0x5612213b8b4b in pyinit_config 
Python/pylifecycle.c:765\n    #6 0x5612213b8b4b in pyinit_core 
Python/pylifecycle.
c:926\n    #7 0x5612213bab6c in Py_InitializeFromConfig 
Python/pylifecycle.c:1136\n    #8 0x561220ded752 in pymain_init 
Modules/main.c:66\n    #9 0x561220df310a in pymain_main Modules/main.c:653\n    
#10 0x5612
20df310a in Py_BytesMain Modules/main.c:686\n    #11 0x7f981bf9a001 in 
__libc_start_main (/usr/lib/libc.so.6+0x27001)\n\nSUMMARY: AddressSanitizer: 
heap-buffer-overflow Objects/unicodeobject.c:4941 in ascii_dec
ode\nShadow bytes around the buggy address:\n  0x0c0c7fffcf30: 00 00 00 00 00 
00 00 00 fa fa fa fa 00 00 00 00\n  0x0c0c7fffcf40: 00 00 00 07 fa fa fa fa 00 
00 00 00 00 00 00 00\n  0x0c0c7fffcf50: fa fa fa fa 0
0 00 00 00 00 00 00 05 fa fa fa fa\n  0x0c0c7fffcf60: 00 00 00 00 00 00 00 00 
fa fa fa fa 00 00 00 00\n  0x0c0c7fffcf70: 00 00 00 00 fa fa fa fa 00 00 00 00 
00 00 00 01\n=>0x0c0c7fffcf80: fa fa fa fa 00 00 00 0
0 00 00[01]fa fa fa fa fa\n  0x0c0c7fffcf90: 00 00 00 00 00 00 00 00 fa fa fa 
fa 00 00 00 00\n  0x0c0c7fffcfa0: 00 00 05 fa fa fa fa fa 00 00 00 00 00 00 00 
fa\n  0x0c0c7fffcfb0: fa fa fa fa 00 00 00 00 00 00 0
0 00 fa fa fa fa\n  0x0c0c7fffcfc0: fd fd fd fd fd fd fd fd fa fa fa fa fd fd 
fd fd\n  0x0c0c7fffcfd0: fd fd fd fd fa fa fa fa 00 00 00 00 00 00 00 
fa\nShadow byte legend (one shadow byte represents 8 applicati
on bytes):\n  Addressable:           00\n  Partially addressable: 01 02 03 04 
05 06 07 \n  Heap left redzone:       fa\n  Freed heap region:       fd\n  
Stack left redzone:      f1\n  Stack mid redzone:       f
2\n  Stack right redzone:     f3\n  Stack after return:      f5\n  Stack use 
after scope:   f8\n  Global redzone:          f9\n  Global init order:       
f6\n  Poisoned by user:        f7\n  Container overflow:
      fc\n  Array cookie:            ac\n  Intra object redzone:    bb\n  ASan 
internal:           fe\n  Left alloca redzone:     ca\n  Right alloca redzone:  
  cb\n  Shadow gap:              cc\n==27549==ABORT
ING\n'

test_eof failed

== Tests result: FAILURE ==

1 test failed:
    test_eof

Total duration: 359 ms
Tests result: FAILURE


----------

With the following patch applied, test_eof passes on the same sanitizer build:

diff --git a/Parser/pegen.c b/Parser/pegen.c
index e29910bf86..a9f24ca5fa 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -145,15 +145,15 @@ byte_offset_to_character_offset(PyObject *line, int col_offset)
     if (!str) {
         return 0;
     }
+    Py_ssize_t linesize = PyUnicode_GET_LENGTH(line);
+    if (col_offset > linesize) {
+        col_offset = (int)linesize;
+    }
     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
     if (!text) {
         return 0;
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(text);
-    str = PyUnicode_AsUTF8(text);
-    if (str != NULL && (int)strlen(str) == col_offset) {
-        size = strlen(str);
-    }
     Py_DECREF(text);
     return size;
 }
@@ -400,9 +400,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,

     if (!error_line) {
         Py_ssize_t size = p->tok->inp - p->tok->buf;
-        if (size && p->tok->buf[size-1] == '\n') {
-            size--;
-        }
         error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         if (!error_line) {
             goto error;


❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof
0:00:00 load avg: 1.39 Run tests sequentially
0:00:00 load avg: 1.39 [1/1] test_eof

== Tests result: SUCCESS ==

1 test OK.

Total duration: 500 ms
Tests result: SUCCESS

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue40958>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to