https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114872

--- Comment #23 from Sergei Trofimovich <slyfox at gcc dot gnu.org> ---
At the SIGSEGV site the code performs an unconditional NULL dereference: it
dereferences the result of `xor %esi,%esi`. From `gdb`:

797         if (op != _Py_NULL) {
   0x00007f940c871563 <+2563>:  cmpq   $0x0,-0xc8(%rbp)
   0x00007f940c87156b <+2571>:  je     0x7f940c871583
<__pyx_pf_4sage_4libs_3gap_7element_19GapElement_Function_2__call__+2595>

242         return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
   0x00007f940c87156d <+2573>:  xor    %esi,%esi
=> 0x00007f940c87156f <+2575>:  mov    (%rsi),%rax

In `element-verbose.S` it is:

# /usr/include/python3.12/object.h:797:     if (op != _Py_NULL) {
    .loc 5 797 8 is_stmt 0 view .LVU65876
    cmpq $0, -200(%rbp) #, %sfp
    je .L12727 #,
    .loc 5 798 9 is_stmt 1 view .LVU65877
.LVL15705:
.LBB49946:
.LBI49946:
    .loc 5 696 37 view .LVU65878
.LBB49947:
    .loc 5 700 5 view .LVU65879
.LBB49948:
.LBI49948:
    .loc 5 239 36 view .LVU65880
.LBB49949:
    .loc 5 242 5 view .LVU65881
# /usr/include/python3.12/object.h:242:     return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
    .loc 5 242 12 is_stmt 0 view .LVU65882
    xorl %esi, %esi # r
    movq (%rsi), %rax # __pyx_t_6_208(ab)->D.11083.ob_refcnt, _991

For comparison, other sites in `element-verbose.S` do use the contents of
`-0xc8(%rbp)` (i.e. `-200(%rbp)`):

# /usr/include/python3.12/object.h:797:     if (op != _Py_NULL) {
    .loc 5 797 8 is_stmt 0 view .LVU66162
    cmpq $0, -200(%rbp) #, %sfp
    je .L12782 #,
    .loc 5 798 9 is_stmt 1 view .LVU66163
.LVL15760:
.LBB50093:
.LBI50093:
    .loc 5 696 37 view .LVU66164
.LBB50094:
    .loc 5 700 5 view .LVU66165
.LBB50095:
.LBI50095:
    .loc 5 239 36 view .LVU66166
.LBB50096:
    .loc 5 242 5 view .LVU66167
# /usr/include/python3.12/object.h:242:     return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
    .loc 5 242 12 is_stmt 0 view .LVU66168
    movq -200(%rbp), %rdx # %sfp, r
    movq (%rdx), %rax # __pyx_t_6_10(ab)->D.11083.ob_refcnt, _1070

Thus my guess is that something clobbered the value at `-200(%rbp)` across
setjmp()/longjmp().

Trying to trace:

$ gdb -p `pgrep sage-ipython`
(gdb) break __pyx_pf_4sage_4libs_3gap_7element_19GapElement_Function_2__call__
(gdb) continue

    # trigger the breakpoint with `libgap.AbelianGroup(0,0,0)`

(gdb) disassemble
Dump of assembler code for function
__pyx_pf_4sage_4libs_3gap_7element_19GapElement_Function_2__call__:
=> 0x00007f4ed9981b60 <+0>:     push   %rbp
   0x00007f4ed9981b61 <+1>:     mov    %rsp,%rbp

    # Populating `%rbp`:

(gdb) nexti
(gdb) nexti
(gdb) disassemble
Dump of assembler code for function
__pyx_pf_4sage_4libs_3gap_7element_19GapElement_Function_2__call__:
   0x00007f4ed9981b60 <+0>:     push   %rbp
   0x00007f4ed9981b61 <+1>:     mov    %rsp,%rbp
=> 0x00007f4ed9981b64 <+4>:     push   %r15

(gdb) print $rbp-200
$2 = (void *) 0x7ffd2824c5e8

(gdb) watch *(int*)(void *) 0x7ffd2824c5e8
Hardware watchpoint 2: *(int*)(void *) 0x7ffd2824c5e8

(gdb) continue
Continuing.

Thread 1 "sage-ipython" hit Hardware watchpoint 2: *(int*)(void *)
0x7ffd2824c5e8

Old value = 673498624
New value = 0
0x00007f98e609d2a8 in
__pyx_pf_4sage_4libs_3gap_7element_19GapElement_Function_2__call__ (
    __pyx_v_self=__pyx_v_self@entry=0x7f98dfe70dc0,
    __pyx_v_args=__pyx_v_args@entry=(<sage.rings.integer.Integer at remote
0x7f98e4722c40>, <sage.rings.integer.Integer at remote 0x7f98dfe7afd0>,
<sage.rings.integer.Integer at remote 0x7f98e01dd5c0>))
    at
/usr/src/debug/sci-mathematics/sagemath-standard-10.3/sagemath-standard-10.3-python3_12/build/cythonized/sage/libs/gap/element.c:26192
26192       __pyx_t_6 = NULL;

NULL store.

(gdb) continue
Continuing.

Thread 1 "sage-ipython" hit Hardware watchpoint 2: *(int*)(void *)
0x7ffd2824c5e8

Old value = 0
New value = -538669696
__Pyx_GetItemInt_List_Fast (wraparound=0, boundscheck=1, i=2,
    o=[<sage.libs.gap.element.GapElement_Integer at remote 0x7f98e0ac5c00>,
<sage.libs.gap.element.GapElement_Integer at remote 0x7f98dfe4b500>,
<sage.libs.gap.element.GapElement_Integer at remote 0x7f98dfe48d80>])
    at
/usr/src/debug/sci-mathematics/sagemath-standard-10.3/sagemath-standard-10.3-python3_12/build/cythonized/sage/libs/gap/element.c:38070
38070           Py_INCREF(r);

Create an object?

(gdb) continue
Continuing.

Thread 1 "sage-ipython" received signal SIGABRT, Aborted.
0x00007f99428617a7 in __GI_kill () at ../sysdeps/unix/syscall-template.S:120
120     T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)

Abort.

(gdb) continue
Continuing.

Thread 1 "sage-ipython" received signal SIGSEGV, Segmentation fault.
0x00007f98e609c56f in _Py_IsImmortal (op=0x0) at
/usr/include/python3.12/object.h:242
242         return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;

SIGSEGV.

Note that both memory references happen before longjmp() (the ABORT).

Why did `gcc` generate an unconditional NULL dereference here? I suspect it
somehow inferred that `__pyx_t_6` is NULL in that branch, but did not apply
that inference to the comparison that precedes it.

Reply via email to