Hi,
I've tried poking at this bug; here are the findings so far. The unoptimized
binary works fine. The binary built with optimizations and the additional
-ggdb flag reliably crashes in one of two places (which one probably depends
on memory layout or some other external factors):
Crash #1
========
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0xf6e426e0 (LWP 2216)]
0xf787c264 in names_mark_index (nt=<value optimized out>, nidx=<value
optimized out>) at ./src/iname.c:338
338 if (pnstr->mark)
(gdb) list
333 bool
334 names_mark_index(name_table * nt, name_index_t nidx)
335 {
336 name_string_t *pnstr = names_index_string_inline(nt,
nidx);
337
338 if (pnstr->mark)
339 return false;
340 pnstr->mark = 1;
341 return true;
(gdb) disass
Dump of assembler code for function names_mark_index:
0xf787c244 <names_mark_index+0>: and %o1, 0xff, %g1
0xf787c248 <names_mark_index+4>: srl %o1, 8, %o1
0xf787c24c <names_mark_index+8>: add %o1, 0x203, %o1
0xf787c250 <names_mark_index+12>: sll %o1, 3, %o1
0xf787c254 <names_mark_index+16>: add %o0, %o1, %o0
0xf787c258 <names_mark_index+20>: sll %g1, 3, %g4
0xf787c25c <names_mark_index+24>: ld [ %o0 + 8 ], %g2
0xf787c260 <names_mark_index+28>: sethi %hi(0x4000), %g3
0xf787c264 <names_mark_index+32>: ld [ %g2 + %g4 ], %g1
0xf787c268 <names_mark_index+36>: btst %g1, %g3
0xf787c26c <names_mark_index+40>: bne 0xf787c280 <names_mark_index+60>
0xf787c270 <names_mark_index+44>: clr %o0
0xf787c274 <names_mark_index+48>: or %g1, %g3, %g1
0xf787c278 <names_mark_index+52>: mov 1, %o0
0xf787c27c <names_mark_index+56>: st %g1, [ %g2 + %g4 ]
0xf787c280 <names_mark_index+60>: retl
0xf787c284 <names_mark_index+64>: nop
End of assembler dump.
(gdb) bt
#0 0xf787c264 in names_mark_index (nt=<value optimized out>, nidx=<value
optimized out>) at ./src/iname.c:338
#1 0xf7879260 in gc_trace (rp=<value optimized out>, pstate=0xff8154c0,
pmstack=<value optimized out>) at ./src/igc.c:974
#2 0xf78799ac in gs_gc_reclaim (pspaces=0x436a8, global=1) at ./src/igc.c:326
#3 0xf78f73f4 in context_reclaim (pspaces=0x436a8, global=1) at
./src/zcontext.c:283
#4 0xf7855d8c in ireclaim (dmem=0x436a4, space=8) at ./src/ireclaim.c:153
#5 0xf7851bfc in interp_reclaim (pi_ctx_p=0x22174, space=8) at
./src/interp.c:427
#6 0xf7848824 in gs_main_finit (minst=0x22120, exit_status=0, code=0) at
./src/imain.c:752
#7 0xf784c500 in gsapi_exit (lib=<value optimized out>) at ./src/iapi.c:261
#8 0x000109b4 in main (argc=1, argv=<value optimized out>) at
./src/dxmainc.c:88
By breaking at the entry point of this function, I was able to figure
out that the crash here is most probably due to an atypically large
value of nidx (nidx == 46812) passed by the caller:
(gdb) up
#1 0xf7879260 in gc_trace (rp=<value optimized out>, pstate=0xff8154c0,
pmstack=<value optimized out>) at ./src/igc.c:974
974 mark_name(names_index(nt, rptr));
(gdb) list
969 case t_mixedarray:
970 case t_shortarray:
971 nptr = rptr->value.writable_packed;
972 goto rr;
973 case t_name:
974 mark_name(names_index(nt, rptr));
975 nr:pptr = (ref_packed *) (rptr + 1);
976 goto tr;
977 case t_string:
978 if (gc_string_mark(rptr->value.bytes, r_size(rptr),
true, pstate))
(gdb) disass
[...]
0xf7879250 <gc_trace+968>: lduh [ %l0 + 2 ], %o1
0xf7879254 <gc_trace+972>: ld [ %fp + -28 ], %o0
0xf7879258 <gc_trace+976>: call 0xf7c98024 <[EMAIL PROTECTED]>
0xf787925c <gc_trace+980>: mov %l4, %l0
0xf7879260 <gc_trace+984>: b 0xf787908c <gc_trace+516>
0xf7879264 <gc_trace+988>: ld [ %l1 + 4 ], %g1
0xf7879268 <gc_trace+992>: b 0xf7879088 <gc_trace+512>
0xf787926c <gc_trace+996>: add %l0, 8, %l0
[...]
nidx is passed in %o1, which is loaded from the memory location %l0 + 2.
After working out all the defines, it turns out that %l0 contains rptr
(or so I think).
Crash #2
========
Program received signal SIGBUS, Bus error.
[Switching to Thread 0xf6dce6e0 (LWP 2220)]
gc_trace (rp=<value optimized out>, pstate=0xffad94c0, pmstack=<value optimized
out>) at ./src/igc.c:920
920 nptr = rptr->value.pfile;
(gdb) list
915 }
916 sp->ptr = rptr + 1;
917 switch (r_type(rptr)) {
918 /* Struct cases */
919 case t_file:
920 nptr = rptr->value.pfile;
921 rs:sp[1].is_refs = false;
922 sp[1].index = 0;
923 if (sp == stop) {
924 ptp = ptr_struct_type;
(gdb) print $pc
$1 = (void (*)()) 0xf7805144 <gc_trace+700>
(gdb) disass
[...]
0xf780513c <gc_trace+692>: jmp %g2 + %g3
0xf7805140 <gc_trace+696>: nop
0xf7805144 <gc_trace+700>: ld [ %l0 + 4 ], %l3
0xf7805148 <gc_trace+704>: clr [ %l1 + 0x14 ]
0xf780514c <gc_trace+708>: clr [ %l2 + 4 ]
0xf7805150 <gc_trace+712>: cmp %i4, %l1
0xf7805154 <gc_trace+716>: be 0xf780531c <gc_trace+1172>
0xf7805158 <gc_trace+720>: mov %l2, %i2
0xf780515c <gc_trace+724>: cmp %l3, 0
0xf7805160 <gc_trace+728>: be 0xf7805184 <gc_trace+764>
0xf7805164 <gc_trace+732>: st %l3, [ %fp + -24 ]
0xf7805168 <gc_trace+736>: ld [ %l3 + -16 ], %g2
0xf780516c <gc_trace+740>: sethi %hi(0x7ffffc00), %g4
0xf7805170 <gc_trace+744>: or %g4, 0x3ff, %g4 ! 0x7fffffff
0xf7805174 <gc_trace+748>: and %g2, %g4, %g1
0xf7805178 <gc_trace+752>: cmp %g1, %g4
0xf780517c <gc_trace+756>: be 0xf78051b0 <gc_trace+808>
0xf7805180 <gc_trace+760>: add %l3, -16, %g3
0xf7805184 <gc_trace+764>: b 0xf7805088 <gc_trace+512>
0xf7805188 <gc_trace+768>: mov %l4, %l0
[...]
I believe that %l0 contains rptr and that the third line of this assembly
dump (at gc_trace+700, the faulting $pc) tries to load rptr->value.pfile
into %l3. So it seems that rptr, which is passed around in %l0, gets
corrupted somehow. The next step is to try to figure out where this
corruption happens.
Cheers.
--
Jurij Smakov [EMAIL PROTECTED]
Key: http://www.wooyd.org/pgpkey/ KeyID: C99E03CC
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]