Given the amount of soak-time / perf/scale / stress testing which the tw_timer 
code has experienced, it’s reasonably likely that your application is 
responsible.

Caution is required when dealing with timers other than the timer which has 
expired.

If you have > 1 timer per object and you manipulate timer B when timer A 
expires, there’s no guarantee that timer B isn’t already on the expired timer 
list. That’s almost always good for trouble.

HTH... Dave

From: vpp-dev@lists.fd.io <vpp-dev@lists.fd.io> On Behalf Of Rajith PR via 
lists.fd.io
Sent: Wednesday, September 2, 2020 12:39 AM
To: vpp-dev <vpp-dev@lists.fd.io>
Subject: [vpp-dev]: Crash in Timer wheel infra

Hi All,

We are facing a crash in VPP's Timer wheel INFRA. Please find the details below.

Version : 19.08
Configuration: 2 workers and the main thread.
Bactraces: thread apply all bt


Thread 1 (Thread 0x7ff41d586d00 (LWP 253)):

---Type <return> to continue, or q <return> to quit---

#0  0x00007ff41c696722 in __GI___waitpid (pid=707,

    stat_loc=stat_loc@entry=0x7ff39f18ca18, options=options@entry=0)

    at ../sysdeps/unix/sysv/linux/waitpid.c:30

#1  0x00007ff41c601107 in do_system (line=<optimized out>)

    at ../sysdeps/posix/system.c:149

#2  0x00007ff41d11a76b in bd_signal_handler_cb (signo=6)

    at /development/librtbrickinfra/bd/src/bd.c:770

#3  0x00007ff410ce907b in rtb_bd_signal_handler (signo=6)

    at /development/libvpp/src/vlib/unix/main.c:80

#4  0x00007ff410ce9416 in unix_signal_handler (signum=6, si=0x7ff39f18d1f0,

    uc=0x7ff39f18d0c0) at /development/libvpp/src/vlib/unix/main.c:180

#5  <signal handler called>

#6  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51

#7  0x00007ff41c5f28b1 in __GI_abort () at abort.c:79

#8  0x00007ff41248ee66 in os_panic ()

    at /development/libvpp/src/vpp/vnet/main.c:559

#9  0x00007ff410922825 in debugger ()

    at /development/libvpp/src/vppinfra/error.c:84

#10 0x00007ff410922bf4 in _clib_error (how_to_die=2, function_name=0x0,

    line_number=0, fmt=0x7ff4109e8a78 "%s:%d (%s) assertion `%s' fails")

    at /development/libvpp/src/vppinfra/error.c:143

#11 0x00007ff4109a64dd in tw_timer_expire_timers_internal_1t_3w_1024sl_ov (

    tw=0x7ff39fdf7a40, now=327.59939269500001,

---Type <return> to continue, or q <return> to quit---

    callback_vector_arg=0x7ff39fdfab00)

    at /development/libvpp/src/vppinfra/tw_timer_template.c:753

#12 0x00007ff4109a6b36 in tw_timer_expire_timers_vec_1t_3w_1024sl_ov (

    tw=0x7ff39fdf7a40, now=327.59939269500001, vec=0x7ff39fdfab00)

    at /development/libvpp/src/vppinfra/tw_timer_template.c:814

#13 0x00007ff410c8321a in vlib_main_or_worker_loop (

    vm=0x7ff410f22e40 <vlib_global_main>, is_main=1)

    at /development/libvpp/src/vlib/main.c:1859

#14 0x00007ff410c83965 in vlib_main_loop (vm=0x7ff410f22e40 <vlib_global_main>)

    at /development/libvpp/src/vlib/main.c:1930

#15 0x00007ff410c8462c in vlib_main (vm=0x7ff410f22e40 <vlib_global_main>,

    input=0x7ff39f18dfb0) at /development/libvpp/src/vlib/main.c:2147

#16 0x00007ff410ceabc9 in thread0 (arg=140686233054784)

    at /development/libvpp/src/vlib/unix/main.c:666

#17 0x00007ff410943600 in clib_calljmp ()

   from /usr/local/lib/libvppinfra.so.1.0.1

#18 0x00007ffe4d981390 in ?? ()

#19 0x00007ff410ceb13f in vlib_unix_main (argc=55, argv=0x556c398eb100)

    at /development/libvpp/src/vlib/unix/main.c:736

#20 0x00007ff41248e7cb in rtb_vpp_core_init (argc=55, argv=0x556c398eb100)

    at /development/libvpp/src/vpp/vnet/main.c:483

#21 0x00007ff41256189a in rtb_vpp_main ()

    at /development/libvpp/src/vpp/rtbrick/rtb_vpp_main.c:113

---Type <return> to continue, or q <return> to quit---

#22 0x00007ff41d11a15a in bd_load_daemon_lib (

    dmn_lib_cfg=0x7ff41d337860 <bd_json_global+21408>)

    at /development/librtbrickinfra/bd/src/bd.c:627

#23 0x00007ff41d11a205 in bd_load_all_daemon_libs ()

    at /development/librtbrickinfra/bd/src/bd.c:646

#24 0x00007ff41d11b676 in bd_start_process ()

    at /development/librtbrickinfra/bd/src/bd.c:1128

#25 0x00007ff419e92200 in bds_bd_init ()

    at /development/librtbrickinfra/libbds/code/bds/src/bds.c:651

#26 0x00007ff419f1aa5d in pubsub_bd_init_expiry (data=0x0)

    at /development/librtbrickinfra/libbds/code/pubsub/src/pubsub_helper.c:1412

#27 0x00007ff41cc23070 in timer_dispatch (item=0x556c39997cf0, p=QB_LOOP_HIGH)

    at /development/librtbrickinfra/libqb/lib/loop_timerlist.c:56

#28 0x00007ff41cc1f006 in qb_loop_run_level (level=0x556c366fb3e0)

    at /development/librtbrickinfra/libqb/lib/loop.c:43

#29 0x00007ff41cc1f77b in qb_loop_run (lp=0x556c366fb370)

    at /development/librtbrickinfra/libqb/lib/loop.c:210

#30 0x00007ff41cc30b3f in lib_qb_service_start_event_loop ()

    at /development/librtbrickinfra/libqb/lib/wrapper/lib_qb_service.c:257

#31 0x0000556c358c7153 in main ()

Thread 11 (Thread 0x7ff35b622700 (LWP 413)):

#0  rtb_vpp_shm_rx_burst (port_id=3, queue_id=0, burst_size=64 '@')

    at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:317

#1  0x00007ff4125ee043 in rtb_vpp_shm_device_input (vm=0x7ff39f89ac80,

    shmm=0x7ff41285e180 <rtb_vpp_shm_main_ctx>, shmif=0x7ff39f8ad940,

    node=0x7ff39d461480, frame=0x0, thread_index=2, queue_id=0)

    at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:359

#2  0x00007ff4125ee839 in rtb_vpp_shm_input_node_fn (vm=0x7ff39f89ac80,

    node=0x7ff39d461480, f=0x0)

    at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:452

#3  0x00007ff410c80cef in dispatch_node (vm=0x7ff39f89ac80,

    node=0x7ff39d461480, type=VLIB_NODE_TYPE_INPUT,

    dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0,

    last_time_stamp=11572457044265548)

    at /development/libvpp/src/vlib/main.c:1207

#4  0x00007ff410c82e50 in vlib_main_or_worker_loop (vm=0x7ff39f89ac80,

    is_main=0) at /development/libvpp/src/vlib/main.c:1781

#5  0x00007ff410c83985 in vlib_worker_loop (vm=0x7ff39f89ac80)

    at /development/libvpp/src/vlib/main.c:1936

#6  0x00007ff410cc26af in vlib_worker_thread_fn (arg=0x7ff39d6eba80)

    at /development/libvpp/src/vlib/threads.c:1803

#7  0x00007ff410943600 in clib_calljmp ()

---Type <return> to continue, or q <return> to quit---

   from /usr/local/lib/libvppinfra.so.1.0.1

#8  0x00007ff35b621ec0 in ?? ()

#9  0x00007ff410cbcc46 in vlib_worker_thread_bootstrap_fn (arg=0x7ff39d6eba80)

    at /development/libvpp/src/vlib/threads.c:573

Backtrace stopped: previous frame inner to this frame (corrupt stack?)



Thread 10 (Thread 0x7ff35be23700 (LWP 412)):

#0  vlib_node_runtime_update_main_loop_vector_stats (vm=0x7ff39f89a540,

    node=0x7ff3a0248940, n_vectors=0)

    at /development/libvpp/src/vlib/node_funcs.h:1066

#1  0x00007ff410c7e5df in vlib_node_runtime_update_stats (vm=0x7ff39f89a540,

    node=0x7ff3a0248940, n_calls=1, n_vectors=0, n_clocks=8543, n_ticks0=0,

    n_ticks1=0) at /development/libvpp/src/vlib/main.c:657

#2  0x00007ff410c80df9 in dispatch_node (vm=0x7ff39f89a540,

    node=0x7ff3a0248940, type=VLIB_NODE_TYPE_INPUT,

    dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0,

    last_time_stamp=11572457044240565)

    at /development/libvpp/src/vlib/main.c:1227

#3  0x00007ff410c82e50 in vlib_main_or_worker_loop (vm=0x7ff39f89a540,

    is_main=0) at /development/libvpp/src/vlib/main.c:1781

#4  0x00007ff410c83985 in vlib_worker_loop (vm=0x7ff39f89a540)

    at /development/libvpp/src/vlib/main.c:1936

#5  0x00007ff410cc26af in vlib_worker_thread_fn (arg=0x7ff39d6eb980)

---Type <return> to continue, or q <return> to quit---

    at /development/libvpp/src/vlib/threads.c:1803

#6  0x00007ff410943600 in clib_calljmp ()

   from /usr/local/lib/libvppinfra.so.1.0.1

#7  0x00007ff35be22ec0 in ?? ()

#8  0x00007ff410cbcc46 in vlib_worker_thread_bootstrap_fn (arg=0x7ff39d6eb980)

    at /development/libvpp/src/vlib/threads.c:573

Backtrace stopped: previous frame inner to this frame (corrupt stack?)
The ASSERT at line 753 seems to be hit, the next_index seems to be invalid.


750<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L750>
       /* Construct vector of expired timer handles to give the user */

751<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L751>
       while (next_index != ts->head_index)

752<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L752>
         {

753<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L753>
           t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t> = 
pool_elt_at_index<http://rajith/lxr/http/ident?sn=vpp-19-08;i=pool_elt_at_index>
 (tw->timers, 
next_index<http://rajith/lxr/http/ident?sn=vpp-19-08;i=next_index>);

754<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L754>
           next_index<http://rajith/lxr/http/ident?sn=vpp-19-08;i=next_index> = 
t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t>->next;

755<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L755>
           vec_add1<http://rajith/lxr/http/ident?sn=vpp-19-08;i=vec_add1> 
(callback_vector, 
t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t>->user_handle);

756<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L756>
 #if 
TW_START_STOP_TRACE_SIZE<http://rajith/lxr/http/ident?sn=vpp-19-08;i=TW_START_STOP_TRACE_SIZE>
 > 0

757<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L757>
           TW<http://rajith/lxr/http/ident?sn=vpp-19-08;i=TW> 
(tw_timer_trace<http://rajith/lxr/http/ident?sn=vpp-19-08;i=tw_timer_trace>) 
(tw, 0xfe, t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t>->user_handle, 
t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t> - tw->timers);

758<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L758>
 #endif

759<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L759>
           pool_put (tw->timers, 
t<http://rajith/lxr/http/ident?sn=vpp-19-08;i=t>);

760<http://rajith/lxr/http/source/src/vppinfra/tw_timer_template.c?sn=vpp-19-08#L760>
         }



Thanks,

Rajith


-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.

View/Reply Online (#17317): https://lists.fd.io/g/vpp-dev/message/17317
Mute This Topic: https://lists.fd.io/mt/76576572/21656
Group Owner: vpp-dev+ow...@lists.fd.io
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub  [arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to