Hi all, we are facing a crash in VPP's timer wheel infrastructure. Please find the details below.
Version : *19.08* Configuration: *2 workers and the main thread.* Backtraces: thread apply all bt Thread 1 (Thread 0x7ff41d586d00 (LWP 253)): ---Type <return> to continue, or q <return> to quit--- #0 0x00007ff41c696722 in __GI___waitpid (pid=707, stat_loc=stat_loc@entry=0x7ff39f18ca18, options=options@entry=0) at ../sysdeps/unix/sysv/linux/waitpid.c:30 #1 0x00007ff41c601107 in do_system (line=<optimized out>) at ../sysdeps/posix/system.c:149 #2 0x00007ff41d11a76b in bd_signal_handler_cb (signo=6) at /development/librtbrickinfra/bd/src/bd.c:770 #3 0x00007ff410ce907b in rtb_bd_signal_handler (signo=6) at /development/libvpp/src/vlib/unix/main.c:80 #4 0x00007ff410ce9416 in unix_signal_handler (signum=6, si=0x7ff39f18d1f0, uc=0x7ff39f18d0c0) at /development/libvpp/src/vlib/unix/main.c:180 #5 <signal handler called> #6 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51 #7 0x00007ff41c5f28b1 in __GI_abort () at abort.c:79 #8 0x00007ff41248ee66 in os_panic () at /development/libvpp/src/vpp/vnet/main.c:559 #9 0x00007ff410922825 in debugger () at /development/libvpp/src/vppinfra/error.c:84 #10 0x00007ff410922bf4 in _clib_error (how_to_die=2, function_name=0x0, line_number=0, fmt=0x7ff4109e8a78 "%s:%d (%s) assertion `%s' fails") at /development/libvpp/src/vppinfra/error.c:143 #11 0x00007ff4109a64dd in tw_timer_expire_timers_internal_1t_3w_1024sl_ov ( tw=0x7ff39fdf7a40, now=327.59939269500001, ---Type <return> to continue, or q <return> to quit--- callback_vector_arg=0x7ff39fdfab00) at /development/libvpp/src/vppinfra/tw_timer_template.c:753 #12 0x00007ff4109a6b36 in tw_timer_expire_timers_vec_1t_3w_1024sl_ov ( tw=0x7ff39fdf7a40, now=327.59939269500001, vec=0x7ff39fdfab00) at /development/libvpp/src/vppinfra/tw_timer_template.c:814 #13 0x00007ff410c8321a in vlib_main_or_worker_loop ( vm=0x7ff410f22e40 <vlib_global_main>, is_main=1) at /development/libvpp/src/vlib/main.c:1859 #14 0x00007ff410c83965 in vlib_main_loop (vm=0x7ff410f22e40 <vlib_global_main>) at 
/development/libvpp/src/vlib/main.c:1930 #15 0x00007ff410c8462c in vlib_main (vm=0x7ff410f22e40 <vlib_global_main>, input=0x7ff39f18dfb0) at /development/libvpp/src/vlib/main.c:2147 #16 0x00007ff410ceabc9 in thread0 (arg=140686233054784) at /development/libvpp/src/vlib/unix/main.c:666 #17 0x00007ff410943600 in clib_calljmp () from /usr/local/lib/libvppinfra.so.1.0.1 #18 0x00007ffe4d981390 in ?? () #19 0x00007ff410ceb13f in vlib_unix_main (argc=55, argv=0x556c398eb100) at /development/libvpp/src/vlib/unix/main.c:736 #20 0x00007ff41248e7cb in rtb_vpp_core_init (argc=55, argv=0x556c398eb100) at /development/libvpp/src/vpp/vnet/main.c:483 #21 0x00007ff41256189a in rtb_vpp_main () at /development/libvpp/src/vpp/rtbrick/rtb_vpp_main.c:113 ---Type <return> to continue, or q <return> to quit--- #22 0x00007ff41d11a15a in bd_load_daemon_lib ( dmn_lib_cfg=0x7ff41d337860 <bd_json_global+21408>) at /development/librtbrickinfra/bd/src/bd.c:627 #23 0x00007ff41d11a205 in bd_load_all_daemon_libs () at /development/librtbrickinfra/bd/src/bd.c:646 #24 0x00007ff41d11b676 in bd_start_process () at /development/librtbrickinfra/bd/src/bd.c:1128 #25 0x00007ff419e92200 in bds_bd_init () at /development/librtbrickinfra/libbds/code/bds/src/bds.c:651 #26 0x00007ff419f1aa5d in pubsub_bd_init_expiry (data=0x0) at /development/librtbrickinfra/libbds/code/pubsub/src/pubsub_helper.c:1412 #27 0x00007ff41cc23070 in timer_dispatch (item=0x556c39997cf0, p=QB_LOOP_HIGH) at /development/librtbrickinfra/libqb/lib/loop_timerlist.c:56 #28 0x00007ff41cc1f006 in qb_loop_run_level (level=0x556c366fb3e0) at /development/librtbrickinfra/libqb/lib/loop.c:43 #29 0x00007ff41cc1f77b in qb_loop_run (lp=0x556c366fb370) at /development/librtbrickinfra/libqb/lib/loop.c:210 #30 0x00007ff41cc30b3f in lib_qb_service_start_event_loop () at /development/librtbrickinfra/libqb/lib/wrapper/lib_qb_service.c:257 #31 0x0000556c358c7153 in main () Thread 11 (Thread 0x7ff35b622700 (LWP 413)): #0 rtb_vpp_shm_rx_burst (port_id=3, 
queue_id=0, burst_size=64 '@') at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:317 #1 0x00007ff4125ee043 in rtb_vpp_shm_device_input (vm=0x7ff39f89ac80, shmm=0x7ff41285e180 <rtb_vpp_shm_main_ctx>, shmif=0x7ff39f8ad940, node=0x7ff39d461480, frame=0x0, thread_index=2, queue_id=0) at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:359 #2 0x00007ff4125ee839 in rtb_vpp_shm_input_node_fn (vm=0x7ff39f89ac80, node=0x7ff39d461480, f=0x0) at /development/libvpp/src/vpp/rtbrick/rtb_vpp_shm_node.c:452 #3 0x00007ff410c80cef in dispatch_node (vm=0x7ff39f89ac80, node=0x7ff39d461480, type=VLIB_NODE_TYPE_INPUT, dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0, last_time_stamp=11572457044265548) at /development/libvpp/src/vlib/main.c:1207 #4 0x00007ff410c82e50 in vlib_main_or_worker_loop (vm=0x7ff39f89ac80, is_main=0) at /development/libvpp/src/vlib/main.c:1781 #5 0x00007ff410c83985 in vlib_worker_loop (vm=0x7ff39f89ac80) at /development/libvpp/src/vlib/main.c:1936 #6 0x00007ff410cc26af in vlib_worker_thread_fn (arg=0x7ff39d6eba80) at /development/libvpp/src/vlib/threads.c:1803 #7 0x00007ff410943600 in clib_calljmp () ---Type <return> to continue, or q <return> to quit--- from /usr/local/lib/libvppinfra.so.1.0.1 #8 0x00007ff35b621ec0 in ?? () #9 0x00007ff410cbcc46 in vlib_worker_thread_bootstrap_fn (arg=0x7ff39d6eba80) at /development/libvpp/src/vlib/threads.c:573 Backtrace stopped: previous frame inner to this frame (corrupt stack?) 
Thread 10 (Thread 0x7ff35be23700 (LWP 412)): #0 vlib_node_runtime_update_main_loop_vector_stats (vm=0x7ff39f89a540, node=0x7ff3a0248940, n_vectors=0) at /development/libvpp/src/vlib/node_funcs.h:1066 #1 0x00007ff410c7e5df in vlib_node_runtime_update_stats (vm=0x7ff39f89a540, node=0x7ff3a0248940, n_calls=1, n_vectors=0, n_clocks=8543, n_ticks0=0, n_ticks1=0) at /development/libvpp/src/vlib/main.c:657 #2 0x00007ff410c80df9 in dispatch_node (vm=0x7ff39f89a540, node=0x7ff3a0248940, type=VLIB_NODE_TYPE_INPUT, dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0, last_time_stamp=11572457044240565) at /development/libvpp/src/vlib/main.c:1227 #3 0x00007ff410c82e50 in vlib_main_or_worker_loop (vm=0x7ff39f89a540, is_main=0) at /development/libvpp/src/vlib/main.c:1781 #4 0x00007ff410c83985 in vlib_worker_loop (vm=0x7ff39f89a540) at /development/libvpp/src/vlib/main.c:1936 #5 0x00007ff410cc26af in vlib_worker_thread_fn (arg=0x7ff39d6eb980) ---Type <return> to continue, or q <return> to quit--- at /development/libvpp/src/vlib/threads.c:1803 #6 0x00007ff410943600 in clib_calljmp () from /usr/local/lib/libvppinfra.so.1.0.1 #7 0x00007ff35be22ec0 in ?? () #8 0x00007ff410cbcc46 in vlib_worker_thread_bootstrap_fn (arg=0x7ff39d6eb980) at /development/libvpp/src/vlib/threads.c:573 Backtrace stopped: previous frame inner to this frame (corrupt stack?) The ASSERT at line 753 of tw_timer_template.c (the pool_elt_at_index call) seems to be hit, which suggests that next_index is invalid. 
Relevant code from src/vppinfra/tw_timer_template.c (vpp-19-08), lines 750-760:

750       /* Construct vector of expired timer handles to give the user */
751       while (next_index != ts->head_index)
752         {
753           t = pool_elt_at_index (tw->timers, next_index);
754           next_index = t->next;
755           vec_add1 (callback_vector, t->user_handle);
756 #if TW_START_STOP_TRACE_SIZE > 0
757           TW (tw_timer_trace) (tw, 0xfe, t->user_handle, t - tw->timers);
758 #endif
759           pool_put (tw->timers, t);
760         }

Thanks,
Rajith
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#17310): https://lists.fd.io/g/vpp-dev/message/17310 Mute This Topic: https://lists.fd.io/mt/76576572/21656 Group Owner: vpp-dev+ow...@lists.fd.io Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-