Hi Benoit, I have all those fixes. I had reported this issue (27407); the others I found during my tests, and I added barrier protection in all those places. This ASSERT does not seem to be due to pool expansion, as what I have mostly seen in those cases is an invalid pointer access causing a SIGSEGV. Here it's an abort triggered from *pool_elt_at_index* as the index is already freed.
Is it not possible that the main thread freed the pool entry, while the worker thread was holding an index. The backtraces I have attached indicate that the main thread was freeing fib entries. Thanks, Rajith On Tue, Jul 7, 2020 at 5:50 PM Benoit Ganne (bganne) <bga...@cisco.com> wrote: > Hi Rajith, > > You are probably missing https://gerrit.fd.io/r/c/vpp/+/27407 > https://gerrit.fd.io/r/c/vpp/+/27454 and maybe > https://gerrit.fd.io/r/c/vpp/+/27448 > > Best > ben > > > -----Original Message----- > > From: vpp-dev@lists.fd.io <vpp-dev@lists.fd.io> On Behalf Of Rajith PR > via > > lists.fd.io > > Sent: mardi 7 juillet 2020 14:11 > > To: vpp-dev <vpp-dev@lists.fd.io> > > Subject: [vpp-dev]: ASSERT in load_balance_get() > > > > Hi All, > > > > During our scale testing of routes we have hit an ASSERT in > > load_balance_get() . From the code it looks like the lb_index(148) > > referred to is already returned to the pool by the main thread causing > the > > ASSERT in the worker. The version in 19.08. We have two workers and a > > main thread. > > > > Any inputs to fix the issue is highly appreciated. > > > > The complete bt is pasted below: > > > > Thread 11 (Thread 0x7f988ebe3700 (LWP 398)): > > #0 0x00007f99464cef54 in vlib_worker_thread_barrier_check () at > > /development/libvpp/src/vlib/threads.h:430 > > #1 0x00007f99464d6b9b in vlib_main_or_worker_loop (vm=0x7f98d8ba3540, > > is_main=0) at /development/libvpp/src/vlib/main.c:1744 > > #2 0x00007f99464d7971 in vlib_worker_loop (vm=0x7f98d8ba3540) at > > /development/libvpp/src/vlib/main.c:1934 > > #3 0x00007f994651669b in vlib_worker_thread_fn (arg=0x7f98d6191a40) at > > /development/libvpp/src/vlib/threads.c:1803 > > #4 0x00007f9946197600 in clib_calljmp () from > > /usr/local/lib/libvppinfra.so.1.0.1 > > #5 0x00007f988ebe2ec0 in ?? 
() > > #6 0x00007f9946510c32 in vlib_worker_thread_bootstrap_fn > > (arg=0x7f98d6191a40) at /development/libvpp/src/vlib/threads.c:573 > > Backtrace stopped: previous frame inner to this frame (corrupt stack?) > > > > Thread 10 (Thread 0x7f988f3e4700 (LWP 397)): > > #0 0x00007f995217d722 in __GI___waitpid (pid=2595, > > stat_loc=stat_loc@entry=0x7f98d8fd0118, options=options@entry=0) at > > ../sysdeps/unix/sysv/linux/waitpid.c:30 > > #1 0x00007f99520e8107 in do_system (line=<optimized out>) at > > ../sysdeps/posix/system.c:149 > > #2 0x00007f9952c016ca in bd_signal_handler_cb (signo=6) at > > /development/librtbrickinfra/bd/src/bd.c:770 > > #3 0x00007f994653d0ac in rtb_bd_signal_handler (signo=6) at > > /development/libvpp/src/vlib/unix/main.c:80 > > #4 0x00007f994653d447 in unix_signal_handler (signum=6, > > si=0x7f98d8fd08f0, uc=0x7f98d8fd07c0) at > > /development/libvpp/src/vlib/unix/main.c:180 > > #5 <signal handler called> > > #6 __GI_raise (sig=sig@entry=6) at > ../sysdeps/unix/sysv/linux/raise.c:51 > > #7 0x00007f99520d98b1 in __GI_abort () at abort.c:79 > > #8 0x00007f9947cdec86 in os_panic () at > > /development/libvpp/src/vpp/vnet/main.c:559 > > #9 0x00007f9946176825 in debugger () at > > /development/libvpp/src/vppinfra/error.c:84 > > #10 0x00007f9946176bf4 in _clib_error (how_to_die=2, function_name=0x0, > > line_number=0, fmt=0x7f99475271b8 "%s:%d (%s) assertion `%s' fails") > > at /development/libvpp/src/vppinfra/error.c:143 > > #11 0x00007f99468d046c in load_balance_get (lbi=148) at > > /development/libvpp/src/vnet/dpo/load_balance.h:222 > > #12 0x00007f99468d4d44 in ip4_local_check_src (b=0x1002535e00, > > ip0=0x1002535f52, last_check=0x7f98d8fd1234, error0=0x7f98d8fd11e8 > > "\016\r") > > at /development/libvpp/src/vnet/ip/ip4_forward.c:1583 > > #13 0x00007f99468d58e1 in ip4_local_inline (vm=0x7f98d8ba2e40, > > node=0x7f98d8711f40, frame=0x7f98d9585bc0, head_of_feature_arc=1) > > at /development/libvpp/src/vnet/ip/ip4_forward.c:1870 > > #14 
0x00007f99468d5a08 in ip4_local_node_fn_avx2 (vm=0x7f98d8ba2e40, > > node=0x7f98d8711f40, frame=0x7f98d9585bc0) at > > /development/libvpp/src/vnet/ip/ip4_forward.c:1889 > > #15 0x00007f99464d4cef in dispatch_node (vm=0x7f98d8ba2e40, > > node=0x7f98d8711f40, type=VLIB_NODE_TYPE_INTERNAL, > > dispatch_state=VLIB_NODE_STATE_POLLING, > > frame=0x7f98d9585bc0, last_time_stamp=531887125605324) at > > /development/libvpp/src/vlib/main.c:1207 > > #16 0x00007f99464d54aa in dispatch_pending_node (vm=0x7f98d8ba2e40, > > pending_frame_index=3, last_time_stamp=531887125605324) at > > /development/libvpp/src/vlib/main.c:1375 > > #17 0x00007f99464d70ee in vlib_main_or_worker_loop (vm=0x7f98d8ba2e40, > > is_main=0) at /development/libvpp/src/vlib/main.c:1826 > > #18 0x00007f99464d7971 in vlib_worker_loop (vm=0x7f98d8ba2e40) at > > /development/libvpp/src/vlib/main.c:1934 > > #19 0x00007f994651669b in vlib_worker_thread_fn (arg=0x7f98d6191940) at > > /development/libvpp/src/vlib/threads.c:1803 > > #20 0x00007f9946197600 in clib_calljmp () from > > /usr/local/lib/libvppinfra.so.1.0.1 > > #21 0x00007f988f3e3ec0 in ?? () > > #22 0x00007f9946510c32 in vlib_worker_thread_bootstrap_fn > > (arg=0x7f98d6191940) at /development/libvpp/src/vlib/threads.c:573 > > Backtrace stopped: previous frame inner to this frame (corrupt stack?) 
> > > > Thread 1 (Thread 0x7f995306c740 (LWP 249)): > > #0 0x00007f994650dd9e in clib_time_now (c=0x7f9946776e40 > > <vlib_global_main>) at /development/libvpp/src/vppinfra/time.h:217 > > #1 0x00007f994650de74 in vlib_time_now (vm=0x7f9946776e40 > > <vlib_global_main>) at /development/libvpp/src/vlib/main.h:268 > > #2 0x00007f99465159ab in vlib_worker_thread_barrier_sync_int > > (vm=0x7f9946776e40 <vlib_global_main>, > > func_name=0x7f994764e2d0 <__FUNCTION__.42472> "adj_last_lock_gone") > at > > /development/libvpp/src/vlib/threads.c:1486 > > #3 0x00007f994743fb9f in adj_last_lock_gone (adj=0x7f98d5f0fc00) at > > /development/libvpp/src/vnet/adj/adj.c:256 > > #4 0x00007f994744062e in adj_node_last_lock_gone (node=0x7f98d5f0fc00) > at > > /development/libvpp/src/vnet/adj/adj.c:546 > > #5 0x00007f99473e74be in fib_node_unlock (node=0x7f98d5f0fc00) at > > /development/libvpp/src/vnet/fib/fib_node.c:215 > > #6 0x00007f9947440186 in adj_unlock (adj_index=37) at > > /development/libvpp/src/vnet/adj/adj.c:346 > > #7 0x00007f99474277ba in adj_dpo_unlock (dpo=0x7f98d8bb5288) at > > /development/libvpp/src/vnet/adj/adj_nbr.c:1008 > > #8 0x00007f994744b9bd in dpo_unlock (dpo=0x7f98d8bb5288) at > > /development/libvpp/src/vnet/dpo/dpo.c:378 > > #9 0x00007f994744b379 in dpo_copy (dst=0x7f98d896dba0, > > src=0x7f98d8bb52b8) at /development/libvpp/src/vnet/dpo/dpo.c:274 > > #10 0x00007f994744b2ef in dpo_reset (dpo=0x7f98d896dba0) at > > /development/libvpp/src/vnet/dpo/dpo.c:239 > > #11 0x00007f994745b394 in load_balance_destroy (lb=0x7f98d896db80) at > > /development/libvpp/src/vnet/dpo/load_balance.c:880 > > #12 0x00007f994745b777 in load_balance_unlock (dpo=0x7f98d8bb5398) at > > /development/libvpp/src/vnet/dpo/load_balance.c:906 > > #13 0x00007f994744b9bd in dpo_unlock (dpo=0x7f98d8bb5398) at > > /development/libvpp/src/vnet/dpo/dpo.c:378 > > #14 0x00007f994744b379 in dpo_copy (dst=0x7f98d88ecd58, > > src=0x7f98d8bb53c8) at /development/libvpp/src/vnet/dpo/dpo.c:274 > > #15 
0x00007f994744b2ef in dpo_reset (dpo=0x7f98d88ecd58) at > > /development/libvpp/src/vnet/dpo/dpo.c:239 > > #16 0x00007f99473f5025 in fib_entry_src_action_uninstall > > (fib_entry=0x7f98d88ecd30) at > > /development/libvpp/src/vnet/fib/fib_entry_src.c:707 > > #17 0x00007f99473ec71c in fib_entry_source_removed > > (fib_entry=0x7f98d88ecd30, old_flags=FIB_ENTRY_FLAG_NONE) at > > /development/libvpp/src/vnet/fib/fib_entry.c:992 > > #18 0x00007f99473ec926 in fib_entry_path_remove (fib_entry_index=4159, > > source=FIB_SOURCE_API, rpaths=0x7f98d9e73a00) > > at /development/libvpp/src/vnet/fib/fib_entry.c:1082 > > #19 0x00007f99473d8d1d in fib_table_entry_path_remove2 (fib_index=5, > > prefix=0x7f98d8bb56d0, source=FIB_SOURCE_API, rpaths=0x7f98d9e73a00) > > at /development/libvpp/src/vnet/fib/fib_table.c:663 > > > > #20 0x00007f9947dcf3a6 in rtb_vpp_route_mapping_process > > (table=0x5576fd90ef3c, object=0x557701f7f07c, action=1 '\001') > > at /development/libvpp/src/vpp/rtbrick/rtb_vpp_route.c:919 > > #21 0x00007f9947dcf477 in rtb_vpp_route_mapping_del_cb > > (table=0x5576fd90ef3c, object=0x557701f7f07c) at > > /development/libvpp/src/vpp/rtbrick/rtb_vpp_route.c:947 > > #22 0x00007f994f98bd9a in bds_handle_object_plugin > > (table_handle=0x5576fd90ef3c, index=0x557701f7f07c, old_index=0x0, > > type=BDS_PC_DEL) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:992 > > #23 0x00007f994f98ab6c in bds_deferred_object_plugins > > (table_handle=0x5576fd90ef3c, obj_handle=0x557701f7f07c, > > obj_deleted=0x7f98d8bb58c2 "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:458 > > #24 0x00007f994f98b2af in bds_deferred_plugin_cb > > (table_handle=0x5576fd90ef3c, obj_handle=0x557701f7f07c, > > obj_deleted=0x7f98d8bb58c2 "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:656 > > #25 0x00007f994f97fed9 in bds_object_handle_plugin_publish > > (table_handle=0x5576fd90ef3c, obj_handle=0x557701f7f07c, > > old_obj_handle=0x0) > > 
---Type <return> to continue, or q <return> to quit--- > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:3403 > > #26 0x00007f994f983600 in bds_object_delete (obj_template=0x5576f8637c5c) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:4867 > > #27 0x00007f9947dcd772 in rtb_vpp_route_mapping_object_add_del > > (route_mapping=0x7f98d8bb5a70, action=1 '\001') > > at /development/libvpp/src/vpp/rtbrick/rtb_vpp_route.c:383 > > #28 0x00007f9947dd0986 in rtb_vpp_adj_adjacency_route_handle > > (adj_api_out=0x7f98d8bb5ba0, action=1 '\001') at > > /development/libvpp/src/vpp/rtbrick/rtb_vpp_adj.c:132 > > #29 0x00007f9947dd0b93 in rtb_vpp_adj_api_out_process > > (table=0x5576fdc0b13c, object=0x55770212be6c, action=1 '\001') > > at /development/libvpp/src/vpp/rtbrick/rtb_vpp_adj.c:178 > > #30 0x00007f9947dd0c57 in rtb_vpp_adj_api_out_chng_cb > > (table=0x5576fdc0b13c, object=0x55770102780c, object_old=0x55770212be6c) > > at /development/libvpp/src/vpp/rtbrick/rtb_vpp_adj.c:207 > > #31 0x00007f994f98bd9a in bds_handle_object_plugin > > (table_handle=0x5576fdc0b13c, index=0x55770102780c, > > old_index=0x55770212be6c, type=BDS_PC_CHANGE) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:992 > > #32 0x00007f994f98ad7f in bds_deferred_object_plugins > > (table_handle=0x5576fdc0b13c, obj_handle=0x55770102780c, > > obj_deleted=0x7f98d8bb5de2 "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:492 > > #33 0x00007f994f98b2af in bds_deferred_plugin_cb > > (table_handle=0x5576fdc0b13c, obj_handle=0x55770102780c, > > obj_deleted=0x7f98d8bb5de2 "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:656 > > #34 0x00007f994f97fed9 in bds_object_handle_plugin_publish > > (table_handle=0x5576fdc0b13c, obj_handle=0x55770102780c, > > old_obj_handle=0x55770212be6c) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:3403 > > #35 0x00007f994f9801b9 in bds_object_handle_features > > 
(table=0x5576fdc0b13c, index=0x55770102780c, old_index=0x55770212be6c, > > add_vector=3, is_diff_index=0 '\000') > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:3515 > > #36 0x00007f994f980dc5 in bds_build_index (table=0x5576fdc0b13c, > > obj_map=0x557702d6fefc, cp_offset=0x7f98d8bb5f80, cp_len=0x7f98d8bb6180, > > data_offset=38, > > add_bitmap=65535, is_diff_index=0 '\000') at > > /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:3832 > > > > #37 0x00007f994f9816bd in bds_create_object_index_map > > (obj_template=0x5576f8083e5c, use_seq=0 '\000', skip_index=0 '\000') > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:4024 > > #38 0x00007f994f98206c in bds_object_add (object_template=0x5576f8083e5c) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_object.c:4267 > > #39 0x00007f9945acfd12 in fib_adjacency_api_out_obj_add_del > > (fib_adjacency=0x7f98d8bb6820, action=0 '\000') > > at > > /development/libforwarding/pi/fib/code/src/fib_adjacency_api_out.c:342 > > #40 0x00007f9945acec9a in fib_adjacency_local_obj_process > > (table=0x5576fdc0512c, obj_handle=0x5577018317fc, action=0 '\000') > > at /development/libforwarding/pi/fib/code/src/fib_adjacency.c:90 > > #41 0x00007f9945aced39 in fib_adjacency_local_obj_chng_cb > > (tblHndl=0x5576fdc0512c, newObjHndl=0x5577018317fc, > > oldObjHndl=0x5577016fed0c) > > at /development/libforwarding/pi/fib/code/src/fib_adjacency.c:111 > > #42 0x00007f994f98bd9a in bds_handle_object_plugin > > (table_handle=0x5576fdc0512c, index=0x5577018317fc, > > old_index=0x5577016fed0c, type=BDS_PC_CHANGE) > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:992 > > #43 0x00007f994f98ad7f in bds_deferred_object_plugins > > (table_handle=0x5576fdc0512c, obj_handle=0x5577018317fc, > > obj_deleted=0x7f98d8bb6d8f "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:492 > > #44 0x00007f994f98b2af in bds_deferred_plugin_cb > > 
(table_handle=0x5576fdc0512c, obj_handle=0x5577018317fc, > > obj_deleted=0x7f98d8bb6d8f "") > > at /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:656 > > #45 0x00007f994f98b390 in bds_deferred_plugin_cb_expiry > > (table_handle=0x5576fdc0512c) at > > /development/librtbrickinfra/libbds/code/bds/src/bds_plugin.c:696 > > #46 0x00007f995270a070 in timer_dispatch (item=0x557701811cb0, > > p=QB_LOOP_HIGH) at > > /development/librtbrickinfra/libqb/lib/loop_timerlist.c:56 > > #47 0x00007f9952706006 in qb_loop_run_level (level=0x5576f78f5830) at > > /development/librtbrickinfra/libqb/lib/loop.c:43 > > #48 0x00007f9952707038 in qb_loop_run_vpp_wrapper (lp=0x5576f78f57c0) at > > /development/librtbrickinfra/libqb/lib/loop.c:350 > > #49 0x00007f9952717cd8 in lib_qb_service_start_event_wrapper_loop () at > > /development/librtbrickinfra/libqb/lib/wrapper/lib_qb_service.c:290 > > #50 0x00007f98cfb8386c in rtb_vpp_epoll_process (vm=0x7f9946776e40 > > <vlib_global_main>, rt=0x7f98d8ba6000, f=0x0) > > ---Type <return> to continue, or q <return> to quit--- > > at /development/libvpp/src/plugins/rtbrick/rtb_node.c:81 > > #51 0x00007f99464d571b in vlib_process_bootstrap (_a=140294421412800) at > > /development/libvpp/src/vlib/main.c:1468 > > #52 0x00007f9946197600 in clib_calljmp () from > > /usr/local/lib/libvppinfra.so.1.0.1 > > #53 0x00007f98d726fb90 in ?? () > > #54 0x00007f99464d5820 in vlib_process_startup (vm=0x7f99464d588a > > <vlib_process_resume+96>, p=0x8, f=0x7f9946776e40 <vlib_global_main>) > > at /development/libvpp/src/vlib/main.c:1490 > > Backtrace stopped: previous frame inner to this frame (corrupt stack?) > > > > > > Thanks, > > Rajith >
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#16904): https://lists.fd.io/g/vpp-dev/message/16904 Mute This Topic: https://lists.fd.io/mt/75353568/21656 Group Owner: vpp-dev+ow...@lists.fd.io Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-