Hi Matthew, Many thanks for opening my eyes. Now everything is perfectly clear for me.
On Mon, 18 Jan 2021 at 18:10, Matthew Smith <mgsm...@netgate.com> wrote: > > Hi Stanislav, > > As you noted, vlib_buffer_validate_alloc_free() is the only place that > would change the "known state" of a buffer. All of the calls to that > function are in vlib/buffer_funcs.h in inline functions in conditional > blocks with a condition like 'if (CLIB_DEBUG > 0)'. One of those calls is > made when vlib_buffer_alloc() is called - it > calls vlib_buffer_alloc_on_numa() which calls vlib_buffer_alloc_from_pool() > which has several invocations of vlib_buffer_validate_alloc_free() which > all look like this: > > if (CLIB_DEBUG > 0) > vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, > VLIB_BUFFER_KNOWN_FREE); > > Since these calls occur in inline functions and are only made when > CLIB_DEBUG is defined as a value greater than 0, it seems like the issue is > probably that VPP and the router plugin are being built with CLIB_DEBUG > defined to different values - one of them is being built with CLIB_DEBUG == > 0 and one of them is built with CLIB_DEBUG > 0. Since your backtrace > shows vlib_buffer_validate_alloc_free() being called from one of the VPP > virtio nodes, that suggests that VPP was built with CLIB_DEBUG > 0. You are > probably building the router plugin without explicitly defining a value for > CLIB_DEBUG so it is ending up defined to 0. To remedy the issue, you could > either build VPP with CLIB_DEBUG set to 0 (run 'make build-release' instead > of 'make build') or explicitly define CLIB_DEBUG when you build the router > plugin. > > -Matt > > > On Sat, Jan 16, 2021 at 5:50 AM Stanislav Zaikin <zsta...@gmail.com> > wrote: > >> I've dug more into this issue. And I noticed strange thing: >> >> 187 { >> (gdb) next >> 188 tap_inject_main_t * im = tap_inject_get_main (); >> (gdb) >> 190 u32 bi = ~0; >> (gdb) >> 194 sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd); >> (gdb) >> 195 if (sw_if_index == ~0) >> (gdb) >> 199 if( vlib_buffer_alloc (vm, &bi, 1) != 1 ) >> (gdb) >> 202 b = vlib_get_buffer (vm, bi); >> (gdb) >> 207 n_bytes = read (fd, b->data, vlib_buffer_get_default_data_size (vm)); >> (gdb) >> 208 if (n_bytes < 0) >> (gdb) print b >> $31 = (vlib_buffer_t *) 0x10027db5c0 >> (gdb) print *b >> $32 = {{cacheline0 = 0x10027db5c0 "", current_data = 0, current_length = 0, >> flags = 0, flow_id = 0, ref_count = 1 '\001', buffer_pool_index = 0 '\000', >> error = 0, next_buffer = 0, {current_config_index = 0, >> punt_reason = 0}, opaque = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, >> template_end = 0x10027db600 "", second_half = 0x10027db600 "", trace_handle >> = 0, total_length_not_including_first_buffer = 0, opaque2 = { >> 0 <repeats 14 times>}, headroom = 0x10027db640 "", pre_data = '\000' >> <repeats 126 times>, "\001", data = 0x10027db6c0 "33"}} >> (gdb) print bi >> $33 = 653015 >> (gdb) call vlib_buffer_is_known (vm, bi) >> $34 = VLIB_BUFFER_KNOWN_FREE >> >> For some reason the allocated buffer was marked as free. The only function >> that could change its state is vlib_buffer_validate_alloc_free. >> >> always_inline __clib_warn_unused_result u32 >> vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 >> n_buffers, >> u8 buffer_pool_index) >> { >> /* omit */ >> if (CLIB_DEBUG > 0) >> vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, >> VLIB_BUFFER_KNOWN_FREE); >> return n_buffers; >> >> But there's another strange issue. I've tried to debug it with gdb and >> sometimes vlib_buffer_validate_alloc_free isn't calling for some reason: >> >> Thread 1 "vpp_main" hit Breakpoint 1, vlib_buffer_alloc_from_pool >> (buffer_pool_index=0 '\000', n_buffers=1, buffers=0x7fffafc727e0, >> vm=0x7ffff6d67bc0 <vlib_global_main>) >> at /usr/include/vlib/buffer_funcs.h:569 >> 569 vlib_buffer_main_t *bm = vm->buffer_main; >> (gdb) next >> 585 bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index); >> (gdb) >> 586 bpt = vec_elt_at_index (bp->threads, vm->thread_index); >> (gdb) >> 588 dst = buffers; >> (gdb) >> 589 n_left = n_buffers; >> (gdb) >> 590 len = bpt->n_cached; >> (gdb) >> 593 if (len >= n_buffers) >> (gdb) >> 595 src = bpt->cached_buffers + len - n_buffers; >> (gdb) >> 596 vlib_buffer_copy_indices (dst, src, n_buffers); >> (gdb) >> 597 bpt->n_cached -= n_buffers; >> (gdb) >> 602 return n_buffers; >> (gdb) nextvlib_buffer_alloc_on_numa (numa_node=0, n_buffers=1, >> buffers=0x7fffafc727e0, vm=0x7ffff6d67bc0 <vlib_global_main>) at >> /usr/include/vlib/buffer_funcs.h:664 >> 664 return vlib_buffer_alloc_from_pool (vm, buffers, n_buffers, index); >> (gdb) cont >> Continuing. >> [New Thread 0x7fffacc9b700 (LWP 127836)] >> [Thread 0x7fffacc9b700 (LWP 127836) exited] >> >> Thread 1 "vpp_main" hit Breakpoint 1, vlib_buffer_alloc_from_pool >> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffb74e4dfc, n_buffers=64, >> buffer_pool_index=0 '\000') >> at /home/zstas/vpp/src/vlib/buffer_funcs.h:569 >> 569 vlib_buffer_main_t *bm = vm->buffer_main; >> (gdb) next >> 585 bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index); >> (gdb) >> 586 bpt = vec_elt_at_index (bp->threads, vm->thread_index); >> (gdb) >> 588 dst = buffers; >> (gdb) >> 589 n_left = n_buffers; >> (gdb) >> 590 len = bpt->n_cached; >> (gdb) >> 593 if (len >= n_buffers) >> (gdb) >> 595 src = bpt->cached_buffers + len - n_buffers; >> (gdb) >> 596 vlib_buffer_copy_indices (dst, src, n_buffers); >> (gdb) >> 597 bpt->n_cached -= n_buffers; >> (gdb) *600 vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, >> *(gdb) >> 602 return n_buffers; >> (gdb) >> 648 } >> (gdb) cont >> >> I have no idea why in the first run vlib_buffer_validate_alloc_free hadn't >> been called (second run was ok). Because there is the same code according to >> debug info in gdb. >> >> I hope you'll point me where I'm wrong. >> >> >> On Sat, 16 Jan 2021 at 12:07, Stanislav Zaikin via lists.fd.io <zstaseg= >> gmail....@lists.fd.io> wrote: >> >>> Hello Damjan, >>> Thanks for the response. >>> >>> Nothing special happens, I just want to send this packet to another >>> interface. >>> Interface TX is just a regular VPP node (TX for tapv2/virtio). This >>> packet went to error-node because the interface was down. But if I set >>> interface up, everything looks just the same (error on freeing this buffer): >>> >>> (gdb) bt >>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 >>> #1 0x00007ffff6936859 in __GI_abort () at abort.c:79 >>> #2 0x00007ffff6bd73bf in os_panic () at >>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177 >>> #3 0x00007ffff6b37ea4 in debugger () at >>> /home/zstas/vpp/src/vppinfra/error.c:84 >>> #4 0x00007ffff6b38295 in _clib_error (how_to_die=2, >>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> >>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s >>> %U buffer 0x%x") >>> at /home/zstas/vpp/src/vppinfra/error.c:143 >>> #5 0x00007ffff6c644fc in vlib_buffer_validate_alloc_free >>> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffafc730b0, n_buffers=1, >>> expected_state=VLIB_BUFFER_KNOWN_ALLOCATED) >>> at /home/zstas/vpp/src/vlib/buffer.c:366 >>> #6 0x00007ffff6f753ec in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc730b0, >>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754 >>> #7 0x00007ffff6f75c29 in vlib_buffer_free_inline (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, buffers=0x7fffb74f0044, n_buffers=0, maybe_next=1) at >>> /home/zstas/vpp/src/vlib/buffer_funcs.h:924 >>> #8 0x00007ffff6f75c79 in vlib_buffer_free (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, buffers=0x7fffb74f0040, n_buffers=1) at >>> /home/zstas/vpp/src/vlib/buffer_funcs.h:943 >>> #9 0x00007ffff6f75d1a in vlib_buffer_free_from_ring (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, ring=0x7fffb74f0040, start=0, ring_size=256, >>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:991 >>> #10 0x00007ffff6f79af4 in virtio_free_used_device_desc_split >>> (vm=0x7ffff6d67bc0 <vlib_global_main>, vring=0x7fffb75ab540, >>> node_index=679) at /home/zstas/vpp/src/vnet/devices/virtio/device.c:200 >>> #11 0x00007ffff6f79dc2 in virtio_free_used_device_desc (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, vring=0x7fffb75ab540, node_index=679, packed=0) at >>> /home/zstas/vpp/src/vnet/devices/virtio/device.c:277 >>> #12 0x00007ffff6f7baed in virtio_device_class_tx_fn_hsw (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, node=0x7fffb6b73800, frame=0x7fffb96ff8c0) at >>> /home/zstas/vpp/src/vnet/devices/virtio/device.c:1017 >>> #13 0x00007ffff6cba2e2 in dispatch_node (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, node=0x7fffb6b73800, type=VLIB_NODE_TYPE_INTERNAL, >>> dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x7fffb96ff8c0, >>> last_time_stamp=1108992938029) at /home/zstas/vpp/src/vlib/main.c:1223 >>> #14 0x00007ffff6cbaa19 in dispatch_pending_node (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, pending_frame_index=1, last_time_stamp=1108992938029) >>> at /home/zstas/vpp/src/vlib/main.c:1384 >>> #15 0x00007ffff6cbcdd3 in vlib_main_or_worker_loop (vm=0x7ffff6d67bc0 >>> <vlib_global_main>, is_main=1) at /home/zstas/vpp/src/vlib/main.c:1912 >>> #16 0x00007ffff6cbd848 in vlib_main_loop (vm=0x7ffff6d67bc0 >>> <vlib_global_main>) at /home/zstas/vpp/src/vlib/main.c:2040 >>> #17 0x00007ffff6cbe66b in vlib_main (vm=0x7ffff6d67bc0 <vlib_global_main>, >>> input=0x7fffafc73fb0) at /home/zstas/vpp/src/vlib/main.c:2286 >>> #18 0x00007ffff6d29ac5 in thread0 (arg=140737334639552) at >>> /home/zstas/vpp/src/vlib/unix/main.c:670 >>> #19 0x00007ffff6b56f10 in clib_calljmp () at >>> /home/zstas/vpp/src/vppinfra/longjmp.S:123 >>> #20 0x00007fffffffd150 in ?? () >>> #21 0x00007ffff6d2a095 in vlib_unix_main (argc=28, argv=0x555555631e50) at >>> /home/zstas/vpp/src/vlib/unix/main.c:747 >>> #22 0x000055555555cd8b in main (argc=28, argv=0x555555631e50) at >>> /home/zstas/vpp/src/vpp/vnet/main.c:338 >>> >>> >>> On Fri, 15 Jan 2021 at 19:57, Damjan Marion <dmar...@me.com> wrote: >>> >>>> >>>> This looks right (except the wrong assumption that >>>> vlib_get_frame_to_node will give you empty frame). >>>> >>>> >>>> What happens between this code and error-drop node? Is interface TX >>>> also your custom code? >>>> >>>> — >>>> Damjan >>>> >>>> >>>> On 15.01.2021., at 17:03, Stanislav Zaikin <zsta...@gmail.com> wrote: >>>> >>>> Hello, >>>> >>>> I'm trying to reincarnate router plugin. But I stuck with a pretty >>>> simple thing, when I need to allocate a buffer to store a packet from a tap >>>> device, everything is going well. But when the next node decides to free >>>> this buffer I get a clib_error. >>>> >>>> Basically, code is pretty simple and straightforward: >>>> u32 bi = ~0; >>>> vlib_buffer_t * b; >>>> >>>> if( vlib_buffer_alloc (vm, &bi, 1) != 1 ) >>>> //err handling >>>> >>>> b = vlib_get_buffer (vm, bi); >>>> VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b); >>>> >>>> n_bytes = read (fd, b->data, vlib_buffer_get_default_data_size (vm)); >>>> // err handling >>>> >>>> b->current_length = n_bytes; >>>> vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index; >>>> vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; >>>> >>>> vnet_hw_interface_t * hw; >>>> vlib_frame_t * new_frame; >>>> u32 * to_next; >>>> >>>> hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index); >>>> >>>> new_frame = vlib_get_frame_to_node (vm, hw->output_node_index); >>>> to_next = vlib_frame_vector_args (new_frame); >>>> to_next[0] = bi; >>>> new_frame->n_vectors = 1; >>>> >>>> vlib_put_frame_to_node (vm, hw->output_node_index, new_frame); >>>> >>>> Error looks like: >>>> >>>> gdb) bt >>>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 >>>> #1 0x00007ffff6936859 in __GI_abort () at abort.c:79 >>>> #2 0x00007ffff6bd73bf in os_panic () at >>>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177 >>>> #3 0x00007ffff6b37ea4 in debugger () at >>>> /home/zstas/vpp/src/vppinfra/error.c:84 >>>> #4 0x00007ffff6b38295 in _clib_error (how_to_die=2, >>>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> >>>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s >>>> %U buffer 0x%x") >>>> at /home/zstas/vpp/src/vppinfra/error.c:143 >>>> #5 0x00007ffff6c644fc in vlib_buffer_validate_alloc_free >>>> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffafc725f0, >>>> n_buffers=1, expected_state=VLIB_BUFFER_KNOWN_ALLOCATED) >>>> at /home/zstas/vpp/src/vlib/buffer.c:366 >>>> #6 0x00007ffff6c4a08c in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc725f0, >>>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754 >>>> #7 0x00007ffff6c4a8c9 in vlib_buffer_free_inline (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, buffers=0x7fffb6b37ed4, n_buffers=0, maybe_next=1) at >>>> /home/zstas/vpp/src/vlib/buffer_funcs.h:924 >>>> #8 0x00007ffff6c4a919 in vlib_buffer_free (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, buffers=0x7fffb6b37ed0, n_buffers=1) at >>>> /home/zstas/vpp/src/vlib/buffer_funcs.h:943 >>>> #9 0x00007ffff6c4b3b5 in process_drop_punt (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, node=0x7fffb6b772c0, frame=0x7fffb6b37ec0, >>>> disposition=ERROR_DISPOSITION_DROP) at /home/zstas/vpp/src/vlib/drop.c:231 >>>> #10 0x00007ffff6c4b4bf in error_drop_node_fn_hsw (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, node=0x7fffb6b772c0, frame=0x7fffb6b37ec0) at >>>> /home/zstas/vpp/src/vlib/drop.c:247 >>>> #11 0x00007ffff6cba2e2 in dispatch_node (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, node=0x7fffb6b772c0, type=VLIB_NODE_TYPE_INTERNAL, >>>> dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x7fffb6b37ec0, >>>> last_time_stamp=77969722318942) at /home/zstas/vpp/src/vlib/main.c:1223 >>>> #12 0x00007ffff6cbaa19 in dispatch_pending_node (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, pending_frame_index=2, last_time_stamp=77969722318942) >>>> at /home/zstas/vpp/src/vlib/main.c:1384 >>>> #13 0x00007ffff6cbcdd3 in vlib_main_or_worker_loop (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, is_main=1) at /home/zstas/vpp/src/vlib/main.c:1912 >>>> #14 0x00007ffff6cbd848 in vlib_main_loop (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>) at /home/zstas/vpp/src/vlib/main.c:2040 >>>> #15 0x00007ffff6cbe66b in vlib_main (vm=0x7ffff6d67bc0 <vlib_global_main>, >>>> input=0x7fffafc73fb0) at /home/zstas/vpp/src/vlib/main.c:2286 >>>> #16 0x00007ffff6d29ac5 in thread0 (arg=140737334639552) at >>>> /home/zstas/vpp/src/vlib/unix/main.c:670 >>>> #17 0x00007ffff6b56f10 in clib_calljmp () at >>>> /home/zstas/vpp/src/vppinfra/longjmp.S:123 >>>> #18 0x00007fffffffd150 in ?? () >>>> #19 0x00007ffff6d2a095 in vlib_unix_main (argc=28, argv=0x555555631e50) at >>>> /home/zstas/vpp/src/vlib/unix/main.c:747 >>>> #20 0x000055555555cd8b in main (argc=28, argv=0x555555631e50) at >>>> /home/zstas/vpp/src/vpp/vnet/main.c:338 >>>> >>>> Buffer index seems right: >>>> >>>> Thread 1 "vpp_main" hit Breakpoint 1, tap_rx (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, node=0x7fffb6a75d00, f=0x0, fd=23) at >>>> router/tap_inject_node.c:187 >>>> warning: Source file is more recent than executable. >>>> (gdb) info localsim = 0x7fffb002b7c0 <tap_inject_main>sw_if_index = 1bi = >>>> 653093b = 0x10027dc940n_bytes = 90__FUNCTION__ = "tap_rx" >>>> >>>> /* later */ >>>> >>>> Thread 1 "vpp_main" received signal SIGABRT, Aborted.__GI_raise >>>> (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 >>>> 50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory. >>>> (gdb) bt >>>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 >>>> #1 0x00007ffff6936859 in __GI_abort () at abort.c:79 >>>> #2 0x00007ffff6bd73bf in os_panic () at >>>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177 >>>> #3 0x00007ffff6b37ea4 in debugger () at >>>> /home/zstas/vpp/src/vppinfra/error.c:84 >>>> #4 0x00007ffff6b38295 in _clib_error (how_to_die=2, >>>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> >>>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s >>>> %U buffer 0x%x") >>>> at /home/zstas/vpp/src/vppinfra/error.c:143 >>>> #5 0x00007ffff6c644fc in vlib_buffer_validate_alloc_free >>>> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffafc725f0, >>>> n_buffers=1, expected_state=VLIB_BUFFER_KNOWN_ALLOCATED) >>>> at /home/zstas/vpp/src/vlib/buffer.c:366 >>>> #6 0x00007ffff6c4a08c in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 >>>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc725f0, >>>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754 >>>> /* omit */ >>>> (gdb) select 6 >>>> (gdb) print buffers[0] >>>> $6 = 653093 >>>> >>>> Maybe I need to set some flags on the allocated buffer? Or what else am I >>>> missing? >>>> >>>> -- >>>> Best regards >>>> Stanislav Zaikin >>>> >>>> >>>> >>>> >>>> >>> >>> -- >>> Best regards >>> Stanislav Zaikin >>> >>> >>> >>> >> >> -- >> Best regards >> Stanislav Zaikin >> >> >> >> -- Best regards Stanislav Zaikin
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#18537): https://lists.fd.io/g/vpp-dev/message/18537 Mute This Topic: https://lists.fd.io/mt/79705076/21656 Group Owner: vpp-dev+ow...@lists.fd.io Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-