On 1/20/2023 8:40 AM, Ankur Dwivedi wrote: > Adds trace points for ethdev functions. > Moved the rte_ethdev_trace_rx_burst and rte_ethdev_trace_tx_burst to > a new file rte_ethdev_trace_fp_burst.h. This is needed to resolve > cyclic dependency between rte_ethdev.h and rte_ethdev_trace_fp.h. > > Signed-off-by: Ankur Dwivedi <adwiv...@marvell.com>
<...> > +RTE_TRACE_POINT_REGISTER(rte_eth_trace_rx_hairpin_queue_setup, > + lib.ethdev.rx.hairpin_queue_setup) > + s/rx.hairpin_queue_setup/rx_hairpin_queue_setup/ ? > +RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_hairpin_queue_setup, > + lib.ethdev.tx.hairpin_queue_setup) > + s/tx.hairpin_queue_setup/tx_hairpin_queue_setup/ ? <...> > diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build > index 39250b5da1..f5c0865023 100644 > --- a/lib/ethdev/meson.build > +++ b/lib/ethdev/meson.build > @@ -24,6 +24,7 @@ headers = files( > 'rte_ethdev.h', > 'rte_ethdev_trace.h', > 'rte_ethdev_trace_fp.h', > + 'rte_ethdev_trace_fp_burst.h', Why these trace headers are public? Aren't trace points only used by the APIs, so I expect them to be internal, so applications shouldn't need them. Why they are expsed to user. @David, what do you think? <...> > @@ -258,6 +259,7 @@ rte_eth_iterator_init(struct rte_dev_iterator *iter, > const char *devargs_str) > > end: > iter->cls = rte_class_find_by_name("eth"); > + rte_eth_trace_iterator_init(devargs_str); > rte_devargs_reset(&devargs); > return 0; Why not add trace call just before return, as a separate block, like: `` end: iter->cls = rte_class_find_by_name("eth"); rte_devargs_reset(&devargs); rte_eth_trace_iterator_init(devargs_str); return 0; `` > > @@ -274,6 +276,8 @@ rte_eth_iterator_init(struct rte_dev_iterator *iter, > const char *devargs_str) > uint16_t > rte_eth_iterator_next(struct rte_dev_iterator *iter) > { > + uint16_t id; > + > if (iter == NULL) { > RTE_ETHDEV_LOG(ERR, > "Cannot get next device from NULL iterator\n"); > @@ -297,8 +301,11 @@ rte_eth_iterator_next(struct rte_dev_iterator *iter) > /* A device is matching bus part, need to check ethdev part. */ > iter->class_device = iter->cls->dev_iterate( > iter->class_device, iter->cls_str, iter); > - if (iter->class_device != NULL) > - return eth_dev_to_id(iter->class_device); /* match */ > + if (iter->class_device != NULL) { > + id = eth_dev_to_id(iter->class_device); > + rte_eth_trace_iterator_next(iter, id); > + return id; /* match */ please move 'id' declaration within the if block, and again put trace call into separate block to highlight it, otherwise easy to miss, like: `` if (iter->class_device != NULL) { uint16_t id = eth_dev_to_id(iter->class_device); rte_eth_trace_iterator_next(iter, id); return id; /* match */ } > + } > } while (iter->bus != NULL); /* need to try next rte_device */ > > /* No more ethdev port to iterate. */ > @@ -316,6 +323,7 @@ rte_eth_iterator_cleanup(struct rte_dev_iterator *iter) > > if (iter->bus_str == NULL) > return; /* nothing to free in pure class filter */ > + rte_eth_trace_iterator_cleanup(iter); Can you please make trace call a separate block, I won't comment same for bellow, can you please apply this to all. > free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */ > free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */ > memset(iter, 0, sizeof(*iter)); > @@ -324,12 +332,18 @@ rte_eth_iterator_cleanup(struct rte_dev_iterator *iter) > uint16_t > rte_eth_find_next(uint16_t port_id) > { > + rte_eth_trace_find_next(port_id); > + Why tracing previous port_id, and other one below records next port_id, won't it be confusing to have both with same name. I suggest just keep last one, (next port_id). > while (port_id < RTE_MAX_ETHPORTS && > rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED) > port_id++; > > - if (port_id >= RTE_MAX_ETHPORTS) > + if (port_id >= RTE_MAX_ETHPORTS) { > + rte_eth_trace_find_next(RTE_MAX_ETHPORTS); Is there a specific reason to trace all paths, why not just keep the last one? > return RTE_MAX_ETHPORTS; > + } > + > + rte_eth_trace_find_next(port_id); > > return port_id; > } > @@ -347,10 +361,15 @@ uint16_t > rte_eth_find_next_of(uint16_t port_id, const struct rte_device *parent) > { > port_id = rte_eth_find_next(port_id); > + > + rte_eth_trace_find_next_of(port_id); > + > while (port_id < RTE_MAX_ETHPORTS && > rte_eth_devices[port_id].device != parent) > port_id = rte_eth_find_next(port_id + 1); > > + rte_eth_trace_find_next_of(port_id); > + Same here, lets keep only the last one. > return port_id; > } > > @@ -358,6 +377,9 @@ uint16_t > rte_eth_find_next_sibling(uint16_t port_id, uint16_t ref_port_id) > { > RTE_ETH_VALID_PORTID_OR_ERR_RET(ref_port_id, RTE_MAX_ETHPORTS); > + > + rte_eth_trace_find_next_sibling(port_id, ref_port_id); > + This doesn't record the return values, function should be updated to keep the interim return value of 'rte_eth_find_next_of()' and trace functions should record it. > return rte_eth_find_next_of(port_id, > rte_eth_devices[ref_port_id].device); > } > @@ -372,10 +394,13 @@ int > rte_eth_dev_is_valid_port(uint16_t port_id) > { > if (port_id >= RTE_MAX_ETHPORTS || > - (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) > + (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) { > + rte_ethdev_trace_is_valid_port(port_id, 0); > return 0; > - else > + } else { > + rte_ethdev_trace_is_valid_port(port_id, 1); > return 1; > + } What about to create an interim 'is_valid' variable and record it with single trace call? <...> > uint32_t > rte_eth_speed_bitflag(uint32_t speed, int duplex) > { > + rte_eth_trace_speed_bitflag(speed, duplex); Let's create an interim return value and record it for the trace function, and please move trace function to the bottom of the function. <...> > @@ -2433,6 +2529,7 @@ void > rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, > void *userdata __rte_unused) > { > + rte_eth_trace_tx_buffer_drop_callback(pkts, unsent); Since only pointer value is recorded, function can be moved down, please put emtpy line in between. <...> > @@ -2495,7 +2598,12 @@ rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t > queue_id, uint32_t free_cnt) > /* Call driver to free pending mbufs. */ > ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id], > free_cnt); > - return eth_err(port_id, ret); > + > + ret = eth_err(port_id, ret); Please don't add new empty line _before_ this functions. <...> > @@ -2700,6 +2834,8 @@ rte_eth_link_to_str(char *str, size_t len, const struct > rte_eth_link *eth_link) > return -EINVAL; > } > > + rte_eth_trace_link_to_str(len, eth_link); > + Why not record return value and move trace function to the end of the function and record 'str' too. > if (eth_link->link_status == RTE_ETH_LINK_DOWN) > return snprintf(str, len, "Link down"); > else > @@ -2715,6 +2851,7 @@ int > rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) > { > struct rte_eth_dev *dev; > + int ret; > > RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); > dev = &rte_eth_devices[port_id]; > @@ -2730,7 +2867,12 @@ rte_eth_stats_get(uint16_t port_id, struct > rte_eth_stats *stats) > if (*dev->dev_ops->stats_get == NULL) > return -ENOTSUP; > stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; > - return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); > + > + ret = eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); Pleaes don't add empyt line above. <...> > @@ -3229,13 +3384,19 @@ int > rte_eth_xstats_reset(uint16_t port_id) > { > struct rte_eth_dev *dev; > + int ret; > > RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); > dev = &rte_eth_devices[port_id]; > > /* implemented by the driver */ > - if (dev->dev_ops->xstats_reset != NULL) > - return eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev)); > + if (dev->dev_ops->xstats_reset != NULL) { > + ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev)); Can you please move 'ret' decleration in if block. `` int ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev)); `` > + > + rte_eth_trace_xstats_reset(port_id, ret); > + > + return ret; > + } > > /* fallback to default */ > return rte_eth_stats_reset(port_id); > @@ -3268,24 +3429,37 @@ int > rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t > tx_queue_id, > uint8_t stat_idx) > { > - return eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id, > + int ret; > + > + ret = eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id, > tx_queue_id, > stat_idx, STAT_QMAP_TX)); > + > + rte_ethdev_trace_set_tx_queue_stats_mapping(port_id, tx_queue_id, > stat_idx, ret); > + In below function 'rte_ethdev_trace_set_rx_queue_stats_mapping()' call wrapped to fit 80 lines, but this one not, please be consistent and if possible break both to fit as done below. <...> > +RTE_TRACE_POINT( > + rte_eth_trace_iterator_next, > + RTE_TRACE_POINT_ARGS(struct rte_dev_iterator *iter, uint16_t id), > + rte_trace_point_emit_ptr(iter); > + rte_trace_point_emit_string(iter->bus_str); > + rte_trace_point_emit_string(iter->cls_str); > + rte_trace_point_emit_u16(id); > +) > + Trace functions don't chage parameters, what do you think to update all parameters with 'const' keywords for this: `` RTE_TRACE_POINT( rte_eth_trace_iterator_next, RTE_TRACE_POINT_ARGS(const struct rte_dev_iterator *iter, uint16_t id), rte_trace_point_emit_ptr(iter); rte_trace_point_emit_string(iter->bus_str); rte_trace_point_emit_string(iter->cls_str); rte_trace_point_emit_u16(id); ) `` This comment is not just for this trace point, but for *all* trace points. <...> > +RTE_TRACE_POINT( > + rte_eth_trace_rx_hairpin_queue_setup, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id, > + uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf, > + int ret), > + uint32_t peer_count = conf->peer_count; > + uint32_t tx_explicit = conf->tx_explicit; > + uint32_t manual_bind = conf->manual_bind; > + uint32_t use_locked_device_memory = conf->use_locked_device_memory; > + uint32_t use_rte_memory = conf->use_rte_memory; > + uint32_t force_memory = conf->force_memory; > + > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_u16(rx_queue_id); > + rte_trace_point_emit_u16(nb_rx_desc); > + rte_trace_point_emit_ptr(conf); > + rte_trace_point_emit_u32(peer_count); > + rte_trace_point_emit_u32(tx_explicit); > + rte_trace_point_emit_u32(manual_bind); > + rte_trace_point_emit_u32(use_locked_device_memory); > + rte_trace_point_emit_u32(use_rte_memory); > + rte_trace_point_emit_u32(force_memory); > + rte_trace_point_emit_int(ret); Do we need temporary variables like 'peer_count', why not directly use them: `` rte_trace_point_emit_u32(conf->peer_count); `` > +) > + > +RTE_TRACE_POINT( > + rte_eth_trace_tx_hairpin_queue_setup, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id, > + uint16_t nb_tx_desc, const struct rte_eth_hairpin_conf *conf, > + int ret), > + uint32_t peer_count = conf->peer_count; > + uint32_t tx_explicit = conf->tx_explicit; > + uint32_t manual_bind = conf->manual_bind; > + uint32_t use_locked_device_memory = conf->use_locked_device_memory; > + uint32_t use_rte_memory = conf->use_rte_memory; > + uint32_t force_memory = conf->force_memory; > + > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_u16(tx_queue_id); > + rte_trace_point_emit_u16(nb_tx_desc); > + rte_trace_point_emit_ptr(conf); > + rte_trace_point_emit_u32(peer_count); > + rte_trace_point_emit_u32(tx_explicit); > + rte_trace_point_emit_u32(manual_bind); > + rte_trace_point_emit_u32(use_locked_device_memory); > + rte_trace_point_emit_u32(use_rte_memory); > + rte_trace_point_emit_u32(force_memory); > + rte_trace_point_emit_int(ret); > +) Same as above. <...> > +RTE_TRACE_POINT( > + rte_eth_trace_allmulticast_disable, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast, int diag), > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_int(all_multicast); > + rte_trace_point_emit_int(diag); > +) > + Can you replace 'diag' with 'ret' for consistency, same for '*promiscuous/allmulticast_enable/disable'. <...> > +RTE_TRACE_POINT_FP( > + rte_eth_trace_find_next, > + RTE_TRACE_POINT_ARGS(uint16_t port_id), > + rte_trace_point_emit_u16(port_id); > +) > + Why 'rte_eth_trace_find_next' added as fast path? Can you please add comment for all why it is added as fast path, this help to evaluate/review this later. > +RTE_TRACE_POINT_FP( > + rte_eth_trace_find_next_of, > + RTE_TRACE_POINT_ARGS(uint16_t port_id), > + rte_trace_point_emit_u16(port_id); > +) > + Why not record second parameter of the API, "struct rte_device *parent" too? <...> > +RTE_TRACE_POINT_FP( > + rte_eth_trace_hairpin_get_peer_ports, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t *peer_ports, > + size_t len, uint32_t direction, int ret), > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_ptr(peer_ports); > + rte_trace_point_emit_size_t(len); > + rte_trace_point_emit_u32(direction); > + rte_trace_point_emit_int(ret); > +) > + Some of these functions I am not clear why fast path trace point is used, 'rte_eth_trace_hairpin_get_peer_ports' is one of them, can you please comment the justification to the code as suggested above. <...> > +RTE_TRACE_POINT_FP( > + rte_eth_trace_link_get, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_link *link), > + uint16_t link_duplex = link->link_duplex; > + uint16_t link_autoneg = link->link_autoneg; > + uint16_t link_status = link->link_status; > + > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_u32(link->link_speed); > + rte_trace_point_emit_u16(link_duplex); > + rte_trace_point_emit_u16(link_autoneg); > + rte_trace_point_emit_u16(link_status); > +) Why creating varible for 'link_duplex' for 'link->link_duplex' but using 'link->link_speed', why not use all as 'link->xxx'? <...> > +RTE_TRACE_POINT_FP( > + rte_eth_trace_xstats_get_names, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, > + struct rte_eth_xstat_name *xstats_names, > + unsigned int size, int cnt_used_entries), > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_string(xstats_names->name); > + rte_trace_point_emit_u32(size); > + rte_trace_point_emit_int(cnt_used_entries); > +) > + 'xstats_names' is an array, whose size is 'cnt_used_entries', just printing 'xstats_names->name' for first element can be misleading, can print all values as done in 'rte_eth_xstats_get()' <...> > +RTE_TRACE_POINT_FP( > + rte_eth_trace_xstats_get, > + RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_xstat xstats, > + int i), > + rte_trace_point_emit_u16(port_id); > + rte_trace_point_emit_u64(xstats.id); > + rte_trace_point_emit_u64(xstats.value); > + rte_trace_point_emit_u32(i); > +) > + as far as I can see "id == i", so maybe 'i' can be dropped. <...> > @@ -298,6 +298,72 @@ EXPERIMENTAL { > rte_flow_get_q_aged_flows; > rte_mtr_meter_policy_get; > rte_mtr_meter_profile_get; > + > + # added in 23.03 > + __rte_eth_trace_allmulticast_disable; > + __rte_eth_trace_allmulticast_enable; > + __rte_eth_trace_allmulticast_get; > + __rte_eth_trace_call_rx_callbacks; > + __rte_eth_trace_call_tx_callbacks; > + __rte_eth_trace_find_next; > + __rte_eth_trace_find_next_of; > + __rte_eth_trace_find_next_owned_by; > + __rte_eth_trace_find_next_sibling; > + __rte_eth_trace_hairpin_bind; > + __rte_eth_trace_hairpin_get_peer_ports; > + __rte_eth_trace_hairpin_unbind; > + __rte_eth_trace_iterator_cleanup; > + __rte_eth_trace_iterator_init; > + __rte_eth_trace_iterator_next; > + __rte_eth_trace_link_get; > + __rte_eth_trace_link_get_nowait; > + __rte_eth_trace_link_speed_to_str; > + __rte_eth_trace_link_to_str; > + __rte_eth_trace_promiscuous_disable; > + __rte_eth_trace_promiscuous_enable; > + __rte_eth_trace_promiscuous_get; > + __rte_eth_trace_rx_hairpin_queue_setup; > + __rte_eth_trace_speed_bitflag; > + __rte_eth_trace_stats_get; > + __rte_eth_trace_stats_reset; > + __rte_eth_trace_tx_buffer_count_callback; > + __rte_eth_trace_tx_buffer_drop_callback; > + __rte_eth_trace_tx_buffer_init; > + __rte_eth_trace_tx_buffer_set_err_callback; > + __rte_eth_trace_tx_done_cleanup; > + __rte_eth_trace_tx_hairpin_queue_setup; > + __rte_eth_trace_xstats_get; > + __rte_eth_trace_xstats_get_by_id; > + __rte_eth_trace_xstats_get_id_by_name; > + __rte_eth_trace_xstats_get_names; > + __rte_eth_trace_xstats_get_names_by_id; > + __rte_eth_trace_xstats_reset; > + __rte_ethdev_trace_capability_name; > + __rte_ethdev_trace_count_avail; > + __rte_ethdev_trace_count_total; > + __rte_ethdev_trace_fw_version_get; > + __rte_ethdev_trace_get_name_by_port; > + __rte_ethdev_trace_get_port_by_name; > + __rte_ethdev_trace_get_sec_ctx; > + __rte_ethdev_trace_is_removed; > + __rte_ethdev_trace_is_valid_port; > + __rte_ethdev_trace_owner_delete; > + __rte_ethdev_trace_owner_get; > + __rte_ethdev_trace_owner_new; > + __rte_ethdev_trace_owner_set; > + __rte_ethdev_trace_owner_unset; > + __rte_ethdev_trace_reset; > + __rte_ethdev_trace_rx_offload_name; > + __rte_ethdev_trace_rx_queue_start; > + __rte_ethdev_trace_rx_queue_stop; > + __rte_ethdev_trace_set_link_down; > + __rte_ethdev_trace_set_link_up; > + __rte_ethdev_trace_set_rx_queue_stats_mapping; > + __rte_ethdev_trace_set_tx_queue_stats_mapping; > + __rte_ethdev_trace_socket_id; > + __rte_ethdev_trace_tx_offload_name; > + __rte_ethdev_trace_tx_queue_start; > + __rte_ethdev_trace_tx_queue_stop; Can you please drop these trace objects?