Hi David > -----Original Message----- > From: David Marchand <david.march...@redhat.com> > Sent: Friday, May 5, 2023 1:31 PM > > As reported by Ilya [1], unconditionally calling > rte_flow_get_restore_info() impacts an application performance for drivers > that do not provide this ops. > It could also impact processing of packets that require no call to > rte_flow_get_restore_info() at all. > > Advertise in mbuf (via a dynamic flag) whether the driver has more > metadata to provide via rte_flow_get_restore_info(). > The application can then call it only when required. > > Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8- > 7f659bfa4...@ovn.org/ > Signed-off-by: David Marchand <david.march...@redhat.com> > --- > Note: I did not test this RFC patch yet but I hope we can resume and > maybe conclude on the discussion for the tunnel offloading API. >
I think your approach has a good base but what happens if it is not relevant for all flows? In this case your solution will not work. In any case I think there are some issues with the implementation. > --- > app/test-pmd/util.c | 9 +++++---- > drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++---- > drivers/net/mlx5/mlx5.h | 3 ++- > drivers/net/mlx5/mlx5_flow.c | 26 ++++++++++++++++++++------ > drivers/net/mlx5/mlx5_rx.c | 2 +- > drivers/net/mlx5/mlx5_rx.h | 1 + > drivers/net/mlx5/mlx5_trigger.c | 4 ++-- > drivers/net/sfc/sfc_dp.c | 9 ++------- > lib/ethdev/ethdev_driver.h | 8 ++++++++ > lib/ethdev/rte_flow.c | 29 +++++++++++++++++++++++++++++ > lib/ethdev/rte_flow.h | 19 ++++++++++++++++++- > lib/ethdev/version.map | 4 ++++ > 12 files changed, 102 insertions(+), 26 deletions(-) > > diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c > index f9df5f69ef..5aa69ed545 100644 > --- a/app/test-pmd/util.c > +++ b/app/test-pmd/util.c > @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, > struct rte_mbuf *pkts[], > char print_buf[MAX_STRING_LEN]; > size_t buf_size = MAX_STRING_LEN; > size_t cur_len = 0; > + uint64_t restore_info_dynflag; > > if (!nb_pkts) > return; > + restore_info_dynflag = rte_flow_restore_info_dynflag(); > MKDUMPSTR(print_buf, buf_size, cur_len, > "port %u/queue %u: %s %u packets\n", port_id, queue, > is_rx ? "received" : "sent", (unsigned int) nb_pkts); > for (i = 0; i < nb_pkts; i++) { > - int ret; > struct rte_flow_error error; > struct rte_flow_restore_info info = { 0, }; > > mb = pkts[i]; > + ol_flags = mb->ol_flags; > if (rxq_share > 0) > MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ", > mb->port); > @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, > struct rte_mbuf *pkts[], > eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type); > packet_type = mb->packet_type; > is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type); > - ret = rte_flow_get_restore_info(port_id, mb, &info, &error); > - if (!ret) { > + if ((ol_flags & restore_info_dynflag) != 0 && > + rte_flow_get_restore_info(port_id, mb, &info, > &error) == 0) { > MKDUMPSTR(print_buf, buf_size, cur_len, > "restore info:"); > if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) { > @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, > struct rte_mbuf *pkts[], > " - pool=%s - type=0x%04x - length=%u - > nb_segs=%d", > mb->pool->name, eth_type, (unsigned int) mb- > >pkt_len, > (int)mb->nb_segs); > - ol_flags = mb->ol_flags; > if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) { > MKDUMPSTR(print_buf, buf_size, cur_len, > " - RSS hash=0x%x", > diff --git a/drivers/net/mlx5/linux/mlx5_os.c > b/drivers/net/mlx5/linux/mlx5_os.c > index 980234e2ac..e6e3784013 100644 > --- a/drivers/net/mlx5/linux/mlx5_os.c > +++ b/drivers/net/mlx5/linux/mlx5_os.c > @@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) > goto error; > } > #endif > - if (!sh->tunnel_hub && sh->config.dv_miss_info) > + if (!sh->tunnel_hub && sh->config.dv_miss_info) { > + err = mlx5_flow_restore_info_register(); > + if (err) { > + DRV_LOG(ERR, "Could not register mbuf dynflag for > rte_flow_get_restore_info"); > + goto error; > + } > err = mlx5_alloc_tunnel_hub(sh); > - if (err) { > - DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err); > - goto error; > + if (err) { > + DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed > err=%d", err); > + goto error; > + } > } > if (sh->config.reclaim_mode == MLX5_RCM_AGGR) { > mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1); > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h > index 9eae692037..77cdc802da 100644 > --- a/drivers/net/mlx5/mlx5.h > +++ b/drivers/net/mlx5/mlx5.h > @@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev > *dev, struct rte_flow *flow, > int mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, struct rte_flow > *flow, > FILE *file, struct rte_flow_error *error); > #endif > -void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev); > +int mlx5_flow_restore_info_register(void); > +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev); > int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, > uint32_t nb_contexts, struct rte_flow_error *error); > int mlx5_validate_action_ct(struct rte_eth_dev *dev, > diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c > index d0275fdd00..715b7d327d 100644 > --- a/drivers/net/mlx5/mlx5_flow.c > +++ b/drivers/net/mlx5/mlx5_flow.c > @@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev) > priv->sh->shared_mark_enabled = 0; > } > > +static uint64_t mlx5_restore_info_dynflag; > + > +int > +mlx5_flow_restore_info_register(void) > +{ > + int err = 0; > + > + if (mlx5_restore_info_dynflag == 0) { > + if > (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0) > + err = ENOMEM; > + } > + return err; > +} > + > /** > * Set the Rx queue dynamic metadata (mask and offset) for a flow > * > @@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev) > * Pointer to the Ethernet device structure. > */ > void > -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) > +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev) Why did you change the name? > { > struct mlx5_priv *priv = dev->data->dev_private; > unsigned int i; > @@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct > rte_eth_dev *dev) > data->flow_meta_offset = > rte_flow_dynf_metadata_offs; > data->flow_meta_port_mask = priv->sh- > >dv_meta_mask; > } > + data->mark_flag = RTE_MBUF_F_RX_FDIR_ID; > + if (is_tunnel_offload_active(dev)) > + data->mark_flag |= mlx5_restore_info_dynflag; > } > } > > @@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct > rte_eth_dev *dev, > const struct mlx5_flow_tbl_data_entry *tble; > const uint64_t mask = RTE_MBUF_F_RX_FDIR | > RTE_MBUF_F_RX_FDIR_ID; > > - if (!is_tunnel_offload_active(dev)) { > - info->flags = 0; > - return 0; > - } > - > + if ((ol_flags & mlx5_restore_info_dynflag) == 0) > + goto err; > if ((ol_flags & mask) != mask) > goto err; > tble = tunnel_mark_decode(dev, m->hash.fdir.hi); > diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c > index a2be523e9e..71c4638251 100644 > --- a/drivers/net/mlx5/mlx5_rx.c > +++ b/drivers/net/mlx5/mlx5_rx.c > @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct > rte_mbuf *pkt, > if (MLX5_FLOW_MARK_IS_VALID(mark)) { > pkt->ol_flags |= RTE_MBUF_F_RX_FDIR; > if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { > - pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID; > + pkt->ol_flags |= rxq->mark_flag; > pkt->hash.fdir.hi = > mlx5_flow_mark_get(mark); > } > } > diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h > index 52c35c83f8..3514edd84e 100644 > --- a/drivers/net/mlx5/mlx5_rx.h > +++ b/drivers/net/mlx5/mlx5_rx.h > @@ -136,6 +136,7 @@ struct mlx5_rxq_data { > struct mlx5_uar_data uar_data; /* CQ doorbell. */ > uint32_t cqn; /* CQ number. */ > uint8_t cq_arm_sn; /* CQ arm seq number. */ > + uint64_t mark_flag; /* ol_flags to set with marks. */ > uint32_t tunnel; /* Tunnel information. */ > int timestamp_offset; /* Dynamic mbuf field for timestamp. */ > uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */ > diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c > index bbaa7d2aa0..7bdb897612 100644 > --- a/drivers/net/mlx5/mlx5_trigger.c > +++ b/drivers/net/mlx5/mlx5_trigger.c > @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) > dev->data->port_id); > goto error; > } > - /* Set a mask and offset of dynamic metadata flows into Rx queues. > */ > - mlx5_flow_rxq_dynf_metadata_set(dev); > + /* Set dynamic fields and flags into Rx queues. */ > + mlx5_flow_rxq_dynf_set(dev); > /* Set flags and context to convert Rx timestamps. */ > mlx5_rxq_timestamp_set(dev); > /* Set a mask and offset of scheduling on timestamp into Tx queues. > */ > diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c > index 9f2093b353..8b2dbea325 100644 > --- a/drivers/net/sfc/sfc_dp.c > +++ b/drivers/net/sfc/sfc_dp.c > @@ -11,6 +11,7 @@ > #include <string.h> > #include <errno.h> > > +#include <ethdev_driver.h> > #include <rte_log.h> > #include <rte_mbuf_dyn.h> > > @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void) > .size = sizeof(uint8_t), > .align = __alignof__(uint8_t), > }; > - static const struct rte_mbuf_dynflag ft_ctx_id_valid = { > - .name = "rte_net_sfc_dynflag_ft_ctx_id_valid", > - }; > > int field_offset; > - int flag; > > SFC_GENERIC_LOG(INFO, "%s() entry", __func__); > > @@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void) > return -1; > } > > - flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid); > - if (flag < 0) { > + if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) < > 0) { > SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id > dynflag", > __func__); > return -1; > } > > sfc_dp_ft_ctx_id_offset = field_offset; > - sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag; > > SFC_GENERIC_LOG(INFO, "%s() done", __func__); > > diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h > index 2c9d615fb5..6c17d84d1b 100644 > --- a/lib/ethdev/ethdev_driver.h > +++ b/lib/ethdev/ethdev_driver.h > @@ -1949,6 +1949,14 @@ __rte_internal > int > rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag); > > +/** > + * @internal > + * Register mbuf dynamic flag for rte_flow_get_restore_info. > + */ > +__rte_internal > +int > +rte_flow_restore_info_dynflag_register(uint64_t *flag); > + > > /* > * Legacy ethdev API used internally by drivers. > diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c > index 69e6e749f7..10cd9d12ba 100644 > --- a/lib/ethdev/rte_flow.c > +++ b/lib/ethdev/rte_flow.c > @@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id, > NULL, rte_strerror(ENOTSUP)); > } > > +static struct { > + const struct rte_mbuf_dynflag desc; > + uint64_t value; > +} flow_restore_info_dynflag = { > + .desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", }, > +}; Why not have this structure in the register function? > + > +uint64_t > +rte_flow_restore_info_dynflag(void) > +{ > + return flow_restore_info_dynflag.value; > +} > + > +int > +rte_flow_restore_info_dynflag_register(uint64_t *flag) > +{ > + if (flow_restore_info_dynflag.value == 0) { > + int offset = > rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc); > + > + if (offset < 0) > + return -1; > + flow_restore_info_dynflag.value = RTE_BIT64(offset); > + } > + if (*flag) > + *flag = rte_flow_restore_info_dynflag(); > + > + return 0; > +} > + > int > rte_flow_tunnel_action_decap_release(uint16_t port_id, > struct rte_flow_action *actions, > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h > index 713ba8b65c..5ce2db4bbd 100644 > --- a/lib/ethdev/rte_flow.h > +++ b/lib/ethdev/rte_flow.h > @@ -4918,7 +4918,24 @@ rte_flow_tunnel_match(uint16_t port_id, > struct rte_flow_error *error); > > /** > - * Populate the current packet processing state, if exists, for the given > mbuf. > + * On reception of a mbuf from HW, a call to rte_flow_get_restore_info() > may be > + * required to retrieve some metadata. > + * This function returns the associated mbuf ol_flags. > + * > + * Note: the dynamic flag is registered during the probing of the first > device > + * that requires it. If this function returns a non 0 value, this value won't > + * change for the rest of the life of the application. > + * > + * @return > + * The offload flag indicating rte_flow_get_restore_info() must be called. > + */ > +__rte_experimental > +uint64_t > +rte_flow_restore_info_dynflag(void); > + > +/** > + * If a mbuf contains the rte_flow_restore_info_dynflag() flag in ol_flags, > + * populate the current packet processing state. > * > * One should negotiate tunnel metadata delivery from the NIC to the HW. > * @see rte_eth_rx_metadata_negotiate() > diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map > index 357d1a88c0..bf5668e928 100644 > --- a/lib/ethdev/version.map > +++ b/lib/ethdev/version.map > @@ -299,6 +299,9 @@ EXPERIMENTAL { > rte_flow_action_handle_query_update; > rte_flow_async_action_handle_query_update; > rte_flow_async_create_by_index; > + > + # added in 23.07 > + rte_flow_restore_info_dynflag; > }; > > INTERNAL { > @@ -328,4 +331,5 @@ INTERNAL { > rte_eth_representor_id_get; > rte_eth_switch_domain_alloc; > rte_eth_switch_domain_free; > + rte_flow_restore_info_dynflag_register; > }; > -- > 2.40.0 Best, Ori