Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coque...@redhat.com> > Sent: Wednesday, June 8, 2022 8:50 PM > To: dev@dpdk.org; jasow...@redhat.com; Xia, Chenbo <chenbo....@intel.com>; > david.march...@redhat.com; Matz, Olivier <olivier.m...@6wind.com>; Ma, > WenwuX <wenwux...@intel.com>; Zhang, Yuying <yuying.zh...@intel.com>; > Singh, Aman Deep <aman.deep.si...@intel.com> > Cc: Maxime Coquelin <maxime.coque...@redhat.com> > Subject: [PATCH v2 5/6] net/vhost: perform SW checksum in Rx path > > Virtio specification supports host checksum offloading > for L4, which is enabled with VIRTIO_NET_F_CSUM feature > negotiation. However, the Vhost PMD does not advertise > Rx checksum offload capabilities, so we can end up with > the VIRTIO_NET_F_CSUM feature being negotiated, implying > the Vhost library returns packets with checksum being > offloaded while the application did not request it. > > Advertising these offload capabilities at the ethdev level > is not enough, because we could still end up with the > application not enabling these offloads while the guest > still negotiates them. > > This patch advertises the Rx checksum offload capabilities, > and introduces a compatibility layer to cover the case > VIRTIO_NET_F_CSUM has been negotiated but the application > does not configure the Rx checksum offloads. This function > performis the L4 Rx checksum in SW for UDP and TCP. Note
performs > that it is not needed to calculate the pseudo-header > checksum, because the Virtio specification requires that > the driver do it. > > This patch does not advertise SCTP checksum offloading > capability for now, but it could be handled later if the > need arises. > > Reported-by: Jason Wang <jasow...@redhat.com> > Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com> > --- > doc/guides/nics/features/vhost.ini | 1 + > drivers/net/vhost/rte_eth_vhost.c | 83 ++++++++++++++++++++++++++++++ > 2 files changed, 84 insertions(+) > > diff --git a/doc/guides/nics/features/vhost.ini > b/doc/guides/nics/features/vhost.ini > index ef81abb439..15f4dfe5e8 100644 > --- a/doc/guides/nics/features/vhost.ini > +++ b/doc/guides/nics/features/vhost.ini > @@ -7,6 +7,7 @@ > Link status = Y > Free Tx mbuf on demand = Y > Queue status event = Y > +L4 checksum offload = P > Basic stats = Y > Extended stats = Y > x86-32 = Y > diff --git a/drivers/net/vhost/rte_eth_vhost.c > b/drivers/net/vhost/rte_eth_vhost.c > index e931d59053..42f0d52ebc 100644 > --- a/drivers/net/vhost/rte_eth_vhost.c > +++ b/drivers/net/vhost/rte_eth_vhost.c > @@ -12,6 +12,7 @@ > #include <ethdev_vdev.h> > #include <rte_malloc.h> > #include <rte_memcpy.h> > +#include <rte_net.h> > #include <rte_bus_vdev.h> > #include <rte_kvargs.h> > #include <rte_vhost.h> > @@ -85,10 +86,12 @@ struct pmd_internal { > char *iface_name; > uint64_t flags; > uint64_t disable_flags; > + uint64_t features; > uint16_t max_queues; > int vid; > rte_atomic32_t started; > bool vlan_strip; > + bool rx_sw_csum; > }; > > struct internal_list { > @@ -275,6 +278,70 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct > rte_eth_xstat *xstats, > return nstats; > } > > +static void > +vhost_dev_csum_configure(struct rte_eth_dev *eth_dev) > +{ > + struct pmd_internal *internal = eth_dev->data->dev_private; > + const struct rte_eth_rxmode *rxmode = ð_dev->data- > >dev_conf.rxmode; > + > + internal->rx_sw_csum = false; > + > + /* SW 
checksum is not compatible with legacy mode */ > + if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS)) > + return; > + > + if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) { > + if (!(rxmode->offloads & > + (RTE_ETH_RX_OFFLOAD_UDP_CKSUM | > RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) { > + VHOST_LOG(NOTICE, "Rx csum will be done in SW, may > impact performance."); Missing \n With above fixed: Reviewed-by: Chenbo Xia <chenbo....@intel.com> > + internal->rx_sw_csum = true; > + } > + } > +} > + > +static void > +vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf) > +{ > + struct rte_net_hdr_lens hdr_lens; > + uint32_t ptype, hdr_len; > + uint16_t csum = 0, csum_offset; > + > + /* Return early if the L4 checksum was not offloaded */ > + if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) != > RTE_MBUF_F_RX_L4_CKSUM_NONE) > + return; > + > + ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK); > + > + hdr_len = hdr_lens.l2_len + hdr_lens.l3_len; > + > + switch (ptype & RTE_PTYPE_L4_MASK) { > + case RTE_PTYPE_L4_TCP: > + csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len; > + break; > + case RTE_PTYPE_L4_UDP: > + csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) + > hdr_len; > + break; > + default: > + /* Unsupported packet type */ > + return; > + } > + > + /* The pseudo-header checksum is already performed, as per Virtio > spec */ > + if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - > hdr_len, &csum) < 0) > + return; > + > + csum = ~csum; > + /* See RFC768 */ > + if (unlikely((ptype & RTE_PTYPE_L4_UDP) && csum == 0)) > + csum = 0xffff; > + > + if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1) > + *rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum; > + > + mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK; > + mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; > +} > + > static uint16_t > eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) > { > @@ -315,6 +382,9 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, 
uint16_t > nb_bufs) > if (r->internal->vlan_strip) > rte_vlan_strip(bufs[i]); > > + if (r->internal->rx_sw_csum) > + vhost_dev_rx_sw_csum(bufs[i]); > + > r->stats.bytes += bufs[i]->pkt_len; > } > > @@ -711,6 +781,11 @@ new_device(int vid) > eth_dev->data->numa_node = newnode; > #endif > > + if (rte_vhost_get_negotiated_features(vid, &internal->features)) { > + VHOST_LOG(ERR, "Failed to get device features\n"); > + return -1; > + } > + > internal->vid = vid; > if (rte_atomic32_read(&internal->started) == 1) { > queue_setup(eth_dev, internal); > @@ -733,6 +808,8 @@ new_device(int vid) > > eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP; > > + vhost_dev_csum_configure(eth_dev); > + > rte_atomic32_set(&internal->dev_attached, 1); > update_queuing_status(eth_dev); > > @@ -1039,6 +1116,8 @@ eth_dev_configure(struct rte_eth_dev *dev) > > internal->vlan_strip = !!(rxmode->offloads & > RTE_ETH_RX_OFFLOAD_VLAN_STRIP); > > + vhost_dev_csum_configure(dev); > + > return 0; > } > > @@ -1189,6 +1268,10 @@ eth_dev_info(struct rte_eth_dev *dev, > dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | > RTE_ETH_TX_OFFLOAD_VLAN_INSERT; > dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP; > + if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) { > + dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | > + RTE_ETH_RX_OFFLOAD_TCP_CKSUM; > + } > > return 0; > } > -- > 2.35.3