Hello, On Fri, Oct 15, 2021 at 7:27 AM Xiaoyun Li <xiaoyun...@intel.com> wrote: > > In csum forwarding mode, software UDP/TCP csum calculation only takes > the first segment into account while using the whole packet length so > the calculation will read invalid memory region with multi-segments > packets and will get wrong value. > This patch fixes this issue. > > Fixes: af75078fece3 ("first public release") > Cc: sta...@dpdk.org > > Signed-off-by: Xiaoyun Li <xiaoyun...@intel.com> > --- > app/test-pmd/csumonly.c | 31 +++++++++++++++++++++++-------- > 1 file changed, 23 insertions(+), 8 deletions(-) > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c > index 090797318a..5df3be0a6f 100644 > --- a/app/test-pmd/csumonly.c > +++ b/app/test-pmd/csumonly.c > @@ -18,7 +18,7 @@ > #include <rte_log.h> > #include <rte_debug.h> > #include <rte_cycles.h> > -#include <rte_memory.h> > +#include <rte_malloc.h>
This include caught my eye. > #include <rte_memcpy.h> > #include <rte_launch.h> > #include <rte_eal.h> > @@ -56,6 +56,11 @@ > #define GRE_SUPPORTED_FIELDS (GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT |\ > GRE_SEQUENCE_PRESENT) > > +/* When UDP or TCP or outer UDP csum offload is off, sw l4 csum is needed */ > +#define UDP_TCP_CSUM (DEV_TX_OFFLOAD_UDP_CKSUM |\ > + DEV_TX_OFFLOAD_TCP_CKSUM |\ > + DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) > + > /* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */ > #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN > #define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> > 8))) > @@ -602,12 +607,8 @@ process_outer_cksums(void *outer_l3_hdr, struct > testpmd_offload_info *info, > /* do not recalculate udp cksum if it was 0 */ > if (udp_hdr->dgram_cksum != 0) { > udp_hdr->dgram_cksum = 0; > - if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) > - udp_hdr->dgram_cksum = > - rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr); > - else > - udp_hdr->dgram_cksum = > - rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr); > + udp_hdr->dgram_cksum = get_udptcp_checksum(outer_l3_hdr, > + udp_hdr, info->outer_ethertype); > } > > return ol_flags; > @@ -802,6 +803,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) > struct rte_mbuf *m, *p; > struct rte_ether_hdr *eth_hdr; > void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ > + uint8_t *l3_buf = NULL; > void **gro_ctx; > uint16_t gro_pkts_num; > uint8_t gro_enable; > @@ -877,7 +879,19 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) > rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, > &eth_hdr->src_addr); > parse_ethernet(eth_hdr, &info); > - l3_hdr = (char *)eth_hdr + info.l2_len; > + /* When sw csum is needed, multi-segs needs a buf to contain > + * the whole packet for later UDP/TCP csum calculation. 
> + */ > + if (m->nb_segs > 1 && !(tx_ol_flags & PKT_TX_TCP_SEG) && > + !(tx_offloads & UDP_TCP_CSUM)) { > + l3_buf = rte_zmalloc("csum l3_buf", > + info.pkt_len - info.l2_len, > + RTE_CACHE_LINE_SIZE); Rather than doing a dynamic allocation in the datapath, can't we use a fixed-size buffer on the stack? > + rte_pktmbuf_read(m, info.l2_len, > + info.pkt_len - info.l2_len, l3_buf); > + l3_hdr = l3_buf; > + } else > + l3_hdr = (char *)eth_hdr + info.l2_len; > > /* check if it's a supported tunnel */ > if (txp->parse_tunnel) { > @@ -1051,6 +1065,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) > printf("tx: flags=%s", buf); > printf("\n"); > } > + rte_free(l3_buf); > } > > if (unlikely(gro_enable)) { > -- > 2.25.1 > -- David Marchand