On 6/8/21 3:29 PM, Olivier Matz wrote: > Hi Ferruh, Andrew, > > On Tue, Jun 08, 2021 at 01:23:33PM +0300, Andrew Rybchenko wrote: >> On 4/30/21 6:42 PM, Ferruh Yigit wrote: >>> On 4/27/2021 2:57 PM, Olivier Matz wrote: >>>> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP >>>> checksum 0"), the functions rte_ipv4_udptcp_cksum() and >>>> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to >>>> verify a packet containing a valid checksum. >>>> >>>> Since these functions should be used to calculate the checksum to set in >>>> a packet, introduce 2 new helpers for checksum verification. They return >>>> 0 if the checksum is valid in the packet. >>>> >>>> Use this new helper in net/tap driver. >>>> >>>> Signed-off-by: Olivier Matz <olivier.m...@6wind.com> >>>> --- >>>> drivers/net/tap/rte_eth_tap.c | 7 +- >>>> lib/net/rte_ip.h | 124 +++++++++++++++++++++++++++------- >>>> 2 files changed, 104 insertions(+), 27 deletions(-) >>>> >>>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c >>>> index 71282e8065..b14d5a1d55 100644 >>>> --- a/drivers/net/tap/rte_eth_tap.c >>>> +++ b/drivers/net/tap/rte_eth_tap.c >>>> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf) >>>> return; >>>> } >>>> } >>>> - cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); >>>> + cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, >>>> + l4_hdr); >>>> } else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */ >>>> - cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); >>>> + cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr, >>>> + l4_hdr); >>>> } >>>> - cksum_ok = (cksum == 0) || (cksum == 0xffff); >>>> mbuf->ol_flags |= cksum_ok ? >>>> PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD; >>>> } >>>> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h >>>> index 8c189009b0..ef84bcc5bf 100644 >>>> --- a/lib/net/rte_ip.h >>>> +++ b/lib/net/rte_ip.h >>>> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr >>>> *ipv4_hdr, uint64_t ol_flags) >>>> } >>>> >>>> /** >>>> - * Process the IPv4 UDP or TCP checksum. >>>> - * >>>> - * The IP and layer 4 checksum must be set to 0 in the packet by >>>> - * the caller. >>>> - * >>>> - * @param ipv4_hdr >>>> - * The pointer to the contiguous IPv4 header. >>>> - * @param l4_hdr >>>> - * The pointer to the beginning of the L4 header. >>>> - * @return >>>> - * The complemented checksum to set in the IP packet. >>>> + * @internal Calculate the non-complemented IPv4 L4 checksum >>>> */ >>>> static inline uint16_t >>>> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void >>>> *l4_hdr) >>>> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void >>>> *l4_hdr) >>>> { >>>> uint32_t cksum; >>>> uint32_t l3_len, l4_len; >>>> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr >>>> *ipv4_hdr, const void *l4_hdr) >>>> cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); >>>> >>>> cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); >>>> - cksum = (~cksum) & 0xffff; >>>> + >>>> + return (uint16_t)cksum; >>>> +} >>>> + >>>> +/** >>>> + * Process the IPv4 UDP or TCP checksum. >>>> + * >>>> + * The IP and layer 4 checksum must be set to 0 in the packet by >>>> + * the caller. >>>> + * >>>> + * @param ipv4_hdr >>>> + * The pointer to the contiguous IPv4 header. >>>> + * @param l4_hdr >>>> + * The pointer to the beginning of the L4 header. >>>> + * @return >>>> + * The complemented checksum to set in the IP packet. >>>> + */ >>>> +static inline uint16_t >>>> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void >>>> *l4_hdr) >>>> +{ >>>> + uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr); >>>> + >>>> + cksum = ~cksum; >>>> + >>>> /* >>>> - * Per RFC 768:If the computed checksum is zero for UDP, >>>> + * Per RFC 768: If the computed checksum is zero for UDP, >>>> * it is transmitted as all ones >>>> * (the equivalent in one's complement arithmetic). >>>> */ >>>> if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP) >>>> cksum = 0xffff; >>>> >>>> - return (uint16_t)cksum; >>>> + return cksum; >>>> +} >>>> + >>>> +/** >>>> + * Validate the IPv4 UDP or TCP checksum. >>>> + * >>>> + * @param ipv4_hdr >>>> + * The pointer to the contiguous IPv4 header. >>>> + * @param l4_hdr >>>> + * The pointer to the beginning of the L4 header. >>>> + * @return >>>> + * Return 0 if the checksum is correct, else -1. >>>> + */ >>>> +__rte_experimental >>>> +static inline int >>>> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr, >>>> + const void *l4_hdr) >>>> +{ >>>> + uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr); >>>> + >>>> + if (cksum != 0xffff) >>>> + return -1; >>>> + >>>> + return 0; >>> >>> There is behavior change in tap verify, I am asking just to verify if >>> expected, >>> >>> Previously for UDP, if calculated checksum is '0', the >>> 'rte_ipv4_udptcp_cksum()' >>> returns 0xFFFF. >>> And 0xFFFF is taken as good checksum by tap PMD. > > rte_ipv4_udptcp_cksum() cannot return 0xFFFF: this is only possible if > all data is 0. Before verifying a udp packet, the user must check that > it is not 0 (which means no checksum). In tcp, "Data offset" is never > 0. Moreover, port=0 is a reserved value for both udp and tcp. > >>> With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it >>> will >>> be taken as bad checksum. >>> >>> I don't know if calculated checksum with valid checksum in place can return >>> 0. >>> >>> >>> Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion >>> (cksum = >>> ~cksum;) seems changing pass/fail status of the checksum, unless I am not >>> missing anything here. >> >> Yes, it looks suspicious to me as well. >> >> Olivier, could you clarify, please. > > Before commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum > 0"), > the behavior was: > > // rte_ipv4_udptcp_cksum() is 0xffff if checksum is valid > // so cksum is 0 if checksum is valid > cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); > // ol_flags is set to PKT_RX_L4_CKSUM_GOOD if checksum is valid > mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD; > > After commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"), > it is broken: > > // rte_ipv4_udptcp_cksum() is 0 (tcp) or 0xffff (udp) if checksum is valid > // so cksum is 0xffff (tcp) or 0 (udp) if checksum is valid > cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); > // ol_flags is set to BAD (tcp) or GOOD (udp) if checksum is valid > mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD; > > After patch 2/4 ("net/tap: fix Rx cksum flags on TCP packets"), the > correct behavior is restored: > > // cksum is 0 (tcp) or 0xffff (udp) if checksum is valid > // note: 0xffff for tcp cannot happen (there is at least 1 bit set in the > header) > // note: 0 for udp cannot happen (it is replaced by in > rte_ipv4_udptcp_cksum()) > cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); > // cksum_ok is 1 if checksum is valid > cksum_ok = (cksum == 0) || (cksum == 0xffff); > // ol_flags is set to GOOD if checksum is valid > mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD; > > After this patch [3/4] ("net: introduce functions to verify L4 checksums"), > it is simplified by using rte_ipv4_udptcp_cksum_verify(): > > // cksum_ok is 1 if checksum is valid > cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, l4_hdr); > // ol_flags is set to GOOD if checksum is valid > mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD; >
Many thanks for the detailed explanations. It replies to all my questions (even not asked, but kept in my head). Andrew. > Olivier > >> >>>> } >>>> >>>> /** >>>> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr >>>> *ipv6_hdr, uint64_t ol_flags) >>>> return __rte_raw_cksum_reduce(sum); >>>> } >>>> >>>> +/** >>>> + * @internal Calculate the non-complemented IPv4 L4 checksum >>>> + */ >>>> +static inline uint16_t >>>> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void >>>> *l4_hdr) >>>> +{ >>>> + uint32_t cksum; >>>> + uint32_t l4_len; >>>> + >>>> + l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); >>>> + >>>> + cksum = rte_raw_cksum(l4_hdr, l4_len); >>>> + cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); >>>> + >>>> + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); >>>> + >>>> + return (uint16_t)cksum; >>>> +} >>>> + >>>> /** >>>> * Process the IPv6 UDP or TCP checksum. >>>> * >>>> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr >>>> *ipv6_hdr, uint64_t ol_flags) >>>> static inline uint16_t >>>> rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void >>>> *l4_hdr) >>>> { >>>> - uint32_t cksum; >>>> - uint32_t l4_len; >>>> - >>>> - l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); >>>> + uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr); >>>> >>>> - cksum = rte_raw_cksum(l4_hdr, l4_len); >>>> - cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); >>>> + cksum = ~cksum; >>>> >>>> - cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); >>>> - cksum = (~cksum) & 0xffff; >>>> /* >>>> * Per RFC 768: If the computed checksum is zero for UDP, >>>> * it is transmitted as all ones >>>> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr >>>> *ipv6_hdr, const void *l4_hdr) >>>> if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP) >>>> cksum = 0xffff; >>>> >>>> - return (uint16_t)cksum; >>>> + return cksum; >>>> +} >>>> + >>>> +/** >>>> + * Validate the IPv6 UDP or TCP checksum. >>>> + * >>>> + * The function accepts a 0 checksum, since it can exceptionally happen. >>>> See 8.1 >>>> + * (Upper-Layer Checksums) in RFC 8200. >>>> + * >>>> + * @param ipv6_hdr >>>> + * The pointer to the contiguous IPv6 header. >>>> + * @param l4_hdr >>>> + * The pointer to the beginning of the L4 header. >>>> + * @return >>>> + * Return 0 if the checksum is correct, else -1. >>>> + */ >>>> +__rte_experimental >>>> +static inline int >>>> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr, >>>> + const void *l4_hdr) >>>> +{ >>>> + uint16_t cksum; >>>> + >>>> + cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr); >>>> + if (cksum != 0xffff) >>>> + return -1; >>>> + >>>> + return 0; >>>> } >>> >>> Nitpicking but, 'rte_ipv4_udptcp_cksum_verify()' is almost same with this >>> function ('rte_ipv6_udptcp_cksum_verify()') but they have different line >>> spacing, can be good to have similar syntax for both. >>> >>