On Fri, 3 Jun 2022 08:53:59 -0700
Stephen Hemminger <step...@networkplumber.org> wrote:

> On Fri, 25 Mar 2022 20:58:09 +0530
> madhuker.myt...@oracle.com wrote:
> 
> > From: Madhuker Mythri <madhuker.myt...@oracle.com>
> > 
> > As per analysis on DPDK Tap PMD, the existing RSS algorithm considering 
> > 4-tuple(Src-IP, Dst-IP, Src-port and Dst-port) and identification of 
> > fragment packets is not done, thus we are seeing all the fragmented chunks 
> > of single packet differs RSS hash value and distributed across multiple 
> > queues.
> > The RSS algorithm assumes that, all the incoming IP packets are based on 
> > L4-protocol(UDP/TCP) and trying to fetch the L4 fields(Src-port and 
> > Dst-port) for each incoming packet, but for the fragmented packets these 
> > L4-header will not be present(except for first packet) and should not 
> > consider in RSS hash for L4 header fields in-case of fragmented packets.
> > Which is a bug in the RSS algorithm implemented in the BPF functionality 
> > under TAP PMD.
> > 
> > So, modified the RSS eBPF C-program and generated the structure of C-array 
> > in the 'tap_bpf_insns.h' file, which is in eBPF byte-code instructions 
> > format.
> > 
> > Bugzilla Id: 870
> > 
> > Signed-off-by: Madhuker Mythri <madhuker.myt...@oracle.com>
> > ---
> >  drivers/net/tap/tap_bpf_insns.h   | 3371 +++++++++++++++--------------
> >  drivers/net/tap/tap_bpf_program.c |   48 +-
> >  2 files changed, 1743 insertions(+), 1676 deletions(-)
> >  
> 
> Going back to the original RSS specs on Windows:
> https://docs.microsoft.com/en-us/windows-hardware/drivers/network/rss-hashing-types
> 
> There is note there:
> 
>       If a NIC receives a packet that has both IP and TCP headers, 
> NDIS_HASH_TCP_IPV4 should not always
>       be used. In the case of a fragmented IP packet, NDIS_HASH_IPV4 must be 
> used. 
>       This includes the first fragment which contains both IP and TCP headers.
> 
> The modified BPF program in this patch does not do that exactly.
> Adding port of 0 is not the same hashing a smaller tuple.
> 
> IPV6 fragments need similar fix?

Something like the following (totally untested)...

diff --git a/drivers/net/tap/tap_bpf_program.c 
b/drivers/net/tap/tap_bpf_program.c
index 20c310e5e7ba..db6b32b9003b 100644
--- a/drivers/net/tap/tap_bpf_program.c
+++ b/drivers/net/tap/tap_bpf_program.c
@@ -170,10 +170,19 @@ rss_l3_l4(struct __sk_buff *skb)
                        .dport = PORT(*(src_dst_port + 2),
                                        *(src_dst_port + 3)),
                };
+               __u16 *frag_off_addr = data + off + offsetof(struct iphdr, 
frag_off);
+               __u16 frag_off = PORT(frag_off_addr[0], frag_off_addr[1]);
+               __u8 *proto_addr = data + off + offsetof(struct iphdr, 
protocol);
+               __u8 protocol = *proto_addr;
                __u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
-               if (rsskey->hash_fields & (1 << HASH_FIELD_IPV4_L3))
+
+               /* If only want L3 or fragmented or not tcp/udp then do L3 only 
*/
+               if ((rsskey->hash_fields & (1 << HASH_FIELD_IPV4_L3)) ||
+                   (frag_off & 0x3fff) ||
+                   !(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP))
                        input_len--;
-               hash = rte_softrss_be((__u32 *)&v4_tuple, key, 3);
+
+               hash = rte_softrss_be((__u32 *)&v4_tuple, key, input_len);
        } else if (proto == htons(ETH_P_IPV6)) {
                if (data + off + sizeof(struct ipv6hdr) +
                                        sizeof(__u32) > data_end)
@@ -182,6 +191,8 @@ rss_l3_l4(struct __sk_buff *skb)
                                        offsetof(struct ipv6hdr, saddr);
                __u8 *src_dst_port = data + off +
                                        sizeof(struct ipv6hdr);
+               __u8 *nexthdr = data + off + offsetof(struct ipv6hdr, nexthdr);
+
                struct ipv6_l3_l4_tuple v6_tuple;
                for (j = 0; j < 4; j++)
                        *((uint32_t *)&v6_tuple.src_addr + j) =
@@ -197,9 +208,11 @@ rss_l3_l4(struct __sk_buff *skb)
                              *(src_dst_port + 3));
 
                __u8 input_len = sizeof(v6_tuple) / sizeof(__u32);
-               if (rsskey->hash_fields & (1 << HASH_FIELD_IPV6_L3))
+
+               if ((rsskey->hash_fields & (1 << HASH_FIELD_IPV6_L3)) ||
+                   !(*nexthdr == IPPROTO_TCP || *nexthdr == IPPROTO_UDP))
                        input_len--;
-               hash = rte_softrss_be((__u32 *)&v6_tuple, key, 9);
+               hash = rte_softrss_be((__u32 *)&v6_tuple, key, input_len);
        } else {
                return TC_ACT_PIPE;
        }

Reply via email to