Hi Kumara, The v3 patch is not complete and it seems to be a patch based on v2. In addition, did you test the code for tcp4 and tcp6 after your change?
For others, please see replies inline. Thanks, Jiayu > -----Original Message----- > From: Kumara Parameshwaran <kumaraparames...@gmail.com> > Sent: Friday, June 2, 2023 2:34 PM > To: Hu, Jiayu <jiayu...@intel.com> > Cc: dev@dpdk.org; Kumara Parameshwaran > <kumaraparames...@gmail.com> > Subject: [PATCH v3] gro : ipv6 changes to support GRO for TCP/ipv6 > > The patch adds GRO support for TCP/ipv6 packets. This does not include the > support for vxlan, udp ipv6 packets. > > Signed-off-by: Kumara Parameshwaran <kumaraparames...@gmail.com> > --- > v1: > * Changes to support GRO for TCP/ipv6 packets. This does not > include > vxlan changes. > * The GRO is performed only for ipv6 packets that does not contain > extension headers. > * The logic for the TCP coalescing remains the same, in ipv6 header > the source address, destination address, flow label, version fields > are expected to be the same. > * Re-organised the code to reuse certain tcp functions for both ipv4 > and > ipv6 flows. > v2: > * Fix comments in gro_tcp6.h header file. > > v3: > * Adderess review comments to fix code duplication for v4 and v6 > > lib/gro/gro_tcp.c | 160 ++++++++++++++++++++++++ > lib/gro/gro_tcp.h | 63 ++++++++++ > lib/gro/gro_tcp4.c | 255 ++++++++++++--------------------------- > lib/gro/gro_tcp4.h | 18 +-- > lib/gro/gro_tcp6.c | 243 ++++++++++--------------------------- > lib/gro/gro_tcp6.h | 31 +++-- > lib/gro/gro_vxlan_tcp4.c | 18 +-- > lib/gro/meson.build | 1 + > 8 files changed, 396 insertions(+), 393 deletions(-) create mode 100644 > lib/gro/gro_tcp.c > > diff --git a/lib/gro/gro_tcp.c b/lib/gro/gro_tcp.c new file mode 100644 index > 0000000000..6a5aaada58 > --- /dev/null > +++ b/lib/gro/gro_tcp.c > @@ -0,0 +1,160 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2017 Intel Corporation > + */ > +#include <rte_malloc.h> > +#include <rte_mbuf.h> > +#include <rte_ethdev.h> > + > +#include "gro_tcp.h" > + > +static inline uint32_t > +find_an_empty_item(struct gro_tcp_item *items, > + uint32_t table_size) > +{ > + uint32_t i; > + > + for (i = 0; i < table_size; i++) > + if (items[i].firstseg == NULL) > + return i; > + return INVALID_ARRAY_INDEX; > +} > + > +uint32_t > +insert_new_tcp_item(struct rte_mbuf *pkt, > + struct gro_tcp_item *items, > + uint32_t *item_num, > + uint32_t table_size, > + uint64_t start_time, > + uint32_t prev_idx, > + uint32_t sent_seq, > + uint16_t ip_id, > + uint8_t is_atomic) > +{ > + uint32_t item_idx; > + > + item_idx = find_an_empty_item(items, table_size); > + if (item_idx == INVALID_ARRAY_INDEX) > + return INVALID_ARRAY_INDEX; > + > + items[item_idx].firstseg = pkt; > + items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); > + items[item_idx].start_time = start_time; > + items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; > + items[item_idx].sent_seq = sent_seq; > + items[item_idx].ip_id = ip_id; > + items[item_idx].nb_merged = 1; > + items[item_idx].is_atomic = is_atomic; > + (*item_num) += 1; > + > + /* if the previous packet exists, chain them together. */ > + if (prev_idx != INVALID_ARRAY_INDEX) { > + items[item_idx].next_pkt_idx = > + items[prev_idx].next_pkt_idx; > + items[prev_idx].next_pkt_idx = item_idx; > + } > + > + return item_idx; > +} > + > +uint32_t > +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx, > + uint32_t *item_num, > + uint32_t prev_item_idx) > +{ > + uint32_t next_idx = items[item_idx].next_pkt_idx; > + > + /* NULL indicates an empty item */ > + items[item_idx].firstseg = NULL; > + (*item_num) -= 1; > + if (prev_item_idx != INVALID_ARRAY_INDEX) > + items[prev_item_idx].next_pkt_idx = next_idx; > + > + return next_idx; > +} > + > +int32_t > +gro_tcp_reassemble(struct rte_mbuf *pkt, > + void *tbl, > + void *key, > + int32_t tcp_dl, > + struct gro_tcp_flow_ops *ops, > + struct gro_tcp_item *items, > + uint32_t *item_num, > + uint32_t table_size, > + uint16_t ip_id, > + uint8_t is_atomic, > + uint64_t start_time) In general, TCP4 and TCP6 share struct gro_tcp_item and have private flow structures, i.e., struct gro_tcp4/6_flow, and I like this abstraction. IMO, the code processing struct gro_tcp_item should be implemented as common functions shared by gro_tcp4.c and gro_tcp6.c. The code processing struct gro_tcp4/6_flow is tcp4 and tcp6 dependent and no need to abstract and share. In gro_tcp_reassemble(), it uses callback functions defined in struct gro_tcp_flow_ops to provide the different operations on struct gro_tcp4/6_flow. I don't think it's necessary for abstraction purpose as gro_tcp4/6_flows_ops implementations are alike and it also introduces extra cost on function calls. The gro_tcp_reassemble() has two parts: the common part to process struct gro_tcp_item and the private part to process struct gro_tcp4/6_flow. I think a better way is to remove gro_tcp_reassemble() and struct gro_tcp_flow_ops, and implement the common part as an internal function in gro_tcp.c/gro_tcp.h, and make both gro_tcp4/6_reassemble() implement own private part and invoke the common function to process struct gro_tcp_item. With this change, there is no callback cost anymore and tcp4/6 can share the common function. > +{ > + uint32_t item_idx; > + uint32_t cur_idx; > + uint32_t prev_idx; > + struct rte_tcp_hdr *tcp_hdr; > + int cmp; > + uint32_t sent_seq; > + > + tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, pkt- > >l2_len + pkt->l3_len); > + /* > + * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE > + * or CWR set. > + */ > + if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG) > + return -1; > + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); > + > + ops->tcp_flow_key_init(key, tcp_hdr); > + > + item_idx = ops->tcp_flow_lookup(tbl, key); > + if (item_idx == INVALID_ARRAY_INDEX) { > + item_idx = insert_new_tcp_item(pkt, items, item_num, > table_size, start_time, > + > INVALID_ARRAY_INDEX, sent_seq, ip_id, > + is_atomic); > + if (item_idx == INVALID_ARRAY_INDEX) > + return -1; > + if (ops->tcp_flow_insert(tbl, key, item_idx) == > + INVALID_ARRAY_INDEX) { > + /* > + * Fail to insert a new flow, so delete the > + * stored packet. > + */ > + delete_tcp_item(items, item_idx, item_num, > INVALID_ARRAY_INDEX); > + return -1; > + } > + return 0; > + } > + /* > + * Check all packets in the flow and try to find a neighbor for > + * the input packet. > + */ > + cur_idx = item_idx; > + prev_idx = cur_idx; > + do { > + cmp = check_seq_option(&items[cur_idx], tcp_hdr, > + sent_seq, ip_id, pkt->l4_len, tcp_dl, 0, > + is_atomic); > + if (cmp) { > + if (merge_two_tcp_packets(&items[cur_idx], > + pkt, cmp, sent_seq, ip_id, 0)) > + return 1; > + /* > + * Fail to merge the two packets, as the packet > + * length is greater than the max value. Store > + * the packet into the flow. > + */ > + if (insert_new_tcp_item(pkt, items, item_num, > table_size, start_time, cur_idx, > + sent_seq, ip_id, is_atomic) == > + INVALID_ARRAY_INDEX) > + return -1; > + return 0; > + } > + prev_idx = cur_idx; > + cur_idx = items[cur_idx].next_pkt_idx; > + } while (cur_idx != INVALID_ARRAY_INDEX); > + > + /* Fail to find a neighbor, so store the packet into the flow. */ > + if (insert_new_tcp_item(pkt, items, item_num, table_size, start_time, > prev_idx, sent_seq, > + ip_id, is_atomic) == INVALID_ARRAY_INDEX) > + return -1; > + > + return 0; > + > +} > diff --git a/lib/gro/gro_tcp.h b/lib/gro/gro_tcp.h index > c5d248a022..202f485c18 100644 > --- a/lib/gro/gro_tcp.h > +++ b/lib/gro/gro_tcp.h > @@ -1,6 +1,8 @@ > #ifndef _GRO_TCP_H_ > #define _GRO_TCP_H_ > > +#define INVALID_ARRAY_INDEX 0xffffffffUL > + > #include <rte_tcp.h> > > /* > @@ -14,6 +16,31 @@ > #define INVALID_TCP_HDRLEN(len) \ > (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) > > +struct gro_tcp_flow { > + struct rte_ether_addr eth_saddr; > + struct rte_ether_addr eth_daddr; > + uint32_t recv_ack; > + uint16_t src_port; > + uint16_t dst_port; > +}; > + > +#define ASSIGN_TCP_FLOW_KEY(k1, k2) \ > + rte_ether_addr_copy(&(k1->eth_saddr), &(k2->eth_saddr)); \ > + rte_ether_addr_copy(&(k1->eth_daddr), &(k2->eth_daddr)); \ > + k2->recv_ack = k1->recv_ack; \ > + k2->src_port = k1->src_port; \ > + k2->dst_port = k1->dst_port; For multiline macro, it's better to use do{...}while(0) to avoid unexpected errors in the future. > + > +typedef uint32_t (*gro_tcp_flow_lookup)(void *table, void *key); > +typedef uint32_t (*gro_tcp_flow_insert)(void *table, void *key, > +uint32_t item_idx); typedef void (*gro_tcp_flow_key_init)(void *key, > +struct rte_tcp_hdr *tcp_hdr); > + > +struct gro_tcp_flow_ops { > + gro_tcp_flow_lookup tcp_flow_lookup; > + gro_tcp_flow_insert tcp_flow_insert; > + gro_tcp_flow_key_init tcp_flow_key_init; }; > + > struct gro_tcp_item { > /* > * The first MBUF segment of the packet. If the value @@ -44,6 > +71,36 @@ struct gro_tcp_item { > uint8_t is_atomic; > }; > > +uint32_t > +insert_new_tcp_item(struct rte_mbuf *pkt, > + struct gro_tcp_item *items, > + uint32_t *item_num, > + uint32_t table_size, > + uint64_t start_time, > + uint32_t prev_idx, > + uint32_t sent_seq, > + uint16_t ip_id, > + uint8_t is_atomic); > + > +uint32_t > +delete_tcp_item(struct gro_tcp_item *items, > + uint32_t item_idx, > + uint32_t *item_num, > + uint32_t prev_item_idx); > + > +int32_t > +gro_tcp_reassemble(struct rte_mbuf *pkt, > + void *tbl, > + void *key, > + int32_t tcp_dl, > + struct gro_tcp_flow_ops *ops, > + struct gro_tcp_item *items, > + uint32_t *item_num, > + uint32_t table_size, > + uint16_t ip_id, > + uint8_t is_atomic, > + uint64_t start_time); > + > /* > * Merge two TCP packets without updating checksums. > * If cmp is larger than 0, append the new packet to the @@ -152,4 +209,10 > @@ check_seq_option(struct gro_tcp_item *item, > return 0; > } > > +static inline int > +is_same_tcp_flow(struct gro_tcp_flow *k1, struct gro_tcp_flow *k2) { > + return (!memcmp(k1, k2, sizeof(struct gro_tcp_flow))); } > + > #endif