At 2020-09-08 10:05:16, "Jiayu Hu" <jiayu...@intel.com> wrote: >Some comments are inline.
Thanks, replies inline. > >Thanks, >Jiayu > >On Fri, Sep 04, 2020 at 04:37:39PM +0800, yang_y...@163.com wrote: >> From: Yi Yang <yangy...@inspur.com> >> >> UDP GRO can help improve VM-to-VM UDP performance when >> VM is enabled UFO or GSO, GRO must be supported if GSO >> or UFO is enabled, otherwise, performance gain will be >> hurt. >> >> With this enabled in DPDK, OVS DPDK can leverage it >> to improve VM-to-VM UDP performance, this will make >> sure IP fragments will be reassembled once it is >> received from physical NIC. It is very helpful in OVS >> DPDK VLAN TSO case. >> >> Signed-off-by: Yi Yang <yangy...@inspur.com> >> --- >> lib/librte_gro/Makefile | 1 + >> lib/librte_gro/gro_udp4.c | 430 >> +++++++++++++++++++++++++++++++++++++++++++++ >> lib/librte_gro/gro_udp4.h | 281 +++++++++++++++++++++++++++++ >> lib/librte_gro/meson.build | 2 +- >> lib/librte_gro/rte_gro.c | 93 ++++++++-- >> lib/librte_gro/rte_gro.h | 5 +- >> 6 files changed, 796 insertions(+), 16 deletions(-) >> create mode 100644 lib/librte_gro/gro_udp4.c >> create mode 100644 lib/librte_gro/gro_udp4.h >> >> diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile >> index e848687..41ec29e 100644 >> --- a/lib/librte_gro/Makefile >> +++ b/lib/librte_gro/Makefile >> @@ -15,6 +15,7 @@ EXPORT_MAP := rte_gro_version.map >> # source files >> SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c >> SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c >> +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_udp4.c >> SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c >> >> # install this header file >> diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c >> new file mode 100644 >> index 0000000..25584fd >> --- /dev/null >> +++ b/lib/librte_gro/gro_udp4.c >> @@ -0,0 +1,430 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(c) 2020 Inspur Corporation >> + */ >> + >> +#include <rte_malloc.h> >> +#include <rte_mbuf.h> >> +#include <rte_cycles.h> >> +#include <rte_ethdev.h> >> + >> +#include "gro_udp4.h" >> + 
>> +void * >> +gro_udp4_tbl_create(uint16_t socket_id, >> + uint16_t max_flow_num, >> + uint16_t max_item_per_flow) >> +{ >> + struct gro_udp4_tbl *tbl; >> + size_t size; >> + uint32_t entries_num, i; >> + >> + entries_num = max_flow_num * max_item_per_flow; >> + entries_num = RTE_MIN(entries_num, GRO_UDP4_TBL_MAX_ITEM_NUM); >> + >> + if (entries_num == 0) >> + return NULL; >> + >> + tbl = rte_zmalloc_socket(__func__, >> + sizeof(struct gro_udp4_tbl), >> + RTE_CACHE_LINE_SIZE, >> + socket_id); >> + if (tbl == NULL) >> + return NULL; >> + >> + size = sizeof(struct gro_udp4_item) * entries_num; >> + tbl->items = rte_zmalloc_socket(__func__, >> + size, >> + RTE_CACHE_LINE_SIZE, >> + socket_id); >> + if (tbl->items == NULL) { >> + rte_free(tbl); >> + return NULL; >> + } >> + tbl->max_item_num = entries_num; >> + >> + size = sizeof(struct gro_udp4_flow) * entries_num; >> + tbl->flows = rte_zmalloc_socket(__func__, >> + size, >> + RTE_CACHE_LINE_SIZE, >> + socket_id); >> + if (tbl->flows == NULL) { >> + rte_free(tbl->items); >> + rte_free(tbl); >> + return NULL; >> + } >> + /* INVALID_ARRAY_INDEX indicates an empty flow */ >> + for (i = 0; i < entries_num; i++) >> + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; >> + tbl->max_flow_num = entries_num; >> + >> + return tbl; >> +} >> + >> +void >> +gro_udp4_tbl_destroy(void *tbl) >> +{ >> + struct gro_udp4_tbl *udp_tbl = tbl; >> + >> + if (udp_tbl) { >> + rte_free(udp_tbl->items); >> + rte_free(udp_tbl->flows); >> + } >> + rte_free(udp_tbl); >> +} >> + >> +static inline uint32_t >> +find_an_empty_item(struct gro_udp4_tbl *tbl) >> +{ >> + uint32_t i; >> + uint32_t max_item_num = tbl->max_item_num; >> + >> + for (i = 0; i < max_item_num; i++) >> + if (tbl->items[i].firstseg == NULL) >> + return i; >> + return INVALID_ARRAY_INDEX; >> +} >> + >> +static inline uint32_t >> +find_an_empty_flow(struct gro_udp4_tbl *tbl) >> +{ >> + uint32_t i; >> + uint32_t max_flow_num = tbl->max_flow_num; >> + >> + for (i = 0; i < max_flow_num; 
i++) >> + if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX) >> + return i; >> + return INVALID_ARRAY_INDEX; >> +} >> + >> +static inline uint32_t >> +insert_new_item(struct gro_udp4_tbl *tbl, >> + struct rte_mbuf *pkt, >> + uint64_t start_time, >> + uint32_t prev_idx, >> + uint16_t frag_offset, >> + uint8_t is_last_frag) >> +{ >> + uint32_t item_idx; >> + >> + item_idx = find_an_empty_item(tbl); >> + if (item_idx == INVALID_ARRAY_INDEX) >> + return INVALID_ARRAY_INDEX; >> + >> + tbl->items[item_idx].firstseg = pkt; >> + tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); >> + tbl->items[item_idx].start_time = start_time; >> + tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; >> + tbl->items[item_idx].frag_offset = frag_offset; >> + tbl->items[item_idx].is_last_frag = is_last_frag; >> + tbl->items[item_idx].nb_merged = 1; >> + tbl->item_num++; >> + >> + /* if the previous packet exists, chain them together. */ >> + if (prev_idx != INVALID_ARRAY_INDEX) { >> + tbl->items[item_idx].next_pkt_idx = >> + tbl->items[prev_idx].next_pkt_idx; >> + tbl->items[prev_idx].next_pkt_idx = item_idx; >> + } >> + >> + return item_idx; >> +} >> + >> +static inline uint32_t >> +delete_item(struct gro_udp4_tbl *tbl, uint32_t item_idx, >> + uint32_t prev_item_idx) >> +{ >> + uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; >> + >> + /* NULL indicates an empty item */ >> + tbl->items[item_idx].firstseg = NULL; >> + tbl->item_num--; >> + if (prev_item_idx != INVALID_ARRAY_INDEX) >> + tbl->items[prev_item_idx].next_pkt_idx = next_idx; >> + >> + return next_idx; >> +} >> + >> +static inline uint32_t >> +insert_new_flow(struct gro_udp4_tbl *tbl, >> + struct udp4_flow_key *src, >> + uint32_t item_idx) >> +{ >> + struct udp4_flow_key *dst; >> + uint32_t flow_idx; >> + >> + flow_idx = find_an_empty_flow(tbl); >> + if (unlikely(flow_idx == INVALID_ARRAY_INDEX)) >> + return INVALID_ARRAY_INDEX; >> + >> + dst = &(tbl->flows[flow_idx].key); >> + >> + 
rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); >> + rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); >> + dst->ip_src_addr = src->ip_src_addr; >> + dst->ip_dst_addr = src->ip_dst_addr; >> + dst->ip_id = src->ip_id; >> + dst->ip_id = src->ip_id; > >Duplicated code. Thanks, good catch, will remove it. > >> + >> + tbl->flows[flow_idx].start_index = item_idx; >> + tbl->flow_num++; >> + >> + return flow_idx; >> +} >> + >> +/* >> + * update the packet length for the flushed packet. >> + */ >> +static inline void >> +update_header(struct gro_udp4_item *item) >> +{ >> + struct rte_ipv4_hdr *ipv4_hdr; >> + struct rte_mbuf *pkt = item->firstseg; >> + uint16_t frag_offset; >> + >> + ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + >> + pkt->l2_len); >> + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - >> + pkt->l2_len); >> + >> + /* Clear MF bit if it is last fragment */ >> + if (item->is_last_frag) { >> + frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); >> + ipv4_hdr->fragment_offset = >> + rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG); >> + } >> +} >> + >> +int32_t >> +gro_udp4_reassemble(struct rte_mbuf *pkt, >> + struct gro_udp4_tbl *tbl, >> + uint64_t start_time) >> +{ >> + struct rte_ether_hdr *eth_hdr; >> + struct rte_ipv4_hdr *ipv4_hdr; >> + uint16_t ip_dl; >> + uint16_t ip_id, hdr_len; >> + uint16_t frag_offset = 0; >> + uint8_t is_last_frag; >> + >> + struct udp4_flow_key key; >> + uint32_t cur_idx, prev_idx, item_idx; >> + uint32_t i, max_flow_num, remaining_flow_num; >> + int cmp; >> + uint8_t find; >> + >> + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); >> + ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); >> + hdr_len = pkt->l2_len + pkt->l3_len; >> + >> + /* >> + * Don't process non-fragment packet. >> + */ >> + if (!is_ipv4_fragment(ipv4_hdr)) >> + return -1; >> + >> + /* >> + * Don't process the packet whose payload length is less than or >> + * equal to 0. 
>> + */ >> + if (pkt->pkt_len - hdr_len <= 0) >> + return -1; > >If input packets are malicious, whose mbuf->pkt_len is smaller than >(mbuf->l2_len+mbuf->l3_len), the above check won't work correctly, >as its value is uint16_t, which is always positive. I tried the below code. $ cat a.c #include <stdio.h> #include <stdint.h> int main() { uint16_t a = 10; uint16_t b = 8; if ((b - a) < 0) { printf("%u < %u\n", b , a); } } $ gcc a.c $ ./a.out 8 < 10 $ It works, so I think it is OK; nevertheless, I'll change it to the below to address your concern. if (pkt->pkt_len <= hdr_len) > >> + >> + ip_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len; > >Same as above. I think "if (pkt->pkt_len <= hdr_len)" can ensure ip_dl will be positive. I can add one more condition check here if you want: if (rte_be_to_cpu_16(ipv4_hdr->total_length) <= l3_len) return -1; But I think it is unnecessary here, and leaving it out saves a little performance, which you care about. > >> + ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); >> + frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); >> + is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0; >> + frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3; >> + >> + rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); >> + rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr)); >> + key.ip_src_addr = ipv4_hdr->src_addr; >> + key.ip_dst_addr = ipv4_hdr->dst_addr; >> + key.ip_id = ip_id; >> + >> + /* Search for a matched flow. */ >> + max_flow_num = tbl->max_flow_num; >> + remaining_flow_num = tbl->flow_num; >> + find = 0; >> + for (i = 0; i < max_flow_num && remaining_flow_num; i++) { >> + if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) { >> + if (is_same_udp4_flow(tbl->flows[i].key, key)) { >> + find = 1; >> + break; >> + } >> + remaining_flow_num--; >> + } >> + } >> + >> + /* >> + * Fail to find a matched flow. Insert a new flow and store the >> + * packet into the flow. 
>> + */ >> + if (find == 0) { >> + item_idx = insert_new_item(tbl, pkt, start_time, >> + INVALID_ARRAY_INDEX, frag_offset, >> + is_last_frag); >> + if (item_idx == INVALID_ARRAY_INDEX) >> + return -1; > >Adding unlikely here could get better performance, IMO. No problem. > >> + if (insert_new_flow(tbl, &key, item_idx) == >> + INVALID_ARRAY_INDEX) { >> + /* >> + * Fail to insert a new flow, so delete the >> + * stored packet. >> + */ >> + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); >> + return -1; >> + } >> + return 0; >> + } >> + >> + /* >> + * Check all packets in the flow and try to find a neighbor for >> + * the input packet. >> + */ >> + cur_idx = tbl->flows[i].start_index; >> + prev_idx = cur_idx; >> + do { >> + cmp = udp_check_neighbor(&(tbl->items[cur_idx]), >> + frag_offset, ip_dl, 0); >> + if (cmp) { >> + if (merge_two_udp4_packets(&(tbl->items[cur_idx]), >> + pkt, cmp, frag_offset, >> + is_last_frag, 0)) >> + return 1; >> + /* >> + * Fail to merge the two packets, as the packet >> + * length is greater than the max value. Store >> + * the packet into the flow. >> + */ >> + if (insert_new_item(tbl, pkt, start_time, prev_idx, >> + frag_offset, is_last_frag) == >> + INVALID_ARRAY_INDEX) >> + return -1; >> + return 0; >> + } >> + >> + /* Ensure inserted items are ordered by frag_offset */ >> + if (frag_offset >> + < tbl->items[cur_idx].frag_offset) { >> + break; >> + } >> + >> + prev_idx = cur_idx; >> + cur_idx = tbl->items[cur_idx].next_pkt_idx; >> + } while (cur_idx != INVALID_ARRAY_INDEX); >> + >> + /* Fail to find a neighbor, so store the packet into the flow. 
*/ >> + if (cur_idx == tbl->flows[i].start_index) { >> + /* Insert it before the first packet of the flow */ >> + item_idx = insert_new_item(tbl, pkt, start_time, >> + INVALID_ARRAY_INDEX, frag_offset, >> + is_last_frag); >> + if (item_idx == INVALID_ARRAY_INDEX) >> + return -1; >> + tbl->items[item_idx].next_pkt_idx = cur_idx; >> + tbl->flows[i].start_index = item_idx; >> + } else { >> + if (insert_new_item(tbl, pkt, start_time, prev_idx, >> + frag_offset, is_last_frag) >> + == INVALID_ARRAY_INDEX) >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> +static int >> +gro_udp4_merge_items(struct gro_udp4_tbl *tbl, >> + uint32_t start_idx) >> +{ >> + uint16_t frag_offset; >> + uint8_t is_last_frag; >> + int16_t ip_dl; >> + struct rte_mbuf *pkt; >> + int cmp; >> + uint32_t item_idx; >> + uint16_t hdr_len; >> + >> + item_idx = tbl->items[start_idx].next_pkt_idx; >> + while (item_idx != INVALID_ARRAY_INDEX) { >> + pkt = tbl->items[item_idx].firstseg; >> + hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len + >> + pkt->l3_len; > >For non-tunnel packets, mbuf->outer_l3/l2_len are not guaranteed 0. >I think it's better to pass outer header length as a parameter of >gro_udp4_merge_item(), like udp_check_neighbor(). For udp GRO, we can remove outer_l3/l2_len, they are nonsense. > >> + ip_dl = pkt->pkt_len - hdr_len; >> + frag_offset = tbl->items[item_idx].frag_offset; >> + is_last_frag = tbl->items[item_idx].is_last_frag; >> + cmp = udp_check_neighbor(&(tbl->items[start_idx]), >> + frag_offset, ip_dl, 0); >> + if (cmp) { >> + if (merge_two_udp4_packets( >> + &(tbl->items[start_idx]), >> + pkt, cmp, frag_offset, >> + is_last_frag, 0)) { >> + item_idx = delete_item(tbl, item_idx, >> + INVALID_ARRAY_INDEX); >> + tbl->items[start_idx].next_pkt_idx >> + = item_idx; >> + } else { >> + return 0; >> + } >> + } else { >> + return 0; >> + } > >A single line doesn't need braces. 
Got it, will remove { } > >> + } >> + >> + return 0; >> +} >> + >> +uint16_t >> +gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, >> + uint64_t flush_timestamp, >> + struct rte_mbuf **out, >> + uint16_t nb_out) >> +{ >> + uint16_t k = 0; >> + uint32_t i, j; >> + uint32_t max_flow_num = tbl->max_flow_num; >> + >> + for (i = 0; i < max_flow_num; i++) { >> + if (unlikely(tbl->flow_num == 0)) >> + return k; >> + >> + j = tbl->flows[i].start_index; >> + while (j != INVALID_ARRAY_INDEX) { >> + if (tbl->items[j].start_time <= flush_timestamp) { >> + gro_udp4_merge_items(tbl, j); >> + out[k++] = tbl->items[j].firstseg; >> + if (tbl->items[j].nb_merged > 1) >> + update_header(&(tbl->items[j])); >> + /* >> + * Delete the packet and get the next >> + * packet in the flow. >> + */ >> + j = delete_item(tbl, j, INVALID_ARRAY_INDEX); >> + tbl->flows[i].start_index = j; >> + if (j == INVALID_ARRAY_INDEX) >> + tbl->flow_num--; >> + >> + if (unlikely(k == nb_out)) >> + return k; >> + } else >> + /* >> + * The left packets in this flow won't be >> + * timeout. Go to check other flows. >> + */ >> + break; >> + } >> + } >> + return k; >> +} >> + >> +uint32_t >> +gro_udp4_tbl_pkt_count(void *tbl) >> +{ >> + struct gro_udp4_tbl *gro_tbl = tbl; >> + >> + if (gro_tbl) >> + return gro_tbl->item_num; >> + >> + return 0; >> +} >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h >> new file mode 100644 >> index 0000000..bc67eb1 >> --- /dev/null >> +++ b/lib/librte_gro/gro_udp4.h >> @@ -0,0 +1,281 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(c) 2020 Inspur Corporation >> + */ >> + >> +#ifndef _GRO_UDP4_H_ >> +#define _GRO_UDP4_H_ >> + >> +#include <rte_ip.h> >> +#include <rte_udp.h> >> + >> +#define INVALID_ARRAY_INDEX 0xffffffffUL >> +#define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) >> + >> +/* >> + * The max length of a IPv4 packet, which includes the length of the L3 >> + * header, the L4 header and the data payload. 
>> + */ >> +#define MAX_IPV4_PKT_LENGTH UINT16_MAX >> + >> +/* Header fields representing a UDP/IPv4 flow */ >> +struct udp4_flow_key { >> + struct rte_ether_addr eth_saddr; >> + struct rte_ether_addr eth_daddr; >> + uint32_t ip_src_addr; >> + uint32_t ip_dst_addr; >> + >> + /* IP fragment for UDP does not contain UDP header >> + * except the first one. But IP ID must be same. >> + */ >> + uint16_t ip_id; >> +}; >> + >> +struct gro_udp4_flow { >> + struct udp4_flow_key key; >> + /* >> + * The index of the first packet in the flow. >> + * INVALID_ARRAY_INDEX indicates an empty flow. >> + */ >> + uint32_t start_index; >> +}; >> + >> +struct gro_udp4_item { >> + /* >> + * The first MBUF segment of the packet. If the value >> + * is NULL, it means the item is empty. >> + */ >> + struct rte_mbuf *firstseg; >> + /* The last MBUF segment of the packet */ >> + struct rte_mbuf *lastseg; >> + /* >> + * The time when the first packet is inserted into the table. >> + * This value won't be updated, even if the packet is merged >> + * with other packets. >> + */ >> + uint64_t start_time; >> + /* >> + * next_pkt_idx is used to chain the packets that >> + * are in the same flow but can't be merged together >> + * (e.g. caused by packet reordering). >> + */ >> + uint32_t next_pkt_idx; >> + /* offset of IP fragment packet */ >> + uint16_t frag_offset; >> + /* is last IP fragment? */ >> + uint8_t is_last_frag; >> + /* the number of merged packets */ >> + uint16_t nb_merged; >> +}; >> + >> +/* >> + * UDP/IPv4 reassembly table structure. >> + */ >> +struct gro_udp4_tbl { >> + /* item array */ >> + struct gro_udp4_item *items; >> + /* flow array */ >> + struct gro_udp4_flow *flows; >> + /* current item number */ >> + uint32_t item_num; >> + /* current flow num */ >> + uint32_t flow_num; >> + /* item array size */ >> + uint32_t max_item_num; >> + /* flow array size */ >> + uint32_t max_flow_num; >> +}; >> + >> +/** >> + * This function creates a UDP/IPv4 reassembly table. 
>> + * >> + * @param socket_id >> + * Socket index for allocating the UDP/IPv4 reassemble table >> + * @param max_flow_num >> + * The maximum number of flows in the UDP/IPv4 GRO table >> + * @param max_item_per_flow >> + * The maximum number of packets per flow >> + * >> + * @return >> + * - Return the table pointer on success. >> + * - Return NULL on failure. >> + */ >> +void *gro_udp4_tbl_create(uint16_t socket_id, >> + uint16_t max_flow_num, >> + uint16_t max_item_per_flow); >> + >> +/** >> + * This function destroys a UDP/IPv4 reassembly table. >> + * >> + * @param tbl >> + * Pointer pointing to the UDP/IPv4 reassembly table. >> + */ >> +void gro_udp4_tbl_destroy(void *tbl); >> + >> +/** >> + * This function merges a UDP/IPv4 packet. >> + * >> + * This function does not check if the packet has correct checksums and >> + * does not re-calculate checksums for the merged packet. It returns the >> + * packet if it isn't UDP fragment or there is no available space in >> + * the table. >> + * >> + * @param pkt >> + * Packet to reassemble >> + * @param tbl >> + * Pointer pointing to the UDP/IPv4 reassembly table >> + * @start_time >> + * The time when the packet is inserted into the table >> + * >> + * @return >> + * - Return a positive value if the packet is merged. >> + * - Return zero if the packet isn't merged but stored in the table. >> + * - Return a negative value for invalid parameters or no available >> + * space in the table. >> + */ >> +int32_t gro_udp4_reassemble(struct rte_mbuf *pkt, >> + struct gro_udp4_tbl *tbl, >> + uint64_t start_time); >> + >> +/** >> + * This function flushes timeout packets in a UDP/IPv4 reassembly table, >> + * and without updating checksums. >> + * >> + * @param tbl >> + * UDP/IPv4 reassembly table pointer >> + * @param flush_timestamp >> + * Flush packets which are inserted into the table before or at the >> + * flush_timestamp. 
>> + * @param out >> + * Pointer array used to keep flushed packets >> + * @param nb_out >> + * The element number in 'out'. It also determines the maximum number of >> + * packets that can be flushed finally. >> + * >> + * @return >> + * The number of flushed packets >> + */ >> +uint16_t gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, >> + uint64_t flush_timestamp, >> + struct rte_mbuf **out, >> + uint16_t nb_out); >> + >> +/** >> + * This function returns the number of the packets in a UDP/IPv4 >> + * reassembly table. >> + * >> + * @param tbl >> + * UDP/IPv4 reassembly table pointer >> + * >> + * @return >> + * The number of packets in the table >> + */ >> +uint32_t gro_udp4_tbl_pkt_count(void *tbl); >> + >> +/* >> + * Check if two UDP/IPv4 packets belong to the same flow. >> + */ >> +static inline int >> +is_same_udp4_flow(struct udp4_flow_key k1, struct udp4_flow_key k2) >> +{ >> + return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && >> + rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && >> + (k1.ip_src_addr == k2.ip_src_addr) && >> + (k1.ip_dst_addr == k2.ip_dst_addr) && >> + (k1.ip_id == k2.ip_id)); >> +} >> + >> +/* >> + * Merge two UDP/IPv4 packets without updating checksums. >> + * If cmp is larger than 0, append the new packet to the >> + * original packet. Otherwise, pre-pend the new packet to >> + * the original packet. 
>> + */ >> +static inline int >> +merge_two_udp4_packets(struct gro_udp4_item *item, >> + struct rte_mbuf *pkt, >> + int cmp, >> + uint16_t frag_offset, >> + uint8_t is_last_frag, >> + uint16_t l2_offset) >> +{ >> + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; >> + uint16_t hdr_len, l2_len; >> + uint32_t ip_len; >> + >> + if (cmp > 0) { >> + pkt_head = item->firstseg; >> + pkt_tail = pkt; >> + } else { >> + pkt_head = pkt; >> + pkt_tail = item->firstseg; >> + } >> + >> + /* check if the IPv4 packet length is greater than the max value */ >> + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len; >> + l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len; >> + ip_len = pkt_head->pkt_len - l2_len >> + + pkt_tail->pkt_len - hdr_len; >> + if (unlikely(ip_len > MAX_IPV4_PKT_LENGTH)) >> + return 0; >> + >> + /* remove the packet header for the tail packet */ >> + rte_pktmbuf_adj(pkt_tail, hdr_len); >> + >> + /* chain two packets together */ >> + if (cmp > 0) { >> + item->lastseg->next = pkt; >> + item->lastseg = rte_pktmbuf_lastseg(pkt); >> + } else { >> + lastseg = rte_pktmbuf_lastseg(pkt); >> + lastseg->next = item->firstseg; >> + item->firstseg = pkt; >> + item->frag_offset = frag_offset; >> + } >> + item->nb_merged++; >> + if (is_last_frag) >> + item->is_last_frag = is_last_frag; >> + >> + /* update MBUF metadata for the merged packet */ >> + pkt_head->nb_segs += pkt_tail->nb_segs; >> + pkt_head->pkt_len += pkt_tail->pkt_len; >> + >> + return 1; >> +} >> + >> +/* >> + * Check if two UDP/IPv4 packets are neighbors. >> + */ >> +static inline int >> +udp_check_neighbor(struct gro_udp4_item *item, >> + uint16_t frag_offset, >> + uint16_t ip_dl, >> + uint16_t l2_offset) > >It's better to rename the function as udp4_check_neighbor(), IMO. 
No problem > >> +{ >> + struct rte_mbuf *pkt_orig = item->firstseg; >> + uint16_t len; >> + >> + /* check if the two packets are neighbors */ >> + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - >> + pkt_orig->l3_len; >> + if (frag_offset == item->frag_offset + len) >> + /* append the new packet */ >> + return 1; >> + else if (frag_offset + ip_dl == item->frag_offset) >> + /* pre-pend the new packet */ >> + return -1; >> + >> + return 0; >> +} >> + >> +static inline int >> +is_ipv4_fragment(const struct rte_ipv4_hdr *hdr) >> +{ >> + uint16_t flag_offset, ip_flag, ip_ofs; >> + >> + flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); >> + ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); >> + ip_flag = (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG); >> + >> + return ip_flag != 0 || ip_ofs != 0; >> +} >> +#endif >> diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build >> index 501668c..0d18dc2 100644 >> --- a/lib/librte_gro/meson.build >> +++ b/lib/librte_gro/meson.build >> @@ -1,6 +1,6 @@ >> # SPDX-License-Identifier: BSD-3-Clause >> # Copyright(c) 2017 Intel Corporation >> >> -sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_vxlan_tcp4.c') >> +sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c') >> headers = files('rte_gro.h') >> deps += ['ethdev'] >> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c >> index 6618f4d..d094129 100644 >> --- a/lib/librte_gro/rte_gro.c >> +++ b/lib/librte_gro/rte_gro.c >> @@ -9,6 +9,7 @@ >> >> #include "rte_gro.h" >> #include "gro_tcp4.h" >> +#include "gro_udp4.h" >> #include "gro_vxlan_tcp4.h" >> >> typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, >> @@ -18,17 +19,23 @@ >> typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); >> >> static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { >> - gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL}; >> + gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, >> + gro_udp4_tbl_create, NULL}; >> static 
gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { >> gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy, >> + gro_udp4_tbl_destroy, >> NULL}; >> static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { >> gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count, >> + gro_udp4_tbl_pkt_count, >> NULL}; >> >> #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >> ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP)) >> >> +#define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >> + ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP)) >> + >> #define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >> ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \ >> ((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \ >> @@ -40,6 +47,7 @@ >> RTE_PTYPE_INNER_L3_IPV4_EXT | \ >> RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0)) >> >> + >> /* >> * GRO context structure. It keeps the table structures, which are >> * used to merge packets, for different GRO types. Before using >> @@ -123,20 +131,26 @@ struct gro_ctx { >> struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >> struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; >> >> - /* Allocate a reassembly table for VXLAN GRO */ >> + /* allocate a reassembly table for UDP/IPv4 GRO */ >> + struct gro_udp4_tbl udp_tbl; >> + struct gro_udp4_flow udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >> + struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; >> + >> + /* Allocate a reassembly table for VXLAN TCP GRO */ >> struct gro_vxlan_tcp4_tbl vxlan_tbl; >> struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >> - struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = { >> - {{0}, 0, 0} }; >> + struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] >> + = {{{0}, 0, 0} }; >> >> struct rte_mbuf *unprocess_pkts[nb_pkts]; >> uint32_t item_num; >> int32_t ret; >> uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts; >> - uint8_t do_tcp4_gro = 0, 
do_vxlan_gro = 0; >> + uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0; >> >> if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | >> - RTE_GRO_TCP_IPV4)) == 0)) >> + RTE_GRO_TCP_IPV4 | >> + RTE_GRO_UDP_IPV4)) == 0)) >> return nb_pkts; >> >> /* Get the maximum number of packets */ >> @@ -170,6 +184,20 @@ struct gro_ctx { >> do_tcp4_gro = 1; >> } >> >> + if (param->gro_types & RTE_GRO_UDP_IPV4) { >> + for (i = 0; i < item_num; i++) >> + udp_flows[i].start_index = INVALID_ARRAY_INDEX; >> + >> + udp_tbl.flows = udp_flows; >> + udp_tbl.items = udp_items; >> + udp_tbl.flow_num = 0; >> + udp_tbl.item_num = 0; >> + udp_tbl.max_flow_num = item_num; >> + udp_tbl.max_item_num = item_num; >> + do_udp4_gro = 1; >> + } >> + >> + >> for (i = 0; i < nb_pkts; i++) { >> /* >> * The timestamp is ignored, since all packets >> @@ -177,7 +205,8 @@ struct gro_ctx { >> */ >> if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) && >> do_vxlan_gro) { >> - ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0); >> + ret = gro_vxlan_tcp4_reassemble(pkts[i], >> + &vxlan_tbl, 0); >> if (ret > 0) >> /* Merge successfully */ >> nb_after_gro--; >> @@ -191,27 +220,43 @@ struct gro_ctx { >> nb_after_gro--; >> else if (ret < 0) >> unprocess_pkts[unprocess_num++] = pkts[i]; >> + } else if (IS_IPV4_UDP_PKT(pkts[i]->packet_type) && >> + do_udp4_gro) { >> + ret = gro_udp4_reassemble(pkts[i], &udp_tbl, 0); >> + if (ret > 0) >> + /* merge successfully */ >> + nb_after_gro--; >> + else if (ret < 0) >> + unprocess_pkts[unprocess_num++] = pkts[i]; >> } else >> unprocess_pkts[unprocess_num++] = pkts[i]; >> } >> >> - if (nb_after_gro < nb_pkts) { >> + if ((nb_after_gro < nb_pkts) >> + || (unprocess_num < nb_pkts)) { > >Why need to check unprocess_num here? In the case that a packet isn't merged, nb_after_gro won't be changed, so we can only use unprocess_num to check; it is possible to reassemble them successfully in the flush phase. 
> >> i = 0; >> /* Flush all packets from the tables */ >> if (do_vxlan_gro) { >> i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl, >> 0, pkts, nb_pkts); >> } >> + >> if (do_tcp4_gro) { >> i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, >> &pkts[i], nb_pkts - i); >> } >> + >> + if (do_udp4_gro) { >> + i += gro_udp4_tbl_timeout_flush(&udp_tbl, 0, >> + &pkts[i], nb_pkts - i); >> + } >> /* Copy unprocessed packets */ >> if (unprocess_num > 0) { >> memcpy(&pkts[i], unprocess_pkts, >> sizeof(struct rte_mbuf *) * >> unprocess_num); >> } >> + nb_after_gro = i + unprocess_num; >> } >> >> return nb_after_gro; >> 1.8.3.1