From: Chengwen Feng <fengcheng...@huawei.com> Currently, the L3L4P/L3E/L4E/OL3E/OL4E fields in the Rx descriptor are used to indicate the hardware checksum result: 1. L3L4P: indicates that hardware has processed the L3/L4 checksums for this packet; if this bit is 1, then L3E/L4E/OL3E/OL4E can be trusted. 2. L3E: L3 checksum error indication; 1 means an error was detected. 3. L4E: L4 checksum error indication; 1 means an error was detected. 4. OL3E: outer L3 checksum error indication; 1 means an error was detected. 5. OL4E: outer L4 checksum error indication; 1 means an error was detected.
The driver sets the good checksum flags based on the packet type and L3E/L4E/OL3E/OL4E when L3L4P is 1. It works as follows: 1. If the packet type indicates a tunnel packet: 1.1. If the packet type indicates it has an inner L3 header and L3E is zero, then mark the IP checksum good. 1.2. If the packet type indicates it has an inner L4 header and L4E is zero, then mark the L4 checksum good. 1.3. If the packet type indicates it has an outer L4 header and OL4E is zero, then mark the outer L4 checksum good. 2. If the packet type indicates a non-tunnel packet: 2.1. If the packet type indicates it has an L3 header and L3E is zero, then mark the IP checksum good. 2.2. If the packet type indicates it has an L4 header and L4E is zero, then mark the L4 checksum good. As described above, determining the good checksum flags is time-consuming and impacts Rx performance. To balance performance and functionality, the driver now uses the following scheme to set the good checksum flags when L3L4P is 1: 1. If L3E is zero, then mark the IP checksum good. 2. If L4E is zero, then mark the L4 checksum good. The performance gain is 3% in small-packet iofwd scenarios. 
Signed-off-by: Chengwen Feng <fengcheng...@huawei.com> Signed-off-by: Min Hu (Connor) <humi...@huawei.com> --- drivers/net/hns3/hns3_rxtx.c | 14 +---- drivers/net/hns3/hns3_rxtx.h | 103 ++++++++++++++-------------------- drivers/net/hns3/hns3_rxtx_vec_neon.h | 7 +-- drivers/net/hns3/hns3_rxtx_vec_sve.c | 6 +- 4 files changed, 45 insertions(+), 85 deletions(-) diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index bec7fae..c29c0cf 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -2400,7 +2400,6 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) struct rte_mbuf *nmb; /* pointer of the new mbuf */ struct rte_mbuf *rxm; uint32_t bd_base_info; - uint32_t cksum_err; uint32_t l234_info; uint32_t ol_info; uint64_t dma_addr; @@ -2475,8 +2474,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) /* Load remained descriptor data and extract necessary fields */ l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); - ret = hns3_handle_bdinfo(rxq, rxm, bd_base_info, - l234_info, &cksum_err); + ret = hns3_handle_bdinfo(rxq, rxm, bd_base_info, l234_info); if (unlikely(ret)) goto pkt_err; @@ -2485,9 +2483,6 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (rxm->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC) rxm->ol_flags |= PKT_RX_IEEE1588_PTP; - if (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) - hns3_rx_set_cksum_flag(rxm, rxm->packet_type, - cksum_err); hns3_rxd_to_vlan_tci(rxq, rxm, l234_info, &rxd); /* Increment bytes counter */ @@ -2526,7 +2521,6 @@ hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf *rxm; struct rte_eth_dev *dev; uint32_t bd_base_info; - uint32_t cksum_err; uint32_t l234_info; uint32_t gro_size; uint32_t ol_info; @@ -2700,17 +2694,13 @@ hns3_recv_scattered_pkts(void *rx_queue, l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); ret = 
hns3_handle_bdinfo(rxq, first_seg, bd_base_info, - l234_info, &cksum_err); + l234_info); if (unlikely(ret)) goto pkt_err; first_seg->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); - if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) - hns3_rx_set_cksum_flag(first_seg, - first_seg->packet_type, - cksum_err); hns3_rxd_to_vlan_tci(rxq, first_seg, l234_info, &rxd); /* Increment bytes counter */ diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index 10a6c64..9dfae61 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -541,19 +541,50 @@ enum hns3_cksum_status { extern uint64_t hns3_timestamp_rx_dynflag; extern int hns3_timestamp_dynfield_offset; -static inline int -hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, - uint32_t bd_base_info, uint32_t l234_info, - uint32_t *cksum_err) +static inline void +hns3_rx_set_cksum_flag(struct hns3_rx_queue *rxq, + struct rte_mbuf *rxm, + uint32_t l234_info) { -#define L2E_TRUNC_ERR_FLAG (BIT(HNS3_RXD_L2E_B) | \ - BIT(HNS3_RXD_TRUNCATE_B)) -#define CHECKSUM_ERR_FLAG (BIT(HNS3_RXD_L3E_B) | \ +#define HNS3_RXD_CKSUM_ERR_MASK (BIT(HNS3_RXD_L3E_B) | \ BIT(HNS3_RXD_L4E_B) | \ BIT(HNS3_RXD_OL3E_B) | \ BIT(HNS3_RXD_OL4E_B)) - uint32_t tmp = 0; + if (likely((l234_info & HNS3_RXD_CKSUM_ERR_MASK) == 0)) { + rxm->ol_flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); + return; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) { + rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; + rxq->dfx_stats.l3_csum_errors++; + } else { + rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) { + rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; + rxq->dfx_stats.l4_csum_errors++; + } else { + rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) + rxq->dfx_stats.ol3_csum_errors++; + + if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) { + rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD; + rxq->dfx_stats.ol4_csum_errors++; + } +} 
+ +static inline int +hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, + uint32_t bd_base_info, uint32_t l234_info) +{ +#define L2E_TRUNC_ERR_FLAG (BIT(HNS3_RXD_L2E_B) | \ + BIT(HNS3_RXD_TRUNCATE_B)) /* * If packet len bigger than mtu when recv with no-scattered algorithm, @@ -572,64 +603,12 @@ hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, return -EINVAL; } - if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) { - if (likely((l234_info & CHECKSUM_ERR_FLAG) == 0)) { - *cksum_err = 0; - return 0; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) { - rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; - rxq->dfx_stats.l3_csum_errors++; - tmp |= HNS3_L3_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) { - rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; - rxq->dfx_stats.l4_csum_errors++; - tmp |= HNS3_L4_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) { - rxq->dfx_stats.ol3_csum_errors++; - tmp |= HNS3_OUTER_L3_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) { - rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD; - rxq->dfx_stats.ol4_csum_errors++; - tmp |= HNS3_OUTER_L4_CKSUM_ERR; - } - } - *cksum_err = tmp; + if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) + hns3_rx_set_cksum_flag(rxq, rxm, l234_info); return 0; } -static inline void -hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, const uint64_t packet_type, - const uint32_t cksum_err) -{ - if (unlikely((packet_type & RTE_PTYPE_TUNNEL_MASK))) { - if (likely(packet_type & RTE_PTYPE_INNER_L3_MASK) && - (cksum_err & HNS3_L3_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_INNER_L4_MASK) && - (cksum_err & HNS3_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_L4_MASK) && - (cksum_err & HNS3_OUTER_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD; - } else { - if (likely(packet_type & RTE_PTYPE_L3_MASK) && - (cksum_err & HNS3_L3_CKSUM_ERR) == 0) - rxm->ol_flags |= 
PKT_RX_IP_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_L4_MASK) && - (cksum_err & HNS3_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - } -} - static inline uint32_t hns3_rx_calc_ptype(struct hns3_rx_queue *rxq, const uint32_t l234_info, const uint32_t ol_info) diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h index 14d6fb0..a58bc3a 100644 --- a/drivers/net/hns3/hns3_rxtx_vec_neon.h +++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h @@ -98,7 +98,6 @@ hns3_desc_parse_field(struct hns3_rx_queue *rxq, uint32_t l234_info, ol_info, bd_base_info; struct rte_mbuf *pkt; uint32_t retcode = 0; - uint32_t cksum_err; uint32_t i; int ret; @@ -111,17 +110,13 @@ hns3_desc_parse_field(struct hns3_rx_queue *rxq, l234_info = rxdp[i].rx.l234_info; ol_info = rxdp[i].rx.ol_info; bd_base_info = rxdp[i].rx.bd_base_info; - ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info, - l234_info, &cksum_err); + ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info, l234_info); if (unlikely(ret)) { retcode |= 1u << i; continue; } pkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); - if (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) - hns3_rx_set_cksum_flag(pkt, pkt->packet_type, - cksum_err); /* Increment bytes counter */ rxq->basic_stats.bytes += pkt->pkt_len; diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c index 2eaf692..f45aaad 100644 --- a/drivers/net/hns3/hns3_rxtx_vec_sve.c +++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c @@ -39,7 +39,6 @@ hns3_desc_parse_field_sve(struct hns3_rx_queue *rxq, uint32_t bd_vld_num) { uint32_t retcode = 0; - uint32_t cksum_err; int ret, i; for (i = 0; i < (int)bd_vld_num; i++) { @@ -47,7 +46,7 @@ hns3_desc_parse_field_sve(struct hns3_rx_queue *rxq, rx_pkts[i]->ol_flags = PKT_RX_RSS_HASH; ret = hns3_handle_bdinfo(rxq, rx_pkts[i], key->bd_base_info[i], - key->l234_info[i], &cksum_err); + key->l234_info[i]); if (unlikely(ret)) { retcode |= 1u << i; continue; @@ -55,9 +54,6 @@ 
hns3_desc_parse_field_sve(struct hns3_rx_queue *rxq, rx_pkts[i]->packet_type = hns3_rx_calc_ptype(rxq, key->l234_info[i], key->ol_info[i]); - if (likely(key->bd_base_info[i] & BIT(HNS3_RXD_L3L4P_B))) - hns3_rx_set_cksum_flag(rx_pkts[i], - rx_pkts[i]->packet_type, cksum_err); /* Increment bytes counter */ rxq->basic_stats.bytes += rx_pkts[i]->pkt_len; -- 2.7.4