Hi, > -----Original Message----- > From: Tao, Zhe > Sent: Friday, October 30, 2015 9:02 PM > To: dev at dpdk.org > Cc: Tao, Zhe; Liang, Cunming > Subject: [dpdk-dev][PATCH 1/4 v3] add vector PMD RX for FVL > > The vPMD RX function uses the multi-buffer and SSE instructions to > accelerate the RX speed, but now the pktype cannot be supported by the vPMD > RX, > because it will decrease the performance heavily. > > Signed-off-by: Zhe Tao <zhe.tao at intel.com> > --- > config/common_bsdapp | 2 + > config/common_linuxapp | 2 + > drivers/net/i40e/Makefile | 1 + > drivers/net/i40e/i40e_rxtx.c | 28 ++- > drivers/net/i40e/i40e_rxtx.h | 28 ++- > drivers/net/i40e/i40e_rxtx_vec.c | 484 > +++++++++++++++++++++++++++++++++++++++ > 6 files changed, 540 insertions(+), 5 deletions(-) > create mode 100644 drivers/net/i40e/i40e_rxtx_vec.c >
[...] > +void __attribute__((weak)) > +i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue __rte_unused*rxq) > +{ > + return; > +} > diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h > index 4385142..961a415 100644 > --- a/drivers/net/i40e/i40e_rxtx.h > +++ b/drivers/net/i40e/i40e_rxtx.h > @@ -44,13 +44,27 @@ > #define I40E_TX_FLAG_INSERT_VLAN ((uint32_t)(1 << 1)) > #define I40E_TX_FLAG_TSYN ((uint32_t)(1 << 2)) > > -#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC > #define RTE_PMD_I40E_RX_MAX_BURST 32 > -#endif > +#define RTE_PMD_I40E_TX_MAX_BURST 32 > + > +#define RTE_I40E_VPMD_RX_BURST 32 > +#define RTE_I40E_VPMD_TX_BURST 32 > +#define RTE_I40E_RXQ_REARM_THRESH 32 > +#define RTE_I40E_MAX_RX_BURST RTE_I40E_RXQ_REARM_THRESH > +#define RTE_I40E_TX_MAX_FREE_BUF_SZ 64 > +#define RTE_I40E_DESCS_PER_LOOP 4 > > #define I40E_RXBUF_SZ_1024 1024 > #define I40E_RXBUF_SZ_2048 2048 > > +#undef container_of > +#define container_of(ptr, type, member) ({ \ > + typeof(((type *)0)->member)(*__mptr) = (ptr); \ > + (type *)((char *)__mptr - offsetof(type, member)); }) > + > +#define I40E_TD_CMD (I40E_TX_DESC_CMD_ICRC |\ > + I40E_TX_DESC_CMD_EOP) > + > enum i40e_header_split_mode { > i40e_header_split_none = 0, > i40e_header_split_enabled = 1, > @@ -100,6 +114,11 @@ struct i40e_rx_queue { > struct rte_mbuf fake_mbuf; /**< dummy mbuf */ > struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2]; > #endif > + > + uint16_t rxrearm_nb; /**< number of remaining to be re-armed */ > + uint16_t rxrearm_start; /**< the idx we start the re-arming from */ > + uint64_t mbuf_initializer; /**< value to init mbufs */ > + > uint8_t port_id; /**< device port ID */ > uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */ > uint16_t queue_id; /**< RX queue index */ > @@ -210,4 +229,9 @@ uint32_t i40e_dev_rx_queue_count(struct rte_eth_dev > *dev, > uint16_t rx_queue_id); > int i40e_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); > > +uint16_t i40e_recv_pkts_vec(void 
*rx_queue, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts); > +int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq); > +void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq); > + > #endif /* _I40E_RXTX_H_ */ > diff --git a/drivers/net/i40e/i40e_rxtx_vec.c > b/drivers/net/i40e/i40e_rxtx_vec.c > new file mode 100644 > index 0000000..a95916b > --- /dev/null > +++ b/drivers/net/i40e/i40e_rxtx_vec.c > @@ -0,0 +1,484 @@ [...] > + > +#include <tmmintrin.h> > + > +#ifndef __INTEL_COMPILER > +#pragma GCC diagnostic ignored "-Wcast-qual" > +#endif > + > +static inline void > +i40e_rxq_rearm(struct i40e_rx_queue *rxq) > +{ > + int i; > + uint16_t rx_id; > + Tiny nit: it is not necessary to leave a blank line between these two definitions. > + volatile union i40e_rx_desc *rxdp; > + struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; > + struct rte_mbuf *mb0, *mb1; > + __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, > + RTE_PKTMBUF_HEADROOM); > + __m128i dma_addr0, dma_addr1; > + [...] > + > + /* vPMD receive routine, now only accept (nb_pkts == > RTE_I40E_VPMD_RX_BURST) > + * in one loop > + * > + * Notice: > + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet > + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan > RTE_I40E_VPMD_RX_BURST > + * numbers of DD bits > + Remove the blank line. > + */ > +static inline uint16_t > +_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts, uint8_t *split_packet) > +{ [...]