restrict pointer aliasing to optimize the code generated. The patch showed ~3% performnace uplift on Arm N1SDP platform, and no degradation on ThunderX2. The tet case is RFC2544 zero-loss L2 forwarding running testpmd.
[1] https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/Restricted-Pointers.html Signed-off-by: Gavin Hu <gavin...@arm.com> Reviewed-by: Steve Capper <steve.cap...@arm.com> --- drivers/net/i40e/i40e_rxtx_vec_neon.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c index 4376d8911..405bd82df 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_neon.c +++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -172,8 +172,8 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4], #define I40E_UINT16_BIT (CHAR_BIT * sizeof(uint16_t)) static inline void -desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts, - uint32_t *ptype_tbl) +desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **restrict rx_pkts, + uint32_t *restrict ptype_tbl) { int i; uint8_t ptype; @@ -194,8 +194,8 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts, * numbers of DD bits */ static inline uint16_t -_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts, uint8_t *split_packet) +_recv_raw_pkts_vec(struct i40e_rx_queue *restrict rxq, struct rte_mbuf + **restrict rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union i40e_rx_desc *rxdp; struct i40e_rx_entry *sw_ring; @@ -432,7 +432,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, * numbers of DD bits */ uint16_t -i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, +i40e_recv_pkts_vec(void *restrict rx_queue, struct rte_mbuf **restrict rx_pkts, uint16_t nb_pkts) { return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); @@ -494,8 +494,8 @@ vtx1(volatile struct i40e_tx_desc *txdp, } static inline void -vtx(volatile struct i40e_tx_desc *txdp, - struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) +vtx(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkt, + uint16_t nb_pkts, uint64_t flags) { int i; @@ -504,8 +504,8 @@ vtx(volatile struct i40e_tx_desc *txdp, } uint16_t -i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) +i40e_xmit_fixed_burst_vec(void *restrict tx_queue, + struct rte_mbuf **restrict tx_pkts, uint16_t nb_pkts) { struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue; volatile struct i40e_tx_desc *txdp; -- 2.17.1