On Wed, May 18, 2016 at 06:02:08PM +0200, Olivier Matz wrote: > Some architectures (ex: Power8) have a cache line size of 128 bytes, > so the drivers should not expect that prefetching the second part of > the mbuf with rte_prefetch0(&m->cacheline1) is valid. > > This commit adds helpers that can be used by drivers to prefetch the > rx or tx part of the mbuf, whatever the cache line size. > > Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
Reviewed-by: Jerin Jacob <jerin.jacob at caviumnetworks.com> > --- > > v1 -> v2: > - rename part0 as part1 and part1 as part2, as suggested by Thomas > > > drivers/net/fm10k/fm10k_rxtx_vec.c | 8 ++++---- > drivers/net/i40e/i40e_rxtx_vec.c | 8 ++++---- > drivers/net/ixgbe/ixgbe_rxtx_vec.c | 8 ++++---- > drivers/net/mlx4/mlx4.c | 4 ++-- > drivers/net/mlx5/mlx5_rxtx.c | 4 ++-- > examples/ipsec-secgw/ipsec-secgw.c | 2 +- > lib/librte_mbuf/rte_mbuf.h | 38 > ++++++++++++++++++++++++++++++++++++++ > 7 files changed, 55 insertions(+), 17 deletions(-) > > diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c > b/drivers/net/fm10k/fm10k_rxtx_vec.c > index 03e4a5c..ef256a5 100644 > --- a/drivers/net/fm10k/fm10k_rxtx_vec.c > +++ b/drivers/net/fm10k/fm10k_rxtx_vec.c > @@ -487,10 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf > **rx_pkts, > rte_compiler_barrier(); > > if (split_packet) { > - rte_prefetch0(&rx_pkts[pos]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 1]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 2]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 3]->cacheline1); > + rte_mbuf_prefetch_part2(rx_pkts[pos]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); > } > > /* D.1 pkt 3,4 convert format from desc to pktmbuf */ > diff --git a/drivers/net/i40e/i40e_rxtx_vec.c > b/drivers/net/i40e/i40e_rxtx_vec.c > index f7a62a8..eef80d9 100644 > --- a/drivers/net/i40e/i40e_rxtx_vec.c > +++ b/drivers/net/i40e/i40e_rxtx_vec.c > @@ -297,10 +297,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct > rte_mbuf **rx_pkts, > _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); > > if (split_packet) { > - rte_prefetch0(&rx_pkts[pos]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 1]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 2]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 3]->cacheline1); > + rte_mbuf_prefetch_part2(rx_pkts[pos]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 
1]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); > } > > /* avoid compiler reorder optimization */ > diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c > b/drivers/net/ixgbe/ixgbe_rxtx_vec.c > index c4d709b..e97ea82 100644 > --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c > +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c > @@ -307,10 +307,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct > rte_mbuf **rx_pkts, > _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); > > if (split_packet) { > - rte_prefetch0(&rx_pkts[pos]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 1]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 2]->cacheline1); > - rte_prefetch0(&rx_pkts[pos + 3]->cacheline1); > + rte_mbuf_prefetch_part2(rx_pkts[pos]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); > + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); > } > > /* avoid compiler reorder optimization */ > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c > index c5d8535..733d192 100644 > --- a/drivers/net/mlx4/mlx4.c > +++ b/drivers/net/mlx4/mlx4.c > @@ -3235,8 +3235,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, > uint16_t pkts_n) > * Fetch initial bytes of packet descriptor into a > * cacheline while allocating rep. > */ > - rte_prefetch0(seg); > - rte_prefetch0(&seg->cacheline1); > + rte_mbuf_prefetch_part1(seg); > + rte_mbuf_prefetch_part2(seg); > ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL, > &flags); > if (unlikely(ret < 0)) { > diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c > index 1832a21..5be8c62 100644 > --- a/drivers/net/mlx5/mlx5_rxtx.c > +++ b/drivers/net/mlx5/mlx5_rxtx.c > @@ -1086,8 +1086,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, > uint16_t pkts_n) > * Fetch initial bytes of packet descriptor into a > * cacheline while allocating rep. 
> */ > - rte_prefetch0(seg); > - rte_prefetch0(&seg->cacheline1); > + rte_mbuf_prefetch_part1(seg); > + rte_mbuf_prefetch_part2(seg); > ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci); > if (unlikely(ret < 0)) { > struct ibv_wc wc; > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > b/examples/ipsec-secgw/ipsec-secgw.c > index 1dc505c..ebd7c23 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.c > +++ b/examples/ipsec-secgw/ipsec-secgw.c > @@ -298,7 +298,7 @@ prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t > nb_pkts, uint8_t port) > const int32_t prefetch_offset = 2; > > for (i = 0; i < (nb_pkts - prefetch_offset); i++) { > - rte_prefetch0(pkts[i + prefetch_offset]->cacheline1); > + rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]); > prepare_tx_pkt(pkts[i], port); > } > /* Process left packets */ > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h > index 7b92b88..3ee8d66 100644 > --- a/lib/librte_mbuf/rte_mbuf.h > +++ b/lib/librte_mbuf/rte_mbuf.h > @@ -842,6 +842,44 @@ struct rte_mbuf { > uint16_t timesync; > } __rte_cache_aligned; > > +/** > + * Prefetch the first part of the mbuf > + * > + * The first 64 bytes of the mbuf corresponds to fields that are used early > + * in the receive path. If the cache line of the architecture is higher than > + * 64B, the second part will also be prefetched. > + * > + * @param m > + * The pointer to the mbuf. > + */ > +static inline void > +rte_mbuf_prefetch_part1(struct rte_mbuf *m) > +{ > + rte_prefetch0(&m->cacheline0); > +} > + > +/** > + * Prefetch the second part of the mbuf > + * > + * The next 64 bytes of the mbuf corresponds to fields that are used in the > + * transmit path. If the cache line of the architecture is higher than 64B, > + * this function does nothing as it is expected that the full mbuf is > + * already in cache. > + * > + * @param m > + * The pointer to the mbuf. 
> + */ > +static inline void > +rte_mbuf_prefetch_part2(struct rte_mbuf *m) > +{ > +#if RTE_CACHE_LINE_SIZE == 64 > + rte_prefetch0(&m->cacheline1); > +#else > + RTE_SET_USED(m); > +#endif > +} > + > + > static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp); > > /** > -- > 2.8.0.rc3 >