On Wed, 7 Sep 2016 15:42:24 +0300 Saeed Mahameed <sae...@mellanox.com> wrote:
> From: Tariq Toukan <tar...@mellanox.com> > > Instead of reallocating and mapping pages for RX data-path, > recycle already used pages in a per ring cache. > > We ran pktgen single-stream benchmarks, with iptables-raw-drop: > > Single stride, 64 bytes: > * 4,739,057 - baseline > * 4,749,550 - order0 no cache > * 4,786,899 - order0 with cache > 1% gain > > Larger packets, no page cross, 1024 bytes: > * 3,982,361 - baseline > * 3,845,682 - order0 no cache > * 4,127,852 - order0 with cache > 3.7% gain > > Larger packets, every 3rd packet crosses a page, 1500 bytes: > * 3,731,189 - baseline > * 3,579,414 - order0 no cache > * 3,931,708 - order0 with cache > 5.4% gain > > Signed-off-by: Tariq Toukan <tar...@mellanox.com> > Signed-off-by: Saeed Mahameed <sae...@mellanox.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 ++++++ > drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 ++++++ > drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 57 > ++++++++++++++++++++-- > drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 16 ++++++ > 4 files changed, 99 insertions(+), 5 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h > b/drivers/net/ethernet/mellanox/mlx5/core/en.h > index 075cdfc..afbdf70 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h > @@ -287,6 +287,18 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ > u8 tired; > }; > > +/* a single cache unit is capable to serve one napi call (for non-striding > rq) > + * or a MPWQE (for striding rq). > + */ > +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ > + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) > +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) > +struct mlx5e_page_cache { > + u32 head; > + u32 tail; > + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; > +}; > + [...] 
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c > b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c > index c1cb510..8e02af3 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c > @@ -305,11 +305,55 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq > *rq, u16 ix) > mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); > } > > +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, > + struct mlx5e_dma_info *dma_info) > +{ > + struct mlx5e_page_cache *cache = &rq->page_cache; > + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); > + > + if (tail_next == cache->head) { > + rq->stats.cache_full++; > + return false; > + } > + > + cache->page_cache[cache->tail] = *dma_info; > + cache->tail = tail_next; > + return true; > +} > + > +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, > + struct mlx5e_dma_info *dma_info) > +{ > + struct mlx5e_page_cache *cache = &rq->page_cache; > + > + if (unlikely(cache->head == cache->tail)) { > + rq->stats.cache_empty++; > + return false; > + } > + > + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { > + rq->stats.cache_busy++; > + return false; > + } Hmmm... doesn't this "block" the page_cache recycle facility until the page at the head of the queue gets its (page) refcnt decremented? A real use-case could fairly easily trigger this blocking, since a single long-held page at the head stalls reuse of every page behind it...
> + > + *dma_info = cache->page_cache[cache->head]; > + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); > + rq->stats.cache_reuse++; > + > + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, > + DMA_FROM_DEVICE); > + return true; > +} > + > static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, > struct mlx5e_dma_info *dma_info) > { > - struct page *page = dev_alloc_page(); > + struct page *page; > + > + if (mlx5e_rx_cache_get(rq, dma_info)) > + return 0; > > + page = dev_alloc_page(); > if (unlikely(!page)) > return -ENOMEM; -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer