On Tue, Nov 28, 2017 at 12:19:29PM +0000, Matan Azrad wrote:
> Using power of 2 descriptors number makes the ring management easier
> and allows to use mask operation instead of wraparound conditions.
> 
> Adjust Tx descriptor number to be power of 2 and change calculation to
> use mask accordingly.
> 
> Signed-off-by: Matan Azrad <ma...@mellanox.com>

A few minor comments, see below.

> ---
>  drivers/net/mlx4/mlx4_rxtx.c | 22 ++++++++--------------
>  drivers/net/mlx4/mlx4_txq.c  | 20 ++++++++++++--------
>  2 files changed, 20 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 30f2930..b5aaf4c 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -314,7 +314,7 @@ struct pv {
>   *   Pointer to Tx queue structure.
>   */
>  static void
> -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
> +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
>                                 struct mlx4_sq *sq)

Documentation needs updating.

>  {
>       unsigned int elts_tail = txq->elts_tail;
> @@ -355,13 +355,11 @@ struct pv {
>       if (unlikely(!completed))
>               return;
>       /* First stamping address is the end of the last one. */
> -     first_txbb = (&(*txq->elts)[elts_tail])->eocb;
> +     first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
>       elts_tail += completed;
> -     if (elts_tail >= elts_n)
> -             elts_tail -= elts_n;
>       /* The new tail element holds the end address. */
>       sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
> -             (&(*txq->elts)[elts_tail])->eocb);
> +             (&(*txq->elts)[elts_tail & elts_m])->eocb);
>       /* Update CQ consumer index. */
>       cq->cons_index = cons_index;
>       *cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK);
> @@ -534,6 +532,7 @@ struct pv {
>       struct txq *txq = (struct txq *)dpdk_txq;
>       unsigned int elts_head = txq->elts_head;
>       const unsigned int elts_n = txq->elts_n;
> +     const unsigned int elts_m = elts_n - 1;
>       unsigned int bytes_sent = 0;
>       unsigned int i;
>       unsigned int max;
> @@ -543,24 +542,20 @@ struct pv {
>  
>       assert(txq->elts_comp_cd != 0);
>       if (likely(txq->elts_comp != 0))
> -             mlx4_txq_complete(txq, elts_n, sq);
> +             mlx4_txq_complete(txq, elts_m, sq);
>       max = (elts_n - (elts_head - txq->elts_tail));
> -     if (max > elts_n)
> -             max -= elts_n;
>       assert(max >= 1);
>       assert(max <= elts_n);
>       /* Always leave one free entry in the ring. */
>       --max;
>       if (max > pkts_n)
>               max = pkts_n;
> -     elt = &(*txq->elts)[elts_head];
> +     elt = &(*txq->elts)[elts_head & elts_m];
>       /* First Tx burst element saves the next WQE control segment. */
>       ctrl = elt->wqe;
>       for (i = 0; (i != max); ++i) {
>               struct rte_mbuf *buf = pkts[i];
> -             unsigned int elts_head_next =
> -                     (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
> -             struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
> +             struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m];
>               uint32_t owner_opcode = sq->owner_opcode;
>               volatile struct mlx4_wqe_data_seg *dseg =
>                               (volatile struct mlx4_wqe_data_seg *)(ctrl + 1);
> @@ -678,7 +673,6 @@ struct pv {
>               ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
>               elt->buf = buf;
>               bytes_sent += buf->pkt_len;
> -             elts_head = elts_head_next;
>               ctrl = ctrl_next;
>               elt = elt_next;
>       }
> @@ -694,7 +688,7 @@ struct pv {
>       rte_wmb();
>       /* Ring QP doorbell. */
>       rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
> -     txq->elts_head = elts_head;
> +     txq->elts_head += i;
>       txq->elts_comp += i;
>       return i;
>  }
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 4c7b62a..253075a 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -76,17 +76,16 @@
>       unsigned int elts_head = txq->elts_head;
>       unsigned int elts_tail = txq->elts_tail;
>       struct txq_elt (*elts)[txq->elts_n] = txq->elts;
> +     unsigned int elts_m = txq->elts_n - 1;
>  
>       DEBUG("%p: freeing WRs", (void *)txq);
>       while (elts_tail != elts_head) {
> -             struct txq_elt *elt = &(*elts)[elts_tail];
> +             struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
>  
>               assert(elt->buf != NULL);
>               rte_pktmbuf_free(elt->buf);
>               elt->buf = NULL;
>               elt->wqe = NULL;
> -             if (++elts_tail == RTE_DIM(*elts))
> -                     elts_tail = 0;
>       }
>       txq->elts_tail = txq->elts_head;
>  }
> @@ -208,7 +207,9 @@ struct txq_mp2mr_mbuf_check_data {
>       struct mlx4dv_obj mlxdv;
>       struct mlx4dv_qp dv_qp;
>       struct mlx4dv_cq dv_cq;
> -     struct txq_elt (*elts)[desc];
> +     uint32_t elts_size = desc > 0x1000 ? 0x1000 :
> +             rte_align32pow2((uint32_t)desc);

Where is that magical 0x1000 value coming from? It should at least come
through a macro definition.

> +     struct txq_elt (*elts)[elts_size];
>       struct ibv_qp_init_attr qp_init_attr;
>       struct txq *txq;
>       uint8_t *bounce_buf;
> @@ -247,11 +248,14 @@ struct txq_mp2mr_mbuf_check_data {
>                     (void *)dev, idx);
>               return -rte_errno;
>       }
> -     if (!desc) {
> -             rte_errno = EINVAL;
> -             ERROR("%p: invalid number of Tx descriptors", (void *)dev);
> -             return -rte_errno;
> +     if ((uint32_t)desc != elts_size) {
> +             desc = (uint16_t)elts_size;
> +             WARN("%p: changed number of descriptors in TX queue %u"
> +                  " to be power of two (%d)",
> +                  (void *)dev, idx, desc);

You should display the same message as in mlx4_rxq.c for consistency
(also TX => Tx).

>       }
> +     DEBUG("%p: configuring queue %u for %u descriptors",
> +           (void *)dev, idx, desc);

Unnecessary duplicated debugging message already printed at the beginning of
this function. Yes this is a different value but WARN() made that pretty
clear.

>       /* Allocate and initialize Tx queue. */
>       mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
>       if (!txq) {
> -- 
> 1.8.3.1
> 

-- 
Adrien Mazarguil
6WIND

Reply via email to