> -----Original Message-----
> From: Xing, Beilei <beilei.x...@intel.com>
> Sent: Friday, May 19, 2023 3:31 PM
> To: Wu, Jingjing <jingjing...@intel.com>
> Cc: dev@dpdk.org; Liu, Mingxia <mingxia....@intel.com>; Xing, Beilei
> <beilei.x...@intel.com>; Wang, Xiao W <xiao.w.w...@intel.com>
> Subject: [PATCH v3 05/10] net/cpfl: support hairpin queue setup and release
> 
> From: Beilei Xing <beilei.x...@intel.com>
> 
> Support hairpin Rx/Tx queue setup and release.
> 
> Signed-off-by: Xiao Wang <xiao.w.w...@intel.com>
> Signed-off-by: Mingxia Liu <mingxia....@intel.com>
> Signed-off-by: Beilei Xing <beilei.x...@intel.com>
> ---
>  drivers/net/cpfl/cpfl_ethdev.c          |   6 +
>  drivers/net/cpfl/cpfl_ethdev.h          |  12 +
>  drivers/net/cpfl/cpfl_rxtx.c            | 373 +++++++++++++++++++++++-
>  drivers/net/cpfl/cpfl_rxtx.h            |  26 ++
>  drivers/net/cpfl/cpfl_rxtx_vec_common.h |   4 +
>  5 files changed, 420 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
> index 8e471d2a9b..03813716ce 100644
> --- a/drivers/net/cpfl/cpfl_ethdev.c
> +++ b/drivers/net/cpfl/cpfl_ethdev.c
> @@ -874,6 +874,10 @@ cpfl_dev_close(struct rte_eth_dev *dev)
>       struct cpfl_adapter_ext *adapter = CPFL_ADAPTER_TO_EXT(vport->adapter);
> 
>       cpfl_dev_stop(dev);
> +     if (cpfl_vport->p2p_mp) {
> +             rte_mempool_free(cpfl_vport->p2p_mp);
> +             cpfl_vport->p2p_mp = NULL;
> +     }
> 
>       if (!adapter->base.is_rx_singleq && !adapter->base.is_tx_singleq)
>               cpfl_p2p_queue_grps_del(vport);
> @@ -916,6 +920,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = {
>       .xstats_get_names               = cpfl_dev_xstats_get_names,
>       .xstats_reset                   = cpfl_dev_xstats_reset,
>       .hairpin_cap_get                = cpfl_hairpin_cap_get,
> +     .rx_hairpin_queue_setup         = cpfl_rx_hairpin_queue_setup,
> +     .tx_hairpin_queue_setup         = cpfl_tx_hairpin_queue_setup,
>  };
> 
>  static int
> diff --git a/drivers/net/cpfl/cpfl_ethdev.h b/drivers/net/cpfl/cpfl_ethdev.h
> index 65c9a195b2..a48344299c 100644
> --- a/drivers/net/cpfl/cpfl_ethdev.h
> +++ b/drivers/net/cpfl/cpfl_ethdev.h
> @@ -89,6 +89,18 @@ struct p2p_queue_chunks_info {
>  struct cpfl_vport {
>       struct idpf_vport base;
>       struct p2p_queue_chunks_info p2p_q_chunks_info;
> +
> +     struct rte_mempool *p2p_mp;
> +
> +     uint16_t nb_data_rxq;
> +     uint16_t nb_data_txq;
> +     uint16_t nb_p2p_rxq;
> +     uint16_t nb_p2p_txq;
> +
> +     struct idpf_rx_queue *p2p_rx_bufq;
> +     struct idpf_tx_queue *p2p_tx_complq;
> +     bool p2p_manual_bind;
> +
>  };
> 
>  struct cpfl_adapter_ext {
> diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
> index 04a51b8d15..333a399e73 100644
> --- a/drivers/net/cpfl/cpfl_rxtx.c
> +++ b/drivers/net/cpfl/cpfl_rxtx.c
> @@ -10,6 +10,79 @@
>  #include "cpfl_rxtx.h"
>  #include "cpfl_rxtx_vec_common.h"
> 
> +uint16_t
> +cpfl_hw_qid_get(uint16_t start_qid, uint16_t offset)
> +{
> +     return start_qid + offset;
> +}
> +
> +uint64_t
> +cpfl_hw_qtail_get(uint64_t tail_start, uint16_t offset, uint64_t tail_spacing)
> +{
> +     return tail_start + offset * tail_spacing;
> +}
> +
> +static inline void
> +cpfl_tx_hairpin_descq_reset(struct idpf_tx_queue *txq)
> +{
> +     uint32_t i, size;
> +
> +     if (!txq) {
> +             PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
> +             return;
> +     }
> +
> +     size = txq->nb_tx_desc * CPFL_P2P_DESC_LEN;
> +     for (i = 0; i < size; i++)
> +             ((volatile char *)txq->desc_ring)[i] = 0;
> +}
> +
> +static inline void
> +cpfl_tx_hairpin_complq_reset(struct idpf_tx_queue *cq)
> +{
> +     uint32_t i, size;
> +
> +     if (!cq) {
> +             PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
> +             return;
> +     }
> +
> +     size = cq->nb_tx_desc * CPFL_P2P_DESC_LEN;
> +     for (i = 0; i < size; i++)
> +             ((volatile char *)cq->compl_ring)[i] = 0;
> +}
> +
> +static inline void
> +cpfl_rx_hairpin_descq_reset(struct idpf_rx_queue *rxq)
> +{
> +     uint16_t len;
> +     uint32_t i;
> +
> +     if (!rxq)
> +             return;
> +
> +     len = rxq->nb_rx_desc;
> +     for (i = 0; i < len * CPFL_P2P_DESC_LEN; i++)
> +             ((volatile char *)rxq->rx_ring)[i] = 0;
> +}
> +
> +static inline void
> +cpfl_rx_hairpin_bufq_reset(struct idpf_rx_queue *rxbq)
> +{
> +     uint16_t len;
> +     uint32_t i;
> +
> +     if (!rxbq)
> +             return;
> +
> +     len = rxbq->nb_rx_desc;
> +     for (i = 0; i < len * CPFL_P2P_DESC_LEN; i++)
> +             ((volatile char *)rxbq->rx_ring)[i] = 0;
> +
> +     rxbq->bufq1 = NULL;
> +     rxbq->bufq2 = NULL;
> +}
> +
>  static uint64_t
>  cpfl_rx_offload_convert(uint64_t offload)
>  {
> @@ -234,7 +307,10 @@ cpfl_rx_queue_release(void *rxq)
> 
>       /* Split queue */
>       if (!q->adapter->is_rx_singleq) {
> -             if (q->bufq2)
> +             /* the mz is shared between Tx/Rx hairpin, let Rx_release
> +              * free the buf, q->bufq1->mz and q->mz.
> +              */
> +             if (!cpfl_rxq->hairpin_info.hairpin_q && q->bufq2)
>                       cpfl_rx_split_bufq_release(q->bufq2);
> 
>               if (q->bufq1)
> @@ -385,6 +461,7 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
>               }
>       }
> 
> +     cpfl_vport->nb_data_rxq++;
>       rxq->q_set = true;
>       dev->data->rx_queues[queue_idx] = cpfl_rxq;
> 
> @@ -548,6 +625,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
>       txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
>                       queue_idx * vport->chunks_info.tx_qtail_spacing);
>       txq->ops = &def_txq_ops;
> +     cpfl_vport->nb_data_txq++;
>       txq->q_set = true;
>       dev->data->tx_queues[queue_idx] = cpfl_txq;
> 
> @@ -562,6 +640,297 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
>       return ret;
>  }
> 
> +static int
> +cpfl_rx_hairpin_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
> +                        uint16_t logic_qid, uint16_t nb_desc)
> +{
> +     struct cpfl_vport *cpfl_vport =
> +         (struct cpfl_vport *)dev->data->dev_private;
> +     struct idpf_vport *vport = &cpfl_vport->base;
> +     struct idpf_adapter *adapter = vport->adapter;
> +     struct rte_mempool *mp;
> +     char pool_name[RTE_MEMPOOL_NAMESIZE];
> +
> +     mp = cpfl_vport->p2p_mp;
> +     if (!mp) {
> +             snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "p2p_mb_pool_%u",
> +                      dev->data->port_id);
> +             mp = rte_pktmbuf_pool_create(pool_name, CPFL_P2P_NB_MBUF,
> +                                          CPFL_P2P_CACHE_SIZE, 0, CPFL_P2P_MBUF_SIZE,
> +                                          dev->device->numa_node);
> +             if (!mp) {
> +                     PMD_INIT_LOG(ERR, "Failed to allocate mbuf pool for
> p2p");
> +                     return -ENOMEM;
> +             }
> +             cpfl_vport->p2p_mp = mp;
> +     }
> +
> +     bufq->mp = mp;
> +     bufq->nb_rx_desc = nb_desc;
> +     bufq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_buf_start_qid,
> +                                      logic_qid);
> +     bufq->port_id = dev->data->port_id;
> +     bufq->adapter = adapter;
> +     bufq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
> +
> +     bufq->sw_ring = rte_zmalloc("sw ring",
> +                                 sizeof(struct rte_mbuf *) * nb_desc,
> +                                 RTE_CACHE_LINE_SIZE);
> +     if (!bufq->sw_ring) {
> +             PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
> +             return -ENOMEM;
> +     }
> +
> +     bufq->q_set = true;
> +     bufq->ops = &def_rxq_ops;
> +
> +     return 0;
> +}
> +
> +int
> +cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +                         uint16_t nb_desc,
> +                         const struct rte_eth_hairpin_conf *conf)
> +{
> +     struct cpfl_vport *cpfl_vport = (struct cpfl_vport *)dev->data->dev_private;
> +     struct idpf_vport *vport = &cpfl_vport->base;
> +     struct idpf_adapter *adapter_base = vport->adapter;
> +     uint16_t logic_qid = cpfl_vport->nb_p2p_rxq;
> +     struct cpfl_rxq_hairpin_info *hairpin_info;
> +     struct cpfl_rx_queue *cpfl_rxq;
> +     struct idpf_rx_queue *bufq1 = NULL;
> +     struct idpf_rx_queue *rxq;
> +     uint16_t peer_port, peer_q;
> +     uint16_t qid;
> +     int ret;
> +
> +     if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
> +             PMD_INIT_LOG(ERR, "Only spilt queue model supports hairpin
> queue.");
> +             return -EINVAL;
> +     }
> +
> +     if (conf->peer_count != 1) {
> +             PMD_INIT_LOG(ERR, "Can't support Rx hairpin queue peer
> count %d", conf->peer_count);
> +             return -EINVAL;
> +     }
> +
> +     peer_port = conf->peers[0].port;
> +     peer_q = conf->peers[0].queue;
> +
> +     if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
> +         nb_desc > CPFL_MAX_RING_DESC ||
> +         nb_desc < CPFL_MIN_RING_DESC) {
> +             PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is
> invalid", nb_desc);
> +             return -EINVAL;
> +     }
> +
> +     /* Free memory if needed */
> +     if (dev->data->rx_queues[queue_idx]) {
> +             cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]);
> +             dev->data->rx_queues[queue_idx] = NULL;
> +     }
> +
> +     /* Setup Rx description queue */
> +     cpfl_rxq = rte_zmalloc_socket("cpfl hairpin rxq",
> +                              sizeof(struct cpfl_rx_queue),
> +                              RTE_CACHE_LINE_SIZE,
> +                              SOCKET_ID_ANY);
> +     if (!cpfl_rxq) {
> +             PMD_INIT_LOG(ERR, "Failed to allocate memory for rx queue
> data structure");
> +             return -ENOMEM;
> +     }
> +
> +     rxq = &cpfl_rxq->base;
> +     hairpin_info = &cpfl_rxq->hairpin_info;
> +     rxq->nb_rx_desc = nb_desc * 2;
> +     rxq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_start_qid, logic_qid);
> +     rxq->port_id = dev->data->port_id;
> +     rxq->adapter = adapter_base;
> +     rxq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
> +     hairpin_info->hairpin_q = true;
> +     hairpin_info->peer_txp = peer_port;
> +     hairpin_info->peer_txq_id = peer_q;
> +
> +     if (conf->manual_bind != 0)
> +             cpfl_vport->p2p_manual_bind = true;
> +     else
> +             cpfl_vport->p2p_manual_bind = false;
> +
> +     /* setup 1 Rx buffer queue for the 1st hairpin rxq */
> +     if (logic_qid == 0) {
> +             bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
> +                                        sizeof(struct idpf_rx_queue),
> +                                        RTE_CACHE_LINE_SIZE,
> +                                        SOCKET_ID_ANY);
> +             if (!bufq1) {
> +                     PMD_INIT_LOG(ERR, "Failed to allocate memory for
> hairpin Rx buffer queue 1.");
> +                     ret = -ENOMEM;
> +                     goto err_alloc_bufq1;
> +             }
> +             qid = 2 * logic_qid;
> +             ret = cpfl_rx_hairpin_bufq_setup(dev, bufq1, qid, nb_desc);
> +             if (ret) {
> +                     PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer
> queue 1");
> +                     ret = -EINVAL;
> +                     goto err_setup_bufq1;
> +             }
> +             cpfl_vport->p2p_rx_bufq = bufq1;
> +     }
> +
> +     rxq->bufq1 = cpfl_vport->p2p_rx_bufq;
> +     rxq->bufq2 = NULL;
> +
> +     cpfl_vport->nb_p2p_rxq++;
> +     rxq->q_set = true;
> +     dev->data->rx_queues[queue_idx] = cpfl_rxq;
> +
> +     return 0;
> +
> +err_setup_bufq1:
> +     rte_free(bufq1);
> +err_alloc_bufq1:
> +     rte_free(rxq);
> +
> +     return ret;
> +}
> +
> +int
> +cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +                         uint16_t nb_desc,
> +                         const struct rte_eth_hairpin_conf *conf)
> +{
> +     struct cpfl_vport *cpfl_vport =
> +         (struct cpfl_vport *)dev->data->dev_private;
> +
> +     struct idpf_vport *vport = &cpfl_vport->base;
> +     struct idpf_adapter *adapter_base = vport->adapter;
> +     uint16_t logic_qid = cpfl_vport->nb_p2p_txq;
> +     struct cpfl_txq_hairpin_info *hairpin_info;
> +     struct idpf_hw *hw = &adapter_base->hw;
> +     struct cpfl_tx_queue *cpfl_txq;
> +     struct idpf_tx_queue *txq, *cq;
> +     const struct rte_memzone *mz;
> +     uint32_t ring_size;
> +     uint16_t peer_port, peer_q;
> +
> +     if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
> +             PMD_INIT_LOG(ERR, "Only spilt queue model supports hairpin
> queue.");
> +             return -EINVAL;
> +     }
> +
> +     if (conf->peer_count != 1) {
> +             PMD_INIT_LOG(ERR, "Can't support Tx hairpin queue peer
> count %d", conf->peer_count);
> +             return -EINVAL;
> +     }
> +
> +     peer_port = conf->peers[0].port;
> +     peer_q = conf->peers[0].queue;
> +
> +     if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
> +         nb_desc > CPFL_MAX_RING_DESC ||
> +         nb_desc < CPFL_MIN_RING_DESC) {
> +             PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is
> invalid",
> +                          nb_desc);
> +             return -EINVAL;
> +     }
> +
> +     /* Free memory if needed. */
> +     if (dev->data->tx_queues[queue_idx]) {
> +             cpfl_tx_queue_release(dev->data->tx_queues[queue_idx]);
> +             dev->data->tx_queues[queue_idx] = NULL;
> +     }
> +
> +     /* Allocate the TX queue data structure. */
> +     cpfl_txq = rte_zmalloc_socket("cpfl hairpin txq",
> +                              sizeof(struct cpfl_tx_queue),
> +                              RTE_CACHE_LINE_SIZE,
> +                              SOCKET_ID_ANY);
> +     if (!cpfl_txq) {
> +             PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue
> structure");
> +             return -ENOMEM;
> +     }
> +
> +     txq = &cpfl_txq->base;
> +     hairpin_info = &cpfl_txq->hairpin_info;
> +     /* Txq ring length should be 2 times the Tx completion queue size. */
> +     txq->nb_tx_desc = nb_desc * 2;
> +     txq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.tx_start_qid, logic_qid);
> +     txq->port_id = dev->data->port_id;
> +     hairpin_info->hairpin_q = true;
> +     hairpin_info->peer_rxp = peer_port;
> +     hairpin_info->peer_rxq_id = peer_q;
> +
> +     if (conf->manual_bind != 0)
> +             cpfl_vport->p2p_manual_bind = true;
> +     else
> +             cpfl_vport->p2p_manual_bind = false;
> +
> +     /* Tx hairpin queue always allocates the Tx HW ring */
> +     ring_size = RTE_ALIGN(txq->nb_tx_desc * CPFL_P2P_DESC_LEN,
> +                           CPFL_DMA_MEM_ALIGN);
> +     mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_ring", logic_qid,
> +                                   ring_size + CPFL_P2P_RING_BUF,
> +                                   CPFL_RING_BASE_ALIGN,
> +                                   dev->device->numa_node);
> +     if (!mz) {
> +             PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +             rte_free(txq->sw_ring);
> +             rte_free(txq);
> +             return -ENOMEM;
> +     }
> +
> +     txq->tx_ring_phys_addr = mz->iova;
> +     txq->desc_ring = mz->addr;
> +     txq->mz = mz;
> +
> +     cpfl_tx_hairpin_descq_reset(txq);
> +     txq->qtx_tail = hw->hw_addr +
> +             cpfl_hw_qtail_get(cpfl_vport->p2p_q_chunks_info.tx_qtail_start,
> +                               logic_qid,
> +                               cpfl_vport->p2p_q_chunks_info.tx_qtail_spacing);
> +     txq->ops = &def_txq_ops;
> +
> +     if (cpfl_vport->p2p_tx_complq == NULL) {
[Liu, Mingxia] In cpfl_rx_hairpin_queue_setup(), "logic_qid" is used to
identify whether it is the first time the "p2p_rx_bufq" buffer is allocated.
Can the two checks be unified, using either logic_qid == 0 or
p2p_tx_complq/p2p_rx_bufq == NULL in both places?
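
For example, a minimal sketch of the unified check (untested, reusing only
the fields introduced in this patch) would be to make the Rx path mirror the
Tx path's NULL test instead of testing logic_qid:

    /* setup 1 Rx buffer queue for the 1st hairpin rxq */
    if (cpfl_vport->p2p_rx_bufq == NULL) {
            bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
                                       sizeof(struct idpf_rx_queue),
                                       RTE_CACHE_LINE_SIZE,
                                       SOCKET_ID_ANY);
            /* ... same bufq setup and error handling as in this patch ... */
            cpfl_vport->p2p_rx_bufq = bufq1;
    }

Whichever form is chosen, using the same idiom in both setup functions would
make the "first hairpin queue" intent easier to follow.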
> +             cq = rte_zmalloc_socket("cpfl hairpin cq",
> +                                     sizeof(struct idpf_tx_queue),
> +                                     RTE_CACHE_LINE_SIZE,
> +                                     dev->device->numa_node);
> +             if (!cq) {
> +                     PMD_INIT_LOG(ERR, "Failed to allocate memory for tx
> queue structure");
> +                     return -ENOMEM;
> +             }
> +
> +             cq->nb_tx_desc = nb_desc;
> +             cq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.tx_compl_start_qid, 0);
> +             cq->port_id = dev->data->port_id;
> +
> +             /* Tx completion queue always allocates the HW ring */
> +             ring_size = RTE_ALIGN(cq->nb_tx_desc * CPFL_P2P_DESC_LEN,
> +                                   CPFL_DMA_MEM_ALIGN);
> +             mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_compl_ring",
> logic_qid,
> +                                           ring_size + CPFL_P2P_RING_BUF,
> +                                           CPFL_RING_BASE_ALIGN,
> +                                           dev->device->numa_node);
> +             if (!mz) {
> +                     PMD_INIT_LOG(ERR, "Failed to reserve DMA memory
> for TX completion queue");
> +                     rte_free(txq->sw_ring);
> +                     rte_free(txq);
> +                     return -ENOMEM;
> +             }
> +             cq->tx_ring_phys_addr = mz->iova;
> +             cq->compl_ring = mz->addr;
> +             cq->mz = mz;
> +
> +             cpfl_tx_hairpin_complq_reset(cq);
> +             cpfl_vport->p2p_tx_complq = cq;
> +     }
> +
> +     txq->complq = cpfl_vport->p2p_tx_complq;
> +
> +     cpfl_vport->nb_p2p_txq++;
> +     txq->q_set = true;
> +     dev->data->tx_queues[queue_idx] = cpfl_txq;
> +
> +     return 0;
> +}
> +
>  int
>  cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
>  {
> @@ -865,6 +1234,8 @@ cpfl_set_rx_function(struct rte_eth_dev *dev)
>               if (vport->rx_vec_allowed) {
>                       for (i = 0; i < dev->data->nb_rx_queues; i++) {
>                               cpfl_rxq = dev->data->rx_queues[i];
> +                             if (cpfl_rxq->hairpin_info.hairpin_q)
> +                                     continue;
>                               (void)idpf_qc_splitq_rx_vec_setup(&cpfl_rxq->base);
>                       }
>  #ifdef CC_AVX512_SUPPORT
> diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h
> index 3a87a1f4b3..5e9f2dada7 100644
> --- a/drivers/net/cpfl/cpfl_rxtx.h
> +++ b/drivers/net/cpfl/cpfl_rxtx.h
> @@ -13,6 +13,7 @@
>  #define CPFL_MIN_RING_DESC   32
>  #define CPFL_MAX_RING_DESC   4096
>  #define CPFL_DMA_MEM_ALIGN   4096
> +#define CPFL_P2P_DESC_LEN            16
>  #define CPFL_MAX_HAIRPINQ_RX_2_TX    1
>  #define CPFL_MAX_HAIRPINQ_TX_2_RX    1
>  #define CPFL_MAX_HAIRPINQ_NB_DESC    1024
> @@ -21,6 +22,10 @@
>  #define CPFL_P2P_NB_TX_COMPLQ                1
>  #define CPFL_P2P_NB_QUEUE_GRPS               1
>  #define CPFL_P2P_QUEUE_GRP_ID                1
> +#define CPFL_P2P_NB_MBUF             4096
> +#define CPFL_P2P_CACHE_SIZE          250
> +#define CPFL_P2P_MBUF_SIZE           2048
> +#define CPFL_P2P_RING_BUF            128
>  /* Base address of the HW descriptor ring should be 128B aligned. */
>  #define CPFL_RING_BASE_ALIGN 128
> 
> @@ -31,12 +36,26 @@
> 
>  #define CPFL_SUPPORT_CHAIN_NUM 5
> 
> +struct cpfl_rxq_hairpin_info {
> +     bool hairpin_q;         /* if rx queue is a hairpin queue */
> +     uint16_t peer_txp;
> +     uint16_t peer_txq_id;
> +};
> +
>  struct cpfl_rx_queue {
>       struct idpf_rx_queue base;
> +     struct cpfl_rxq_hairpin_info hairpin_info;
>  };
> +
> +struct cpfl_txq_hairpin_info {
> +     bool hairpin_q;         /* if tx queue is a hairpin queue */
> +     uint16_t peer_rxp;
> +     uint16_t peer_rxq_id;
>  };
> 
>  struct cpfl_tx_queue {
>       struct idpf_tx_queue base;
> +     struct cpfl_txq_hairpin_info hairpin_info;
>  };
> 
>  int cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> @@ -57,4 +76,11 @@ void cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
>  void cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
>  void cpfl_set_rx_function(struct rte_eth_dev *dev);
>  void cpfl_set_tx_function(struct rte_eth_dev *dev);
> +uint16_t cpfl_hw_qid_get(uint16_t start_qid, uint16_t offset);
> +uint64_t cpfl_hw_qtail_get(uint64_t tail_start, uint16_t offset,
> +                          uint64_t tail_spacing);
> +int cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +                             uint16_t nb_desc,
> +                             const struct rte_eth_hairpin_conf *conf);
> +int cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +                             uint16_t nb_desc,
> +                             const struct rte_eth_hairpin_conf *conf);
>  #endif /* _CPFL_RXTX_H_ */
> diff --git a/drivers/net/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
> index 5690b17911..d8e9191196 100644
> --- a/drivers/net/cpfl/cpfl_rxtx_vec_common.h
> +++ b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
> @@ -85,6 +85,8 @@ cpfl_rx_vec_dev_check_default(struct rte_eth_dev *dev)
>               cpfl_rxq = dev->data->rx_queues[i];
>               default_ret = cpfl_rx_vec_queue_default(&cpfl_rxq->base);
>               if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
> +                     if (cpfl_rxq->hairpin_info.hairpin_q)
> +                             continue;
>                       splitq_ret = cpfl_rx_splitq_vec_default(&cpfl_rxq->base);
>                       ret = splitq_ret && default_ret;
>               } else {
> @@ -106,6 +108,8 @@ cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev)
> 
>       for (i = 0; i < dev->data->nb_tx_queues; i++) {
>               cpfl_txq = dev->data->tx_queues[i];
> +             if (cpfl_txq->hairpin_info.hairpin_q)
> +                     continue;
>               ret = cpfl_tx_vec_queue_default(&cpfl_txq->base);
>               if (ret == CPFL_SCALAR_PATH)
>                       return CPFL_SCALAR_PATH;
> --
> 2.26.2
