Hi

On 26/09/2019 13:18, Andrew Rybchenko wrote:
> On 9/26/19 9:28 AM, Ori Kam wrote:
>> This commit introduce the RX/TX hairpin setup function.
> 
> RX/TX should be Rx/Tx here and everywhere below.
> 
>> Hairpin is RX/TX queue that is used by the nic in order to offload
>> wire to wire traffic.
>>
>> Each hairpin queue is binded to one or more queues from other type.
>> For example TX hairpin queue should be binded to at least 1 RX hairpin
>> queue and vice versa.
> 
> How should application find out that hairpin queues are supported?

You might want to look this patch "[dpdk-dev] [PATCH v2 0/4] get Rx/Tx
packet burst mode information" from Haiyue Wang.

Where he adds a information bitmask to describe the capabilities of the PMD.

Ray K

> How many?
> How should application find out which ports/queues could be used for
> pining?
> Is hair-pinning domain on device level sufficient to expose limitations?
> 
>> Signed-off-by: Ori Kam <or...@mellanox.com>
>> ---
>>   lib/librte_ethdev/rte_ethdev.c           | 213
>> +++++++++++++++++++++++++++++++
>>   lib/librte_ethdev/rte_ethdev.h           | 145 +++++++++++++++++++++
>>   lib/librte_ethdev/rte_ethdev_core.h      |  18 +++
>>   lib/librte_ethdev/rte_ethdev_version.map |   4 +
>>   4 files changed, 380 insertions(+)
>>
>> diff --git a/lib/librte_ethdev/rte_ethdev.c
>> b/lib/librte_ethdev/rte_ethdev.c
>> index 30b0c78..4021f38 100644
>> --- a/lib/librte_ethdev/rte_ethdev.c
>> +++ b/lib/librte_ethdev/rte_ethdev.c
>> @@ -1701,6 +1701,115 @@ struct rte_eth_dev *
>>   }
>>     int
>> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>> +                   uint16_t nb_rx_desc, unsigned int socket_id,
>> +                   const struct rte_eth_rxconf *rx_conf,
>> +                   const struct rte_eth_hairpin_conf *hairpin_conf)
> 
> Below code duplicates rte_eth_rx_queue_setup() a lot and it is very
> bad from maintenance point of view. Similar problem with Tx hairpin
> queue setup.
> 
>> +{
>> +    int ret;
>> +    struct rte_eth_dev *dev;
>> +    struct rte_eth_dev_info dev_info;
>> +    struct rte_eth_rxconf local_conf;
>> +    void **rxq;
>> +
>> +    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
>> +
>> +    dev = &rte_eth_devices[port_id];
>> +    if (rx_queue_id >= dev->data->nb_rx_queues) {
>> +        RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
>> +        return -EINVAL;
>> +    }
>> +
>> +    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
>> +    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
>> +                -ENOTSUP);
>> +
>> +    rte_eth_dev_info_get(port_id, &dev_info);
>> +
>> +    /* Use default specified by driver, if nb_rx_desc is zero */
>> +    if (nb_rx_desc == 0) {
>> +        nb_rx_desc = dev_info.default_rxportconf.ring_size;
>> +        /* If driver default is also zero, fall back on EAL default */
>> +        if (nb_rx_desc == 0)
>> +            nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
>> +    }
>> +
>> +    if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
>> +            nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
>> +            nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
>> +
>> +        RTE_ETHDEV_LOG(ERR,
>> +                   "Invalid value for nb_rx_desc(=%hu), should be: "
>> +                   "<= %hu, >= %hu, and a product of %hu\n",
>> +            nb_rx_desc, dev_info.rx_desc_lim.nb_max,
>> +            dev_info.rx_desc_lim.nb_min,
>> +            dev_info.rx_desc_lim.nb_align);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (dev->data->dev_started &&
>> +        !(dev_info.dev_capa &
>> +            RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
>> +        return -EBUSY;
>> +
>> +    if (dev->data->dev_started &&
>> +        (dev->data->rx_queue_state[rx_queue_id] !=
>> +            RTE_ETH_QUEUE_STATE_STOPPED))
>> +        return -EBUSY;
>> +
>> +    rxq = dev->data->rx_queues;
>> +    if (rxq[rx_queue_id]) {
>> +        RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
>> +                    -ENOTSUP);
>> +        (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
>> +        rxq[rx_queue_id] = NULL;
>> +    }
>> +
>> +    if (rx_conf == NULL)
>> +        rx_conf = &dev_info.default_rxconf;
>> +
>> +    local_conf = *rx_conf;
>> +
>> +    /*
>> +     * If an offloading has already been enabled in
>> +     * rte_eth_dev_configure(), it has been enabled on all queues,
>> +     * so there is no need to enable it in this queue again.
>> +     * The local_conf.offloads input to underlying PMD only carries
>> +     * those offloadings which are only enabled on this queue and
>> +     * not enabled on all queues.
>> +     */
>> +    local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads;
>> +
>> +    /*
>> +     * New added offloadings for this queue are those not enabled in
>> +     * rte_eth_dev_configure() and they must be per-queue type.
>> +     * A pure per-port offloading can't be enabled on a queue while
>> +     * disabled on another queue. A pure per-port offloading can't
>> +     * be enabled for any queue as new added one if it hasn't been
>> +     * enabled in rte_eth_dev_configure().
>> +     */
>> +    if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
>> +         local_conf.offloads) {
>> +        RTE_ETHDEV_LOG(ERR,
>> +            "Ethdev port_id=%d rx_queue_id=%d, "
>> +            "new added offloads 0x%"PRIx64" must be "
>> +            "within per-queue offload capabilities "
>> +            "0x%"PRIx64" in %s()\n",
>> +            port_id, rx_queue_id, local_conf.offloads,
>> +            dev_info.rx_queue_offload_capa,
>> +            __func__);
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
>> +                              nb_rx_desc, socket_id,
>> +                              &local_conf,
>> +                              hairpin_conf);
>> +
>> +    return eth_err(port_id, ret);
>> +}
>> +
>> +int
>>   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>>                  uint16_t nb_tx_desc, unsigned int socket_id,
>>                  const struct rte_eth_txconf *tx_conf)
>> @@ -1799,6 +1908,110 @@ struct rte_eth_dev *
>>                  tx_queue_id, nb_tx_desc, socket_id, &local_conf));
>>   }
>>   +int
>> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>> +                   uint16_t nb_tx_desc, unsigned int socket_id,
>> +                   const struct rte_eth_txconf *tx_conf,
>> +                   const struct rte_eth_hairpin_conf *hairpin_conf)
>> +{
>> +    struct rte_eth_dev *dev;
>> +    struct rte_eth_dev_info dev_info;
>> +    struct rte_eth_txconf local_conf;
>> +    void **txq;
>> +
>> +    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
>> +
>> +    dev = &rte_eth_devices[port_id];
>> +    if (tx_queue_id >= dev->data->nb_tx_queues) {
>> +        RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
>> +        return -EINVAL;
>> +    }
>> +
>> +    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
>> +    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
>> +                -ENOTSUP);
>> +
>> +    rte_eth_dev_info_get(port_id, &dev_info);
>> +
>> +    /* Use default specified by driver, if nb_tx_desc is zero */
>> +    if (nb_tx_desc == 0) {
>> +        nb_tx_desc = dev_info.default_txportconf.ring_size;
>> +        /* If driver default is zero, fall back on EAL default */
>> +        if (nb_tx_desc == 0)
>> +            nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE;
>> +    }
>> +    if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
>> +        nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
>> +        nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
>> +        RTE_ETHDEV_LOG(ERR,
>> +                   "Invalid value for nb_tx_desc(=%hu), "
>> +                   "should be: <= %hu, >= %hu, and a product of "
>> +                   " %hu\n",
>> +                   nb_tx_desc, dev_info.tx_desc_lim.nb_max,
>> +                   dev_info.tx_desc_lim.nb_min,
>> +                   dev_info.tx_desc_lim.nb_align);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (dev->data->dev_started &&
>> +        !(dev_info.dev_capa &
>> +          RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
>> +        return -EBUSY;
>> +
>> +    if (dev->data->dev_started &&
>> +        (dev->data->tx_queue_state[tx_queue_id] !=
>> +         RTE_ETH_QUEUE_STATE_STOPPED))
>> +        return -EBUSY;
>> +
>> +    txq = dev->data->tx_queues;
>> +    if (txq[tx_queue_id]) {
>> +        RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
>> +                    -ENOTSUP);
>> +        (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
>> +        txq[tx_queue_id] = NULL;
>> +    }
>> +
>> +    if (tx_conf == NULL)
>> +        tx_conf = &dev_info.default_txconf;
>> +
>> +    local_conf = *tx_conf;
>> +
>> +    /*
>> +     * If an offloading has already been enabled in
>> +     * rte_eth_dev_configure(), it has been enabled on all queues,
>> +     * so there is no need to enable it in this queue again.
>> +     * The local_conf.offloads input to underlying PMD only carries
>> +     * those offloadings which are only enabled on this queue and
>> +     * not enabled on all queues.
>> +     */
>> +    local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads;
>> +
>> +    /*
>> +     * New added offloadings for this queue are those not enabled in
>> +     * rte_eth_dev_configure() and they must be per-queue type.
>> +     * A pure per-port offloading can't be enabled on a queue while
>> +     * disabled on another queue. A pure per-port offloading can't
>> +     * be enabled for any queue as new added one if it hasn't been
>> +     * enabled in rte_eth_dev_configure().
>> +     */
>> +    if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
>> +         local_conf.offloads) {
>> +        RTE_ETHDEV_LOG(ERR,
>> +                   "Ethdev port_id=%d tx_queue_id=%d, new added "
>> +                   "offloads 0x%"PRIx64" must be within "
>> +                   "per-queue offload capabilities 0x%"PRIx64" "
>> +                   "in %s()\n",
>> +                   port_id, tx_queue_id, local_conf.offloads,
>> +                   dev_info.tx_queue_offload_capa,
>> +                   __func__);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return eth_err(port_id, (*dev->dev_ops->tx_hairpin_queue_setup)
>> +               (dev, tx_queue_id, nb_tx_desc, socket_id, &local_conf,
>> +            hairpin_conf));
>> +}
>> +
>>   void
>>   rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t
>> unsent,
>>           void *userdata __rte_unused)
>> diff --git a/lib/librte_ethdev/rte_ethdev.h
>> b/lib/librte_ethdev/rte_ethdev.h
>> index 475dbda..b3b1597 100644
>> --- a/lib/librte_ethdev/rte_ethdev.h
>> +++ b/lib/librte_ethdev/rte_ethdev.h
>> @@ -803,6 +803,30 @@ struct rte_eth_txconf {
>>       uint64_t offloads;
>>   };
>>   +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>> + *
>> + * A structure used to hold hairpin peer data.
>> + */
>> +struct rte_eth_hairpin_peer {
>> +    uint16_t port; /**< Peer port. */
>> +    uint16_t queue; /**< Peer queue. */
>> +};
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>> + *
>> + * A structure used to configure hairpin binding.
>> + */
>> +struct rte_eth_hairpin_conf {
>> +    uint16_t peer_n; /**< The number of peers. */
>> +    struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
>> +};
>> +
>>   /**
>>    * A structure contains information about HW descriptor ring
>> limitations.
>>    */
>> @@ -1769,6 +1793,60 @@ int rte_eth_rx_queue_setup(uint16_t port_id,
>> uint16_t rx_queue_id,
>>           struct rte_mempool *mb_pool);
>>     /**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>> + *
>> + * Allocate and set up a hairpin receive queue for an Ethernet device.
>> + *
>> + * The function set up the selected queue to be used in hairpin.
>> + *
>> + * @param port_id
>> + *   The port identifier of the Ethernet device.
>> + * @param rx_queue_id
>> + *   The index of the receive queue to set up.
>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
>> supplied
>> + *   to rte_eth_dev_configure().
> 
> Is any Rx queue may be setup as hairpin queue?
> Can it be still used for regular traffic?
> 
>> + * @param nb_rx_desc
>> + *   The number of receive descriptors to allocate for the receive ring.
> 
> Does it still make sense for hairpin queue?
> 
>> + * @param socket_id
>> + *   The *socket_id* argument is the socket identifier in case of NUMA.
>> + *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
>> + *   the DMA memory allocated for the receive descriptors of the ring.
> 
> Is it still required to be provided for hairpin Rx queue?
> 
>> + * @param rx_conf
>> + *   The pointer to the configuration data to be used for the receive
>> queue.
>> + *   NULL value is allowed, in which case default RX configuration
>> + *   will be used.
>> + *   The *rx_conf* structure contains an *rx_thresh* structure with
>> the values
>> + *   of the Prefetch, Host, and Write-Back threshold registers of the
>> receive
>> + *   ring.
>> + *   In addition it contains the hardware offloads features to
>> activate using
>> + *   the DEV_RX_OFFLOAD_* flags.
>> + *   If an offloading set in rx_conf->offloads
>> + *   hasn't been set in the input argument eth_conf->rxmode.offloads
>> + *   to rte_eth_dev_configure(), it is a new added offloading, it
>> must be
>> + *   per-queue type and it is enabled for the queue.
>> + *   No need to repeat any bit in rx_conf->offloads which has already
>> been
>> + *   enabled in rte_eth_dev_configure() at port level. An offloading
>> enabled
>> + *   at port level can't be disabled at queue level.
> 
> Which offloads still make sense in the case of hairpin Rx queue?
> What about threshhods, drop enable?
> 
>> + * @param hairpin_conf
>> + *   The pointer to the hairpin binding configuration.
>> + * @return
>> + *   - 0: Success, receive queue correctly set up.
>> + *   - -EINVAL: The size of network buffers which can be allocated
>> from the
>> + *      memory pool does not fit the various buffer sizes allowed by the
>> + *      device controller.
>> + *   - -ENOMEM: Unable to allocate the receive ring descriptors or to
>> + *      allocate network memory buffers from the memory pool when
>> + *      initializing receive descriptors.
>> + */
>> +__rte_experimental
>> +int rte_eth_rx_hairpin_queue_setup
>> +    (uint16_t port_id, uint16_t rx_queue_id,
>> +     uint16_t nb_rx_desc, unsigned int socket_id,
>> +     const struct rte_eth_rxconf *rx_conf,
>> +     const struct rte_eth_hairpin_conf *hairpin_conf);
>> +
>> +/**
>>    * Allocate and set up a transmit queue for an Ethernet device.
>>    *
>>    * @param port_id
>> @@ -1821,6 +1899,73 @@ int rte_eth_tx_queue_setup(uint16_t port_id,
>> uint16_t tx_queue_id,
>>           const struct rte_eth_txconf *tx_conf);
>>     /**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>> + *
>> + * Allocate and set up a transmit hairpin queue for an Ethernet device.
>> + *
>> + * @param port_id
>> + *   The port identifier of the Ethernet device.
>> + * @param tx_queue_id
>> + *   The index of the transmit queue to set up.
>> + *   The value must be in the range [0, nb_tx_queue - 1] previously
>> supplied
>> + *   to rte_eth_dev_configure().
> 
> Is any Tx queue may be setup as hairpin queue?
> 
>> + * @param nb_tx_desc
>> + *   The number of transmit descriptors to allocate for the transmit
>> ring.
> 
> Is it really required for hairpin queue? Are min/max/align limits still
> the same?
> 
>> + * @param socket_id
>> + *   The *socket_id* argument is the socket identifier in case of NUMA.
>> + *   Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
>> + *   the DMA memory allocated for the transmit descriptors of the ring.
> 
> Does it still make sense for Tx hairpin queue?
> 
>> + * @param tx_conf
>> + *   The pointer to the configuration data to be used for the
>> transmit queue.
>> + *   NULL value is allowed, in which case default RX configuration
>> + *   will be used.
>> + *   The *tx_conf* structure contains the following data:
>> + *   - The *tx_thresh* structure with the values of the Prefetch,
>> Host, and
>> + *     Write-Back threshold registers of the transmit ring.
>> + *     When setting Write-Back threshold to the value greater then zero,
>> + *     *tx_rs_thresh* value should be explicitly set to one.
>> + *   - The *tx_free_thresh* value indicates the [minimum] number of
>> network
>> + *     buffers that must be pending in the transmit ring to trigger
>> their
>> + *     [implicit] freeing by the driver transmit function.
>> + *   - The *tx_rs_thresh* value indicates the [minimum] number of
>> transmit
>> + *     descriptors that must be pending in the transmit ring before
>> setting the
>> + *     RS bit on a descriptor by the driver transmit function.
>> + *     The *tx_rs_thresh* value should be less or equal then
>> + *     *tx_free_thresh* value, and both of them should be less then
>> + *     *nb_tx_desc* - 3.
> 
> I'm not sure that everything above makes sense for hairpin Tx queue.
> 
>> + *   - The *txq_flags* member contains flags to pass to the TX queue
>> setup
>> + *     function to configure the behavior of the TX queue. This
>> should be set
>> + *     to 0 if no special configuration is required.
>> + *     This API is obsolete and will be deprecated. Applications
>> + *     should set it to ETH_TXQ_FLAGS_IGNORE and use
>> + *     the offloads field below.
> 
> There is no txq_flags for a long time already. So, I'm wondering when it
> was
> copies from rte_eth_tx_queue_setup().
> 
>> + *   - The *offloads* member contains Tx offloads to be enabled.
>> + *     If an offloading set in tx_conf->offloads
>> + *     hasn't been set in the input argument eth_conf->txmode.offloads
>> + *     to rte_eth_dev_configure(), it is a new added offloading, it
>> must be
>> + *     per-queue type and it is enabled for the queue.
>> + *     No need to repeat any bit in tx_conf->offloads which has
>> already been
>> + *     enabled in rte_eth_dev_configure() at port level. An
>> offloading enabled
>> + *     at port level can't be disabled at queue level.
> 
> Which offloads do really make sense and valid to use for hairpin Tx queues?
> Do we need separate caps for hairpin offloads?
> 
>> + *
>> + *     Note that setting *tx_free_thresh* or *tx_rs_thresh* value to
>> 0 forces
>> + *     the transmit function to use default values.
>> + * @param hairpin_conf
>> + *   The hairpin binding configuration.
>> + *
>> + * @return
>> + *   - 0: Success, the transmit queue is correctly set up.
>> + *   - -ENOMEM: Unable to allocate the transmit ring descriptors.
>> + */
>> +__rte_experimental
>> +int rte_eth_tx_hairpin_queue_setup
>> +    (uint16_t port_id, uint16_t tx_queue_id,
>> +     uint16_t nb_tx_desc, unsigned int socket_id,
>> +     const struct rte_eth_txconf *tx_conf,
>> +     const struct rte_eth_hairpin_conf *hairpin_conf);
>> +
>> +/**
>>    * Return the NUMA socket to which an Ethernet device is connected
>>    *
>>    * @param port_id
>>
> 
> [snip]
> 

Reply via email to