On Tue, Sep 9, 2014 at 5:00 PM, Alex Wang <al...@nicira.com> wrote:
> Before this commit, ovs creates one tx and one rx queue for
> each dpdk interface and uses only one poll thread for handling
> I/O of all dpdk interfaces.  An upcoming patch will allow multiple
> poll threads to be created.  In preparation, this commit changes
> the default to create multiple tx and rx queues.
>
> Specifically, the default number of rx queues will be the number
> of dpdk interfaces on the numa node.  And the upcoming work
> will assign each rx queue to a different poll thread.  The default
> number of tx queues will be the number of cpu cores on the machine.
> Although not all the tx queues will be used, each poll thread will
> have its own queue for transmission on the dpdk interface.
>
I thought we had decided to create one rx queue for each core on the local
numa node.  Is there a problem with that?
Creating one rx queue per core is more predictable than the number of dpdk
devices on the switch at a given point.
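
For illustration, a per-core default could look roughly like the sketch
below (not part of this patch; it assumes the ovs_numa_get_n_cores_on_numa()
helper in lib/ovs-numa.h, so treat the exact name as an assumption):

    /* Sketch: derive the default rx queue count from the number of cpu
     * cores on the device's numa node instead of the number of dpdk
     * devices on that node. */
    static int
    dpdk_default_n_rxq(int numa_id)
    {
        int n_cores = ovs_numa_get_n_cores_on_numa(numa_id);

        /* Fall back to a single queue if the core topology is unknown. */
        return n_cores > 0 ? n_cores : 1;
    }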

> Signed-off-by: Alex Wang <al...@nicira.com>
>
> ---
> PATCH -> V2
> - rebase and refactor the code.
>
> V2 -> V3:
> - rebase.
> ---
>  lib/dpif-netdev.h |    1 -
>  lib/netdev-dpdk.c |   54 ++++++++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 45 insertions(+), 10 deletions(-)
>
> diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
> index fb9d0e2..adbbf87 100644
> --- a/lib/dpif-netdev.h
> +++ b/lib/dpif-netdev.h
> @@ -42,7 +42,6 @@ static inline void dp_packet_pad(struct ofpbuf *b)
>
>  #define NETDEV_QID_NONE INT_MAX
>
> -#define NR_QUEUE   1
>  #define NR_PMD_THREADS 1
>
>  #ifdef  __cplusplus
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 4f9c5c2..c7bc4c5 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -36,6 +36,7 @@
>  #include "odp-util.h"
>  #include "ofp-print.h"
>  #include "ofpbuf.h"
> +#include "ovs-numa.h"
>  #include "ovs-thread.h"
>  #include "ovs-rcu.h"
>  #include "packet-dpif.h"
> @@ -154,6 +155,7 @@ struct dpdk_mp {
>      struct list list_node OVS_GUARDED_BY(dpdk_mutex);
>  };
>
> +/* There will be one 'struct dpdk_tx_queue' created for each cpu core. */
>  struct dpdk_tx_queue {
>      rte_spinlock_t tx_lock;
>      int count;
> @@ -182,7 +184,7 @@ struct netdev_dpdk {
>      int port_id;
>      int max_packet_len;
>
> -    struct dpdk_tx_queue tx_q[NR_QUEUE];
> +    struct dpdk_tx_queue *tx_q;
>
>      struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);
>
> @@ -387,6 +389,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
>      return NULL;
>  }
>
> +/* Returns the number of dpdk ifaces on the numa node 'numa_id'. */
> +static int
> +dpdk_get_n_devs(int numa_id)
> +{
> +    int count = 0;
> +    int i;
> +
> +    ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
> +
> +    for (i = 0; i < rte_eth_dev_count(); i++) {
> +        if (rte_eth_dev_socket_id(i) == numa_id) {
> +            count++;
> +        }
> +    }
> +    ovs_assert(count);
> +
> +    return count;
> +}
> +
>  static int
>  dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
>  {
> @@ -399,13 +420,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
>          return ENODEV;
>      }
>
> -    diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE,  &port_conf);
> +    diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq,
> +                                 &port_conf);
>      if (diag) {
>          VLOG_ERR("eth dev config error %d",diag);
>          return -diag;
>      }
>
> -    for (i = 0; i < NR_QUEUE; i++) {
> +    for (i = 0; i < dev->up.n_txq; i++) {
>          diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
>                                        dev->socket_id, &tx_conf);
>          if (diag) {
> @@ -414,7 +436,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
>          }
>      }
>
> -    for (i = 0; i < NR_QUEUE; i++) {
> +    for (i = 0; i < dev->up.n_rxq; i++) {
>          diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
>                                        dev->socket_id,
>                                        &rx_conf, dev->dpdk_mp->mp);
> @@ -466,15 +488,27 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
>  {
>      struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>      int err = 0;
> -    int i;
> +    int n_cores, i;
>
>      ovs_mutex_init(&netdev->mutex);
>
>      ovs_mutex_lock(&netdev->mutex);
>
> -    for (i = 0; i < NR_QUEUE; i++) {
> +    /* There can only be ovs_numa_get_n_cores() pmd threads, so create a tx_q
> +     * for each of them. */
> +    n_cores = ovs_numa_get_n_cores();
> +    if (n_cores == OVS_CORE_UNSPEC) {
> +        VLOG_WARN_RL(&rl, "netdev_dpdk txq creation failed due to no cpu"
> +                     " core info");
> +        err = ENOENT;
> +        goto unlock;
> +    }
> +    netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q);
> +    for (i = 0; i < n_cores; i++) {
>          rte_spinlock_init(&netdev->tx_q[i].tx_lock);
>      }
> +    netdev_->n_txq = n_cores;
> +    netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id);
>

Rather than calculating n_txq and n_rxq here, these values should be
calculated by dpif-netdev and passed down to the netdev implementation.
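
As a rough sketch of that split (the names below are hypothetical, not an
existing API; dpif-netdev would own the policy and the provider would only
apply it):

    /* In dpif-netdev: decide both queue counts once and push them down. */
    static int
    dp_netdev_port_configure_queues(struct netdev *netdev)
    {
        int n_cores = ovs_numa_get_n_cores();
        int n_txq = n_cores != OVS_CORE_UNSPEC ? n_cores : 1;
        int n_rxq = 1;   /* policy lives here; could also be per-core, as above */

        /* Hypothetical provider hook: netdev-dpdk would just apply these
         * values instead of computing them itself. */
        return netdev_request_queues(netdev, n_txq, n_rxq);
    }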


>      netdev->port_id = port_no;
>
> @@ -495,12 +529,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
>      if (err) {
>          goto unlock;
>      }
> -    netdev_->n_txq = NR_QUEUE;
> -    netdev_->n_rxq = NR_QUEUE;
>
>      list_push_back(&dpdk_list, &netdev->list_node);
>
>  unlock:
> +    if (err) {
> +        rte_free(netdev->tx_q);
> +    }
>      ovs_mutex_unlock(&netdev->mutex);
>      return err;
>  }
> @@ -552,6 +587,7 @@ netdev_dpdk_destruct(struct netdev *netdev_)
>      ovs_mutex_unlock(&dev->mutex);
>
>      ovs_mutex_lock(&dpdk_mutex);
> +    rte_free(dev->tx_q);
>      list_remove(&dev->list_node);
>      dpdk_mp_put(dev->dpdk_mp);
>      ovs_mutex_unlock(&dpdk_mutex);
> @@ -812,7 +848,7 @@ netdev_dpdk_send(struct netdev *netdev, int qid, struct dpif_packet **pkts,
>          int next_tx_idx = 0;
>          int dropped = 0;
>
> -        qid = rte_lcore_id() % NR_QUEUE;
> +        qid = rte_lcore_id();
>
>          for (i = 0; i < cnt; i++) {
>              int size = ofpbuf_size(&pkts[i]->ofpbuf);
> --
> 1.7.9.5
>
