On Tue, Sep 9, 2014 at 5:00 PM, Alex Wang <al...@nicira.com> wrote: > Before this commit, ovs creates one tx and one rx queue for > each dpdk interface and uses only one poll thread for handling > I/O of all dpdk interfaces. An upcoming patch will allow multiple > poll threads be created. As a preparation, this commit changes > the default to create multiple tx and rx queues. > > Specifically, the default number of rx queues will be the number > of dpdk interfaces on the numa node. And the upcoming work > will assign each rx queue to a different poll thread. The default > number of tx queues will be the number of cpu cores on the machine. > Although not all the tx queues will be used, each poll thread will > have its own queue for transmission on the dpdk interface. > I thought we had decided to create one rx queue for each core on the local numa node. Is there a problem with this? Creating one rx queue for each core is more predictable than the number of devices on the switch at a given point.
> Signed-off-by: Alex Wang <al...@nicira.com> > > --- > PATCH -> V2 > - rebase and refactor the code. > > V2 -> V3: > - rebase. > --- > lib/dpif-netdev.h | 1 - > lib/netdev-dpdk.c | 54 > ++++++++++++++++++++++++++++++++++++++++++++--------- > 2 files changed, 45 insertions(+), 10 deletions(-) > > diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h > index fb9d0e2..adbbf87 100644 > --- a/lib/dpif-netdev.h > +++ b/lib/dpif-netdev.h > @@ -42,7 +42,6 @@ static inline void dp_packet_pad(struct ofpbuf *b) > > #define NETDEV_QID_NONE INT_MAX > > -#define NR_QUEUE 1 > #define NR_PMD_THREADS 1 > > #ifdef __cplusplus > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 4f9c5c2..c7bc4c5 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -36,6 +36,7 @@ > #include "odp-util.h" > #include "ofp-print.h" > #include "ofpbuf.h" > +#include "ovs-numa.h" > #include "ovs-thread.h" > #include "ovs-rcu.h" > #include "packet-dpif.h" > @@ -154,6 +155,7 @@ struct dpdk_mp { > struct list list_node OVS_GUARDED_BY(dpdk_mutex); > }; > > +/* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ > struct dpdk_tx_queue { > rte_spinlock_t tx_lock; > int count; > @@ -182,7 +184,7 @@ struct netdev_dpdk { > int port_id; > int max_packet_len; > > - struct dpdk_tx_queue tx_q[NR_QUEUE]; > + struct dpdk_tx_queue *tx_q; > > struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex); > > @@ -387,6 +389,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED) > return NULL; > } > > +/* Returns the number of dpdk ifaces on the numa node 'numa_id'. 
*/ > +static int > +dpdk_get_n_devs(int numa_id) > +{ > + int count = 0; > + int i; > + > + ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); > + > + for (i = 0; i < rte_eth_dev_count(); i++) { > + if (rte_eth_dev_socket_id(i) == numa_id) { > + count++; > + } > + } > + ovs_assert(count); > + > + return count; > +} > + > static int > dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) > { > @@ -399,13 +420,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > OVS_REQUIRES(dpdk_mutex) > return ENODEV; > } > > - diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, > &port_conf); > + diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq, > + &port_conf); > if (diag) { > VLOG_ERR("eth dev config error %d",diag); > return -diag; > } > > - for (i = 0; i < NR_QUEUE; i++) { > + for (i = 0; i < dev->up.n_txq; i++) { > diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, > dev->socket_id, &tx_conf); > if (diag) { > @@ -414,7 +436,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > OVS_REQUIRES(dpdk_mutex) > } > } > > - for (i = 0; i < NR_QUEUE; i++) { > + for (i = 0; i < dev->up.n_rxq; i++) { > diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, > dev->socket_id, > &rx_conf, dev->dpdk_mp->mp); > @@ -466,15 +488,27 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no) OVS_REQUIRES(dpdk > { > struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int err = 0; > - int i; > + int n_cores, i; > > ovs_mutex_init(&netdev->mutex); > > ovs_mutex_lock(&netdev->mutex); > > - for (i = 0; i < NR_QUEUE; i++) { > + /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a > tx_q > + * for each of them. 
*/ > + n_cores = ovs_numa_get_n_cores(); > + if (n_cores == OVS_CORE_UNSPEC) { > + VLOG_WARN_RL(&rl, "netdev_dpdk txq creation failed due to no cpu" > + " core info"); > + err = ENOENT; > + goto unlock; > + } > + netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); > + for (i = 0; i < n_cores; i++) { > rte_spinlock_init(&netdev->tx_q[i].tx_lock); > } > + netdev_->n_txq = n_cores; > + netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); > Rather than calculating n_tx_q and n_rx_q, these values should be calculated by dpif-netdev and passed down to netdev implementation. > netdev->port_id = port_no; > > @@ -495,12 +529,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no) OVS_REQUIRES(dpdk > if (err) { > goto unlock; > } > - netdev_->n_txq = NR_QUEUE; > - netdev_->n_rxq = NR_QUEUE; > > list_push_back(&dpdk_list, &netdev->list_node); > > unlock: > + if (err) { > + rte_free(netdev->tx_q); > + } > ovs_mutex_unlock(&netdev->mutex); > return err; > } > @@ -552,6 +587,7 @@ netdev_dpdk_destruct(struct netdev *netdev_) > ovs_mutex_unlock(&dev->mutex); > > ovs_mutex_lock(&dpdk_mutex); > + rte_free(dev->tx_q); > list_remove(&dev->list_node); > dpdk_mp_put(dev->dpdk_mp); > ovs_mutex_unlock(&dpdk_mutex); > @@ -812,7 +848,7 @@ netdev_dpdk_send(struct netdev *netdev, int qid, struct > dpif_packet **pkts, > int next_tx_idx = 0; > int dropped = 0; > > - qid = rte_lcore_id() % NR_QUEUE; > + qid = rte_lcore_id(); > > for (i = 0; i < cnt; i++) { > int size = ofpbuf_size(&pkts[i]->ofpbuf); > -- > 1.7.9.5 > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev