Before this commit, ovs creates only one tx/rx queue for each dpdk interface and uses only one poll thread for handling the I/O of all dpdk interfaces. As one step toward using multiple poll threads, this commit makes ovs, by default, create the same number of rx queues as the number of dpdk interfaces on the cpu socket. Also, each dpdk interface will have one tx queue for each cpu core, even though not all of those queues will be used.
Signed-off-by: Alex Wang <al...@nicira.com> --- lib/dpif-netdev.h | 1 - lib/netdev-dpdk.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 410fcfa..50c1198 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -40,7 +40,6 @@ static inline void dp_packet_pad(struct ofpbuf *b) } } -#define NR_QUEUE 1 #define NR_PMD_THREADS 1 #ifdef __cplusplus diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 109006f..432524f 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -36,6 +36,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-thread.h" #include "ovs-rcu.h" #include "packet-dpif.h" @@ -179,7 +180,9 @@ struct netdev_dpdk { int port_id; int max_packet_len; - struct dpdk_tx_queue tx_q[NR_QUEUE]; + struct dpdk_tx_queue *tx_q; + int n_tx_q; + int n_rx_q; struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex); @@ -384,6 +387,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED) return NULL; } +/* Returns the number of dpdk ifaces on the cpu socket. 
*/ +static int +dpdk_get_n_devs(int socket_id) +{ + int count = 0; + int i; + + ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id)); + + for (i = 0; i < rte_eth_dev_count(); i++) { + if (rte_eth_dev_socket_id(i) == socket_id) { + count++; + } + } + ovs_assert(count); + + return count; +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) { @@ -396,13 +418,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } - diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf); + diag = rte_eth_dev_configure(dev->port_id, dev->n_rx_q, dev->n_tx_q, + &port_conf); if (diag) { VLOG_ERR("eth dev config error %d",diag); return -diag; } - for (i = 0; i < NR_QUEUE; i++) { + for (i = 0; i < dev->n_tx_q; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, &tx_conf); if (diag) { @@ -411,7 +434,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) } } - for (i = 0; i < NR_QUEUE; i++) { + for (i = 0; i < dev->n_rx_q; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, &rx_conf, dev->dpdk_mp->mp); @@ -463,13 +486,25 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err = 0; - int i; + int n_cores, i; ovs_mutex_init(&netdev->mutex); ovs_mutex_lock(&netdev->mutex); - for (i = 0; i < NR_QUEUE; i++) { + netdev->n_rx_q = dpdk_get_n_devs(netdev->socket_id); + + /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q + * for each of them. 
*/ + n_cores = ovs_numa_get_n_cores(); + if (n_cores == OVS_CORE_UNSPEC) { + VLOG_WARN_RL(&rl, "netdev_dpdk init failed due to no cpu core info"); + err = ENOENT; + goto unlock; + } + netdev->n_tx_q = n_cores; + netdev->tx_q = dpdk_rte_mzalloc(netdev->n_tx_q * sizeof *netdev->tx_q); + for (i = 0; i < netdev->n_tx_q; i++) { rte_spinlock_init(&netdev->tx_q[i].tx_lock); } @@ -492,11 +527,14 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) { goto unlock; } - netdev_->n_rxq = NR_QUEUE; + netdev_->n_rxq = netdev->n_rx_q; list_push_back(&dpdk_list, &netdev->list_node); unlock: + if (err) { + rte_free(netdev->tx_q); + } ovs_mutex_unlock(&netdev->mutex); return err; } @@ -548,6 +586,7 @@ netdev_dpdk_destruct(struct netdev *netdev_) ovs_mutex_unlock(&dev->mutex); ovs_mutex_lock(&dpdk_mutex); + rte_free(dev->tx_q); list_remove(&dev->list_node); dpdk_mp_put(dev->dpdk_mp); ovs_mutex_unlock(&dpdk_mutex); @@ -786,7 +825,7 @@ netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, int next_tx_idx = 0; int dropped = 0; - qid = rte_lcore_id() % NR_QUEUE; + qid = rte_lcore_id(); for (i = 0; i < cnt; i++) { int size = ofpbuf_size(&pkts[i]->ofpbuf); -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev