Sorry about the long wait.  I think this is the right direction; I like
the idea of having a thread-local map of the txqs.  It will allow us to
assign the txqs in a more sophisticated way.
I see some unit tests failing with this patch applied.  Could you try
running the testsuite with 'make check'?

The txq distribution code is run only when a pmd device is added: if a
"system" or a "dummy" device is added, its txq will not be used by any
pmd thread (or by the main thread).

Two more comments below.

On 06/08/2015 15:32, "Ilya Maximets" <i.maxim...@samsung.com> wrote:

>Currently tx_qid is equal to pmd->core_id. This leads to unexpected
>behavior if pmd-cpu-mask is different from '/(0*)(1|3|7)?(f*)/',
>e.g. if core_ids are not sequential, or don't start from 0, or both.
>
>Example:
> starting 2 pmd threads with 1 port, 2 rxqs per port,
> pmd-cpu-mask = 00000014 and let dev->real_n_txq = 2
>
> In that case pmd_1->tx_qid = 2, pmd_2->tx_qid = 4 and
> txq_needs_locking = true (if the device doesn't have
> ovs_numa_get_n_cores()+1 queues).
>
> In that case, after truncating in netdev_dpdk_send__():
> 'qid = qid % dev->real_n_txq;'
> pmd_1: qid = 2 % 2 = 0
> pmd_2: qid = 4 % 2 = 0
>
> So, both threads will call dpdk_queue_pkts() with the same qid = 0.
> This is unexpected behavior if there are 2 tx queues in the device.
> Queue #1 will not be used and both threads will lock queue #0
> on each send.
>
>Fix that by introducing a per pmd thread hash map 'tx_queues', where
>all tx queues available for that pmd thread are stored with port_no
>as a key (hash). All tx_qid-s will be unique per port and sequential
>to prevent the unexpected mapping described above after truncating.
>
>The implemented infrastructure can be used in the future to choose
>between all tx queues available for that pmd thread.
>
>Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
>---
> lib/dpif-netdev.c | 110 ++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 91 insertions(+), 19 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index c144352..309994c 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -365,6 +365,13 @@ struct dp_netdev_pmd_cycles {
>     atomic_ullong n[PMD_N_CYCLES];
> };
> 
>+struct dp_netdev_pmd_txq {
>+    struct cmap_node node;          /* In owning dp_netdev_pmd_thread's */
>+                                    /* 'tx_queues'. */
>+    struct dp_netdev_port *port;
>+    int tx_qid;
>+};
>+
> /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
>  * the performance overhead of interrupt processing. Therefore netdev can
>  * not implement rx-wait for these devices. dpif-netdev needs to poll
>@@ -420,8 +427,8 @@ struct dp_netdev_pmd_thread {
>                                     /* threads on same numa node. */
>     unsigned core_id;               /* CPU core id of this pmd thread. */
>     int numa_id;                    /* numa node id of this pmd thread. */
>-    int tx_qid;                     /* Queue id used by this pmd thread to
>-                                     * send packets on all netdevs */
>+    struct cmap tx_queues;          /* Queue ids used by this pmd thread to
>+                                     * send packets to ports */
> 
>     /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>      * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>@@ -2602,6 +2609,46 @@ dpif_netdev_wait(struct dpif *dpif)
>     seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
> }
> 
>+static void
>+pmd_flush_tx_queues(struct dp_netdev_pmd_thread *pmd)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
>+        cmap_remove(&pmd->tx_queues, &txq->node,
>+                    hash_port_no(txq->port->port_no));
>+        port_unref(txq->port);
>+        free(txq);
>+    }
>+}

The term "flush" used for txqs makes me think that the packets in the
device transmission queue will be sent.  Maybe a different name would
be clearer?
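Not a blocker, but something along these lines is what I have in mind
(the name is only a suggestion, the body is unchanged from the patch):

    /* Suggested rename only: "free" avoids implying that queued packets
     * get transmitted.  Same body as pmd_flush_tx_queues() above. */
    static void
    pmd_free_tx_queues(struct dp_netdev_pmd_thread *pmd)
    {
        struct dp_netdev_pmd_txq *txq;

        CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
            cmap_remove(&pmd->tx_queues, &txq->node,
                        hash_port_no(txq->port->port_no));
            port_unref(txq->port);
            free(txq);
        }
    }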
>+
>+static void OVS_UNUSED
>+pmd_tx_queues_print(struct dp_netdev_pmd_thread *pmd)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
>+        VLOG_INFO("Core_id: %d, Port: %s, tx_qid: %d\n",
>+                  pmd->core_id, netdev_get_name(txq->port->netdev),
>+                  txq->tx_qid);
>+    }
>+}
>+
>+static struct dp_netdev_pmd_txq *
>+pmd_lookup_txq(const struct dp_netdev_pmd_thread *pmd,
>+               const struct dp_netdev_port *port)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH_WITH_HASH (txq, node, hash_port_no(port->port_no),
>+                             &pmd->tx_queues) {
>+        if (txq->port == port) {
>+            return txq;
>+        }
>+    }
>+    return NULL;
>+}
>+
> struct rxq_poll {
>     struct dp_netdev_port *port;
>     struct netdev_rxq *rx;
>@@ -2613,16 +2660,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
> {
>     struct rxq_poll *poll_list = *ppoll_list;
>     struct dp_netdev_port *port;
>-    int n_pmds_on_numa, index, i;
>+    struct dp_netdev_pmd_txq *txq;
>+    int n_pmds_on_numa, rx_index, tx_index, i;
> 
>     /* Simple scheduler for netdev rx polling. */
>+    pmd_flush_tx_queues(pmd);
>+
>     for (i = 0; i < poll_cnt; i++) {
>         port_unref(poll_list[i].port);
>     }
> 
>     poll_cnt = 0;
>     n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
>-    index = 0;
>+    rx_index = 0;
>+    tx_index = 0;
> 
>     CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
>         /* Calls port_try_ref() to prevent the main thread
>@@ -2633,7 +2684,7 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                 int i;
> 
>                 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
>-                    if ((index % n_pmds_on_numa) == pmd->index) {
>+                    if ((rx_index % n_pmds_on_numa) == pmd->index) {
>                         poll_list = xrealloc(poll_list,
>                                         sizeof *poll_list * (poll_cnt + 1));
> 
>@@ -2642,7 +2693,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                         poll_list[poll_cnt].rx = port->rxq[i];
>                         poll_cnt++;
>                     }
>-                    index++;
>+                    rx_index++;
>+                }
>+
>+                /* Last queue reserved for non pmd threads */
>+                for (i = 0; i < netdev_n_txq(port->netdev) - 1; i++) {
>+                    if ((tx_index % n_pmds_on_numa) == pmd->index) {
>+                        txq = xmalloc(sizeof *txq);
>+                        port_ref(port);
>+                        txq->port = port;
>+                        txq->tx_qid = i;
>+                        cmap_insert(&pmd->tx_queues, &txq->node,
>+                                    hash_port_no(port->port_no));
>+                    }
>+                    tx_index++;
>                 }
>             }
>             /* Unrefs the port_try_ref(). */
>@@ -2712,6 +2776,8 @@ reload:
>         goto reload;
>     }
> 
>+    pmd_flush_tx_queues(pmd);
>+
>     for (i = 0; i < poll_cnt; i++) {
>         port_unref(poll_list[i].port);
>     }
>@@ -2825,16 +2891,6 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
>     return next;
> }
> 
>-static int
>-core_id_to_qid(unsigned core_id)
>-{
>-    if (core_id != NON_PMD_CORE_ID) {
>-        return core_id;
>-    } else {
>-        return ovs_numa_get_n_cores();
>-    }
>-}
>-
> /* Configures the 'pmd' based on the input argument. */
> static void
> dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>@@ -2843,7 +2899,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>     pmd->dp = dp;
>     pmd->index = index;
>     pmd->core_id = core_id;
>-    pmd->tx_qid = core_id_to_qid(core_id);
>     pmd->numa_id = numa_id;
> 
>     ovs_refcount_init(&pmd->ref_cnt);
>@@ -2854,9 +2909,20 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>     ovs_mutex_init(&pmd->flow_mutex);
>     dpcls_init(&pmd->cls);
>     cmap_init(&pmd->flow_table);
>-    /* init the 'flow_cache' since there is no
>+    cmap_init(&pmd->tx_queues);
>+    /* init the 'flow_cache' and 'tx_queues' since there is no
>      * actual thread created for NON_PMD_CORE_ID. */
>     if (core_id == NON_PMD_CORE_ID) {
>+        struct dp_netdev_port *port;
>+        CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
>+            if (port_try_ref(port)) {
>+                struct dp_netdev_pmd_txq * txq = xmalloc(sizeof *txq);
>+                txq->port = port;
>+                txq->tx_qid = ovs_numa_get_n_cores();
>+                cmap_insert(&pmd->tx_queues, &txq->node,
>+                            hash_port_no(port->port_no));
>+            }
>+        }
>         emc_cache_init(&pmd->flow_cache);
>     }
>     cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
>@@ -2869,6 +2935,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>     dp_netdev_pmd_flow_flush(pmd);
>     dpcls_destroy(&pmd->cls);
>     cmap_destroy(&pmd->flow_table);
>+    cmap_destroy(&pmd->tx_queues);
>     ovs_mutex_destroy(&pmd->flow_mutex);
>     latch_destroy(&pmd->exit_latch);
>     xpthread_cond_destroy(&pmd->cond);
>@@ -2885,6 +2952,7 @@ dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
>      * no actual thread uninit it for NON_PMD_CORE_ID. */
>     if (pmd->core_id == NON_PMD_CORE_ID) {
>         emc_cache_uninit(&pmd->flow_cache);
>+        pmd_flush_tx_queues(pmd);
>     } else {
>         latch_set(&pmd->exit_latch);
>         dp_netdev_reload_pmd__(pmd);
>@@ -3480,13 +3548,17 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
>     struct dp_netdev *dp = pmd->dp;
>     int type = nl_attr_type(a);
>     struct dp_netdev_port *p;
>+    struct dp_netdev_pmd_txq *txq;
>     int i;
> 
>     switch ((enum ovs_action_attr)type) {
>     case OVS_ACTION_ATTR_OUTPUT:
>         p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
>         if (OVS_LIKELY(p)) {
>-            netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
>+            txq = pmd_lookup_txq(pmd, p);
>+            if (OVS_UNLIKELY(!txq))
>+                break;
>+            netdev_send(p->netdev, txq->tx_qid, packets, cnt, may_steal);

I think that performing two lookups (for the port and for the txq) is
more expensive than necessary.  One solution could be to include a
pointer to the netdev in the 'txq'.

>             return;
>         }
>         break;
>-- 
>2.1.4
>
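To make the single-lookup idea above more concrete, here is a rough,
untested sketch (the helper name and the cached 'netdev' field are only
illustrative):

    struct dp_netdev_pmd_txq {
        struct cmap_node node;        /* In owning dp_netdev_pmd_thread's
                                       * 'tx_queues'. */
        struct dp_netdev_port *port;
        struct netdev *netdev;        /* Cached copy of port->netdev, so the
                                       * send path does not need to look the
                                       * port up in dp->ports. */
        int tx_qid;
    };

    /* Illustrative helper: finds the txq directly in the pmd-local map by
     * port number, removing dp_netdev_lookup_port() from the fast path. */
    static struct dp_netdev_pmd_txq *
    pmd_lookup_txq_by_port_no(const struct dp_netdev_pmd_thread *pmd,
                              odp_port_t port_no)
    {
        struct dp_netdev_pmd_txq *txq;

        CMAP_FOR_EACH_WITH_HASH (txq, node, hash_port_no(port_no),
                                 &pmd->tx_queues) {
            if (txq->port->port_no == port_no) {
                return txq;
            }
        }
        return NULL;
    }

The OVS_ACTION_ATTR_OUTPUT case in dp_execute_cb() could then do a
single lookup:

    case OVS_ACTION_ATTR_OUTPUT:
        txq = pmd_lookup_txq_by_port_no(pmd, u32_to_odp(nl_attr_get_u32(a)));
        if (OVS_LIKELY(txq)) {
            netdev_send(txq->netdev, txq->tx_qid, packets, cnt, may_steal);
            return;
        }
        break;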