Thanks for the review. I'll prepare a new version.
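Regarding the condition variable: for reference, below is a minimal standalone sketch of the handshake Daniele describes, so that dp_netdev_reload_pmd__() returns only after the pmd thread has acknowledged the reload. This is an illustration only, written with plain pthreads rather than the OVS wrappers; the field names merely mirror the 'cond', 'cond_mutex' and 'change_seq' members that this patch removes.

/* Sketch of the reload handshake: the main thread bumps 'change_seq' and
 * sleeps on a condition variable until the pmd thread signals that it has
 * applied the new configuration. */
#include <pthread.h>
#include <stdatomic.h>

struct pmd_sketch {
    pthread_mutex_t cond_mutex;   /* Protects the reload handshake. */
    pthread_cond_t cond;          /* Signaled by the pmd when reload is done. */
    atomic_uint change_seq;       /* Bumped by the main thread to request a reload. */
};

/* Main thread: request a reload and wait until the pmd has picked it up. */
void
reload_pmd_and_wait(struct pmd_sketch *pmd)
{
    pthread_mutex_lock(&pmd->cond_mutex);
    atomic_fetch_add_explicit(&pmd->change_seq, 1, memory_order_relaxed);
    pthread_cond_wait(&pmd->cond, &pmd->cond_mutex);
    pthread_mutex_unlock(&pmd->cond_mutex);
}

/* Pmd thread: called once the new configuration has been applied. */
void
reload_done(struct pmd_sketch *pmd)
{
    pthread_mutex_lock(&pmd->cond_mutex);
    pthread_cond_signal(&pmd->cond);
    pthread_mutex_unlock(&pmd->cond_mutex);
}

In OVS terms this is the ovs_mutex_cond_wait() / dp_netdev_pmd_reload_done() pair, which the next version can keep while still moving queue distribution to the main thread.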
Best regards,
Ilya Maximets.

On 26.01.2016 03:21, Daniele Di Proietto wrote:
> Hi,
>
> Thanks again for the patch,
>
> As I said in the discussion of v2, I think we should keep the
> condition variable, so that we can return from the datapath
> functions when the port operations are effectively completed.
>
> I also have a couple of minor coding style suggestions (inline).
>
> Other than this the patch looks good to me
>
> Thanks,
>
> Daniele
>
> On 25/01/2016 07:00, "Ilya Maximets" <i.maxim...@samsung.com> wrote:
>
>> Current rx queue management model is buggy and will not work properly
>> without additional barriers and other synchronization between PMD
>> threads and main thread.
>>
>> Known BUGS of current model:
>>  * While reloading, two PMD threads, one already reloaded and
>>    one not yet reloaded, can poll same queue of the same port.
>>    This behavior may lead to dpdk driver failure, because they
>>    are not thread-safe.
>>  * Same bug as fixed in commit e4e74c3a2b
>>    ("dpif-netdev: Purge all ukeys when reconfigure pmd.") but
>>    reproduced while only reconfiguring of pmd threads without
>>    restarting, because addition may change the sequence of
>>    other ports, which is important in time of reconfiguration.
>>
>> Introducing the new model, where distribution of queues made by main
>> thread with minimal synchronizations and without data races between
>> pmd threads. Also, this model should work faster, because only
>> needed threads will be interrupted for reconfiguration and total
>> computational complexity of reconfiguration is less.
>>
>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
>> ---
>>  lib/dpif-netdev.c | 245 ++++++++++++++++++++++++++++++++----------------------
>>  1 file changed, 147 insertions(+), 98 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index cd72e62..f055c6e 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -372,6 +372,13 @@ struct dp_netdev_pmd_cycles {
>>      atomic_ullong n[PMD_N_CYCLES];
>>  };
>>
>> +/* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
>> +struct rxq_poll {
>> +    struct dp_netdev_port *port;
>> +    struct netdev_rxq *rx;
>> +    struct ovs_list node;
>> +};
>> +
>>  /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
>>   * the performance overhead of interrupt processing. Therefore netdev can
>>   * not implement rx-wait for these devices. dpif-netdev needs to poll
>> @@ -393,9 +400,6 @@ struct dp_netdev_pmd_thread {
>>      struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
>>      struct cmap_node node;          /* In 'dp->poll_threads'. */
>>
>> -    pthread_cond_t cond;            /* For synchronizing pmd thread reload. */
>> -    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>> -
>>      /* Per thread exact-match cache. Note, the instance for cpu core
>>       * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>       * need to be protected (e.g. by 'dp_netdev_mutex'). All other
>> @@ -430,6 +434,11 @@ struct dp_netdev_pmd_thread {
>>      int tx_qid;                     /* Queue id used by this pmd thread to
>>                                       * send packets on all netdevs */
>>
>> +    struct ovs_mutex poll_mutex;    /* Mutex for poll_list. */
>> +    /* List of rx queues to poll. */
>> +    struct ovs_list poll_list OVS_GUARDED;
>> +    int poll_cnt;                   /* Number of elemints in poll_list. */
>> +
>>      /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>>       * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>>       * values and subtracts them from 'stats' and 'cycles' before
>> @@ -469,7 +478,6 @@ static void dp_netdev_input(struct dp_netdev_pmd_thread *,
>>                              struct dp_packet **, int cnt);
>>
>>  static void dp_netdev_disable_upcall(struct dp_netdev *);
>> -void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
>
> Assuming we're keeping this function, I think it should be made static.
> This wasn't introduced by this patch, but it would be nice to fix it
> anyway.
>
>>  static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
>>                                      struct dp_netdev *dp, int index,
>>                                      unsigned core_id, int numa_id);
>> @@ -482,6 +490,11 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
>>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>>  static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>>  static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>> +static void
>> +dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
>> +                         struct dp_netdev_port *port, struct netdev_rxq *rx);
>> +static struct dp_netdev_pmd_thread *
>> +dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
>>  static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
>>  static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>>  static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
>> @@ -1019,22 +1032,7 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
>>          return;
>>      }
>>
>> -    ovs_mutex_lock(&pmd->cond_mutex);
>>      atomic_add_relaxed(&pmd->change_seq, 1, &old_seq);
>> -    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>> -    ovs_mutex_unlock(&pmd->cond_mutex);
>> -}
>> -
>> -/* Causes all pmd threads to reload its tx/rx devices.
>> - * Must be called after adding/removing ports. */
>> -static void
>> -dp_netdev_reload_pmds(struct dp_netdev *dp)
>> -{
>> -    struct dp_netdev_pmd_thread *pmd;
>> -
>> -    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> -        dp_netdev_reload_pmd__(pmd);
>> -    }
>>  }
>>
>>  static uint32_t
>> @@ -1128,8 +1126,26 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>>      cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
>>
>>      if (netdev_is_pmd(netdev)) {
>> -        dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
>> -        dp_netdev_reload_pmds(dp);
>> +        int numa_id = netdev_get_numa_id(netdev);
>> +        struct dp_netdev_pmd_thread *pmd;
>> +
>> +        /* Cannot create pmd threads for invalid numa node. */
>> +        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>> +
>> +        for (i = 0; i < netdev_n_rxq(netdev); i++) {
>> +            pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
>> +            if (!pmd) {
>> +                /* There is no pmd threads on this numa node. */
>> +                dp_netdev_set_pmds_on_numa(dp, numa_id);
>> +                /* Assigning of rx queues done. */
>> +                break;
>> +            }
>> +
>> +            ovs_mutex_lock(&pmd->poll_mutex);
>> +            dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
>> +            ovs_mutex_unlock(&pmd->poll_mutex);
>> +            dp_netdev_reload_pmd__(pmd);
>> +        }
>>      }
>>      seq_change(dp->port_seq);
>>
>> @@ -1226,16 +1242,6 @@ port_ref(struct dp_netdev_port *port)
>>      }
>>  }
>>
>> -static bool
>> -port_try_ref(struct dp_netdev_port *port)
>> -{
>> -    if (port) {
>> -        return ovs_refcount_try_ref_rcu(&port->ref_cnt);
>> -    }
>> -
>> -    return false;
>> -}
>> -
>>  static void
>>  port_unref(struct dp_netdev_port *port)
>>  {
>> @@ -1313,12 +1319,38 @@ do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
>>      if (netdev_is_pmd(port->netdev)) {
>>          int numa_id = netdev_get_numa_id(port->netdev);
>>
>> +        /* PMD threads can not be on invalid numa node. */
>> +        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>>          /* If there is no netdev on the numa node, deletes the pmd threads
>> -         * for that numa. Else, just reloads the queues. */
>> +         * for that numa. Else, deletes the queues from polling lists. */
>>          if (!has_pmd_port_for_numa(dp, numa_id)) {
>>              dp_netdev_del_pmds_on_numa(dp, numa_id);
>>          }
>> -        dp_netdev_reload_pmds(dp);
>> +        else {
>
> This should be
>
>     } else {
>
> on the same line
>
>> +            struct dp_netdev_pmd_thread *pmd;
>> +            struct rxq_poll *poll, *next;
>> +
>> +            CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> +                if (pmd->numa_id == numa_id) {
>> +                    bool found = false;
>> +
>> +                    ovs_mutex_lock(&pmd->poll_mutex);
>> +                    LIST_FOR_EACH_SAFE (poll, next, node, &pmd->poll_list) {
>> +                        if (poll->port == port) {
>> +                            found = true;
>> +                            port_unref(poll->port);
>> +                            list_remove(&poll->node);
>> +                            pmd->poll_cnt--;
>> +                            free(poll);
>> +                        }
>> +                    }
>> +                    ovs_mutex_unlock(&pmd->poll_mutex);
>> +                    if (found) {
>> +                        dp_netdev_reload_pmd__(pmd);
>> +                    }
>> +                }
>> +            }
>> +        }
>>      }
>>
>>      port_unref(port);
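To summarize the model described in the commit message above: for each rx queue of a new port, the main thread picks the pmd thread on the port's NUMA node with the fewest queues already assigned and appends the queue to that thread's poll list. The following is a minimal, self-contained sketch of that idea only; the helper here is a simplified stand-in for dp_netdev_less_loaded_pmd_on_numa() and dp_netdev_add_rxq_to_pmd(), whose real bodies are not shown in the hunks above.

#include <stddef.h>

struct pmd_load {
    int numa_id;
    int poll_cnt;            /* Number of rx queues currently assigned. */
};

/* Return the pmd on 'numa_id' with the smallest poll_cnt, or NULL. */
struct pmd_load *
less_loaded_pmd_on_numa(struct pmd_load *pmds, size_t n_pmds, int numa_id)
{
    struct pmd_load *best = NULL;
    size_t i;

    for (i = 0; i < n_pmds; i++) {
        if (pmds[i].numa_id == numa_id
            && (!best || pmds[i].poll_cnt < best->poll_cnt)) {
            best = &pmds[i];
        }
    }
    return best;
}

/* Distribute 'n_rxq' queues of a new port across the pmds of 'numa_id'. */
void
distribute_rxqs(struct pmd_load *pmds, size_t n_pmds, int numa_id, int n_rxq)
{
    int q;

    for (q = 0; q < n_rxq; q++) {
        struct pmd_load *pmd = less_loaded_pmd_on_numa(pmds, n_pmds, numa_id);
        if (!pmd) {
            return;          /* No pmd threads on this NUMA node. */
        }
        pmd->poll_cnt++;     /* In the patch: take poll_mutex, append an
                              * rxq_poll node, then reload only this pmd. */
    }
}

Because only the pmd thread that actually received a new queue is reloaded, reconfiguration no longer interrupts every pmd thread, which is where the lower reconfiguration cost claimed in the commit message comes from.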