That looks better to me. Minor comment: we usually prefer 'sizeof *pmds' to 'sizeof(struct dp_netdev_pmd_thread*)'.
https://github.com/openvswitch/ovs/blob/master/CodingStyle.md#user-content- expressions Would you mind changing that, updating the commit message and resending to the list? Thanks! On 27/07/2015 10:34, "Ilya Maximets" <i.maxim...@samsung.com> wrote: >Previous diff was completely wrong. Sorry. > >@@ -2971,6 +2970,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >int numa_id) > * pmd threads for the numa node. */ > if (!n_pmds) { > int can_have, n_unpinned, i; >+ struct dp_netdev_pmd_thread **pmds; > > n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); > if (!n_unpinned) { >@@ -2982,15 +2982,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >int numa_id) > /* If cpu mask is specified, uses all unpinned cores, otherwise > * tries creating NR_PMD_THREADS pmd threads. */ > can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >NR_PMD_THREADS); >+ pmds = xzalloc(can_have * sizeof(struct dp_netdev_pmd_thread*)); > for (i = 0; i < can_have; i++) { >- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); > unsigned core_id = >ovs_numa_get_unpinned_core_on_numa(numa_id); >- >- dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >+ pmds[i] = xzalloc(sizeof(struct dp_netdev_pmd_thread)); >+ dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id); >+ } >+ /* The pmd thread code needs to see all the others configured pmd >+ * threads on the same numa node. That's why we call >+ * 'dp_netdev_configure_pmd()' on all the threads and then we >actually >+ * start them. */ >+ for (i = 0; i < can_have; i++) { > /* Each thread will distribute all devices rx-queues among > * themselves. */ >- pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >+ pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, >pmds[i]); > } >+ free(pmds); > VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >numa_id); > } > } > > >On 27.07.2015 11:17, Ilya Maximets wrote: >> Sorry, >> without dp_netdev_reload_pmds(dp) at the end. 
>> >> On 27.07.2015 10:58, Ilya Maximets wrote: >>> Yes, I think, this way is better. But it may be more simple >>> if we just keep all the dp_netdev_pmd_thread structures. >>> There is no need to search over all pmd threads on system. >>> >>> Like this: >>> >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>> index 79c4612..8e4c025 100644 >>> --- a/lib/dpif-netdev.c >>> +++ b/lib/dpif-netdev.c >>> @@ -2961,6 +2960,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >>>int numa_id) >>> * pmd threads for the numa node. */ >>> if (!n_pmds) { >>> int can_have, n_unpinned, i; >>> + struct dp_netdev_pmd_thread *pmd; >>> >>> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >>> if (!n_unpinned) { >>> @@ -2972,16 +2972,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>*dp, int numa_id) >>> /* If cpu mask is specified, uses all unpinned cores, >>>otherwise >>> * tries creating NR_PMD_THREADS pmd threads. */ >>> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >>>NR_PMD_THREADS); >>> + pmd = xzalloc(can_have * sizeof(*pmd)); >>> for (i = 0; i < can_have; i++) { >>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >>> unsigned core_id = >>>ovs_numa_get_unpinned_core_on_numa(numa_id); >>> - >>> - dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >>> + dp_netdev_configure_pmd(&pmd[i], dp, i, core_id, numa_id); >>> + } >>> + /* The pmd thread code needs to see all the others configured >>>pmd >>> + * threads on the same numa node. That's why we call >>> + * 'dp_netdev_configure_pmd()' on all the threads and then we >>>actually >>> + * start them. */ >>> + for (i = 0; i < can_have; i++) { >>> /* Each thread will distribute all devices rx-queues among >>> * themselves. 
*/ >>> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>pmd); >>> + pmd[i].thread = ovs_thread_create("pmd", pmd_thread_main, >>>&pmd[i]); >>> } >>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>numa_id); >>> + dp_netdev_reload_pmds(dp); >>> } >>> } >>> >>> >>> On 24.07.2015 19:42, Daniele Di Proietto wrote: >>>> That's a bad race condition, thanks for reporting it! >>>> >>>> Regarding the fix, I agree that reloading the threads would >>>> restore the correct mapping, but it would still allow >>>> the threads to run with the incorrect mapping for a brief >>>> interval. >>>> >>>> How about postponing the actual threads creation until all >>>> the pmds are configured? >>>> >>>> Something like: >>>> >>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>>> index 79c4612..26d9f1f 100644 >>>> --- a/lib/dpif-netdev.c >>>> +++ b/lib/dpif-netdev.c >>>> @@ -2961,6 +2961,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>*dp, int >>>> numa_id) >>>> * pmd threads for the numa node. */ >>>> if (!n_pmds) { >>>> int can_have, n_unpinned, i; >>>> + struct dp_netdev_pmd_thread *pmd; >>>> >>>> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >>>> if (!n_unpinned) { >>>> @@ -2973,13 +2974,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>*dp, >>>> int numa_id) >>>> * tries creating NR_PMD_THREADS pmd threads. */ >>>> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >>>> NR_PMD_THREADS); >>>> for (i = 0; i < can_have; i++) { >>>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >>>> unsigned core_id = >>>> ovs_numa_get_unpinned_core_on_numa(numa_id); >>>> >>>> + pmd = xzalloc(sizeof *pmd); >>>> dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >>>> - /* Each thread will distribute all devices rx-queues >>>>among >>>> - * themselves. 
*/ >>>> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>>pmd); >>>> + } >>>> + >>>> + /* The pmd thread code needs to see all the others >>>>configured pmd >>>> + * threads on the same numa node. That's why we call >>>> + * 'dp_netdev_configure_pmd()' on all the threads and then we >>>> actually >>>> + * start them. */ >>>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >>>> + if (pmd->numa_id == numa_id) { >>>> + /* Each thread will distribute all devices rx-queues >>>>among >>>> + * themselves. */ >>>> + pmd->thread = ovs_thread_create("pmd", >>>>pmd_thread_main, >>>> pmd); >>>> + } >>>> } >>>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>> numa_id); >>>> } >>>> >>>> >>>> What do you think? >>>> >>>> Thanks! >>>> >>>> On 24/07/2015 12:18, "Ilya Maximets" <i.maxim...@samsung.com> wrote: >>>> >>>>> Currently pmd threads select queues in pmd_load_queues() according to >>>>> get_n_pmd_threads_on_numa(). This behavior leads to race between >>>>>pmds, >>>>> beacause dp_netdev_set_pmds_on_numa() starts them one by one and >>>>> current number of threads changes incrementally. >>>>> >>>>> As a result we may have the following situation with 2 pmd threads: >>>>> >>>>> * dp_netdev_set_pmds_on_numa() >>>>> * pmd12 thread started. Currently only 1 pmd thread exists. >>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_1' >>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_2' >>>>> * pmd14 thread started. 2 pmd threads exists. >>>>> dpif_netdev|INFO|Created 2 pmd threads on numa node 0 >>>>> dpif_netdev(pmd14)|INFO|Core 2 processing port 'port_2' >>>>> >>>>> We have: >>>>> core 1 --> port 1, port 2 >>>>> core 2 --> port 2 >>>>> >>>>> Fix this by reloading all pmds to get right port mapping. 
>>>>> >>>>> If we reload pmds, we'll have: >>>>> core 1 --> port 1 >>>>> core 2 --> port 2 >>>>> >>>>> Cc: Dyasly Sergey <s.dya...@samsung.com> >>>>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> >>>>> --- >>>>> lib/dpif-netdev.c | 6 +++--- >>>>> 1 file changed, 3 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>>>> index 2958d52..fd700f9 100644 >>>>> --- a/lib/dpif-netdev.c >>>>> +++ b/lib/dpif-netdev.c >>>>> @@ -1127,10 +1127,9 @@ do_add_port(struct dp_netdev *dp, const char >>>>> *devname, const char *type, >>>>> ovs_refcount_init(&port->ref_cnt); >>>>> cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); >>>>> >>>>> - if (netdev_is_pmd(netdev)) { >>>>> + if (netdev_is_pmd(netdev)) >>>>> dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); >>>>> - dp_netdev_reload_pmds(dp); >>>>> - } >>>>> + >>>>> seq_change(dp->port_seq); >>>>> >>>>> return 0; >>>>> @@ -2978,6 +2977,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>>*dp, >>>>> int numa_id) >>>>> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>>>pmd); >>>>> } >>>>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>>> numa_id); >>>>> + dp_netdev_reload_pmds(dp); >>>>> } >>>>> } >>>>> >>>>> -- >>>>> 2.1.4 >>>>> >>>>> _______________________________________________ >>>>> dev mailing list >>>>> dev@openvswitch.org >>>>> >>>>>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_ma >>>>>ilma >>>>> >>>>>n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs& >>>>>r=Sm >>>>> >>>>>B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=YcYFmCgJtVWAOpEYl7OaM4nqi7 >>>>>maE7 >>>>> XOiMnLOpnugHY&s=Zf_P-5VtI2yJNO-f1ZXolxD5syuhr7WdbyNvdwH3zQM&e= >>>> >>>> _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev