That looks better to me. Minor comment: we usually prefer 'sizeof *pmds' to 'sizeof(struct dp_netdev_pmd_thread*)'.
https://github.com/openvswitch/ovs/blob/master/CodingStyle.md#user-content- expressions Would you mind changing that, updating the commit message and resending to the list? Thanks! On 27/07/2015 10:34, "Ilya Maximets" <i.maxim...@samsung.com> wrote: >Previous diff was completely wrong. Sorry. > >@@ -2971,6 +2970,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >int numa_id) > * pmd threads for the numa node. */ > if (!n_pmds) { > int can_have, n_unpinned, i; >+ struct dp_netdev_pmd_thread **pmds; > > n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); > if (!n_unpinned) { >@@ -2982,15 +2982,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >int numa_id) > /* If cpu mask is specified, uses all unpinned cores, otherwise > * tries creating NR_PMD_THREADS pmd threads. */ > can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >NR_PMD_THREADS); >+ pmds = xzalloc(can_have * sizeof(struct dp_netdev_pmd_thread*)); > for (i = 0; i < can_have; i++) { >- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); > unsigned core_id = >ovs_numa_get_unpinned_core_on_numa(numa_id); >- >- dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >+ pmds[i] = xzalloc(sizeof(struct dp_netdev_pmd_thread)); >+ dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id); >+ } >+ /* The pmd thread code needs to see all the others configured pmd >+ * threads on the same numa node. That's why we call >+ * 'dp_netdev_configure_pmd()' on all the threads and then we >actually >+ * start them. */ >+ for (i = 0; i < can_have; i++) { > /* Each thread will distribute all devices rx-queues among > * themselves. */ >- pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >+ pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, >pmds[i]); > } >+ free(pmds); > VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >numa_id); > } > } > > >On 27.07.2015 11:17, Ilya Maximets wrote: >> Sorry, >> without dp_netdev_reload_pmds(dp) at the end. 
>> >> On 27.07.2015 10:58, Ilya Maximets wrote: >>> Yes, I think, this way is better. But it may be more simple >>> if we just keep all the dp_netdev_pmd_thread structures. >>> There is no need to search over all pmd threads on system. >>> >>> Like this: >>> >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>> index 79c4612..8e4c025 100644 >>> --- a/lib/dpif-netdev.c >>> +++ b/lib/dpif-netdev.c >>> @@ -2961,6 +2960,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >>>int numa_id) >>> * pmd threads for the numa node. */ >>> if (!n_pmds) { >>> int can_have, n_unpinned, i; >>> + struct dp_netdev_pmd_thread *pmd; >>> >>> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >>> if (!n_unpinned) { >>> @@ -2972,16 +2972,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>*dp, int numa_id) >>> /* If cpu mask is specified, uses all unpinned cores, >>>otherwise >>> * tries creating NR_PMD_THREADS pmd threads. */ >>> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >>>NR_PMD_THREADS); >>> + pmd = xzalloc(can_have * sizeof(*pmd)); >>> for (i = 0; i < can_have; i++) { >>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >>> unsigned core_id = >>>ovs_numa_get_unpinned_core_on_numa(numa_id); >>> - >>> - dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >>> + dp_netdev_configure_pmd(&pmd[i], dp, i, core_id, numa_id); >>> + } >>> + /* The pmd thread code needs to see all the others configured >>>pmd >>> + * threads on the same numa node. That's why we call >>> + * 'dp_netdev_configure_pmd()' on all the threads and then we >>>actually >>> + * start them. */ >>> + for (i = 0; i < can_have; i++) { >>> /* Each thread will distribute all devices rx-queues among >>> * themselves. 
*/ >>> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>pmd); >>> + pmd[i].thread = ovs_thread_create("pmd", pmd_thread_main, >>>&pmd[i]); >>> } >>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>numa_id); >>> + dp_netdev_reload_pmds(dp); >>> } >>> } >>> >>> >>> On 24.07.2015 19:42, Daniele Di Proietto wrote: >>>> That's a bad race condition, thanks for reporting it! >>>> >>>> Regarding the fix, I agree that reloading the threads would >>>> restore the correct mapping, but it would still allow >>>> the threads to run with the incorrect mapping for a brief >>>> interval. >>>> >>>> How about postponing the actual threads creation until all >>>> the pmds are configured? >>>> >>>> Something like: >>>> >>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>>> index 79c4612..26d9f1f 100644 >>>> --- a/lib/dpif-netdev.c >>>> +++ b/lib/dpif-netdev.c >>>> @@ -2961,6 +2961,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>*dp, int >>>> numa_id) >>>> * pmd threads for the numa node. */ >>>> if (!n_pmds) { >>>> int can_have, n_unpinned, i; >>>> + struct dp_netdev_pmd_thread *pmd; >>>> >>>> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >>>> if (!n_unpinned) { >>>> @@ -2973,13 +2974,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>*dp, >>>> int numa_id) >>>> * tries creating NR_PMD_THREADS pmd threads. */ >>>> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >>>> NR_PMD_THREADS); >>>> for (i = 0; i < can_have; i++) { >>>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >>>> unsigned core_id = >>>> ovs_numa_get_unpinned_core_on_numa(numa_id); >>>> >>>> + pmd = xzalloc(sizeof *pmd); >>>> dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >>>> - /* Each thread will distribute all devices rx-queues >>>>among >>>> - * themselves. 
*/ >>>> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>>pmd); >>>> + } >>>> + >>>> + /* The pmd thread code needs to see all the others >>>>configured pmd >>>> + * threads on the same numa node. That's why we call >>>> + * 'dp_netdev_configure_pmd()' on all the threads and then we >>>> actually >>>> + * start them. */ >>>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >>>> + if (pmd->numa_id == numa_id) { >>>> + /* Each thread will distribute all devices rx-queues >>>>among >>>> + * themselves. */ >>>> + pmd->thread = ovs_thread_create("pmd", >>>>pmd_thread_main, >>>> pmd); >>>> + } >>>> } >>>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>> numa_id); >>>> } >>>> >>>> >>>> What do you think? >>>> >>>> Thanks! >>>> >>>> On 24/07/2015 12:18, "Ilya Maximets" <i.maxim...@samsung.com> wrote: >>>> >>>>> Currently pmd threads select queues in pmd_load_queues() according to >>>>> get_n_pmd_threads_on_numa(). This behavior leads to race between >>>>>pmds, >>>>> beacause dp_netdev_set_pmds_on_numa() starts them one by one and >>>>> current number of threads changes incrementally. >>>>> >>>>> As a result we may have the following situation with 2 pmd threads: >>>>> >>>>> * dp_netdev_set_pmds_on_numa() >>>>> * pmd12 thread started. Currently only 1 pmd thread exists. >>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_1' >>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_2' >>>>> * pmd14 thread started. 2 pmd threads exists. >>>>> dpif_netdev|INFO|Created 2 pmd threads on numa node 0 >>>>> dpif_netdev(pmd14)|INFO|Core 2 processing port 'port_2' >>>>> >>>>> We have: >>>>> core 1 --> port 1, port 2 >>>>> core 2 --> port 2 >>>>> >>>>> Fix this by reloading all pmds to get right port mapping. 
>>>>> >>>>> If we reload pmds, we'll have: >>>>> core 1 --> port 1 >>>>> core 2 --> port 2 >>>>> >>>>> Cc: Dyasly Sergey <s.dya...@samsung.com> >>>>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> >>>>> --- >>>>> lib/dpif-netdev.c | 6 +++--- >>>>> 1 file changed, 3 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>>>> index 2958d52..fd700f9 100644 >>>>> --- a/lib/dpif-netdev.c >>>>> +++ b/lib/dpif-netdev.c >>>>> @@ -1127,10 +1127,9 @@ do_add_port(struct dp_netdev *dp, const char >>>>> *devname, const char *type, >>>>> ovs_refcount_init(&port->ref_cnt); >>>>> cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); >>>>> >>>>> - if (netdev_is_pmd(netdev)) { >>>>> + if (netdev_is_pmd(netdev)) >>>>> dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); >>>>> - dp_netdev_reload_pmds(dp); >>>>> - } >>>>> + >>>>> seq_change(dp->port_seq); >>>>> >>>>> return 0; >>>>> @@ -2978,6 +2977,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev >>>>>*dp, >>>>> int numa_id) >>>>> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>>>>pmd); >>>>> } >>>>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>>> numa_id); >>>>> + dp_netdev_reload_pmds(dp); >>>>> } >>>>> } >>>>> >>>>> -- >>>>> 2.1.4 >>>>> >>>>> _______________________________________________ >>>>> dev mailing list >>>>> dev@openvswitch.org >>>>> >>>>>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_ma >>>>>ilma >>>>> >>>>>n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs& >>>>>r=Sm >>>>> >>>>>B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=YcYFmCgJtVWAOpEYl7OaM4nqi7 >>>>>maE7 >>>>> XOiMnLOpnugHY&s=Zf_P-5VtI2yJNO-f1ZXolxD5syuhr7WdbyNvdwH3zQM&e= >>>> >>>> _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev