2019-09-28, 16:48:32 +0000, Taehee Yoo wrote:
> @@ -6790,23 +6878,45 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
>                                       void *data),
>                             void *data)
>  {
> -     struct net_device *ldev;
> -     struct list_head *iter;
> -     int ret;
> +     struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
> +     struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
> +     int ret, cur = 0;
>  
> -     for (iter = &dev->adj_list.lower,
> -          ldev = netdev_next_lower_dev(dev, &iter);
> -          ldev;
> -          ldev = netdev_next_lower_dev(dev, &iter)) {
> -             /* first is the lower device itself */
> -             ret = fn(ldev, data);
> -             if (ret)
> -                     return ret;
> +     now = dev;
> +     iter = &dev->adj_list.lower;
>  
> -             /* then look at all of its lower devices */
> -             ret = netdev_walk_all_lower_dev(ldev, fn, data);
> -             if (ret)
> -                     return ret;
> +     while (1) {
> +             if (now != dev) {
> +                     ret = fn(now, data);
> +                     if (ret)
> +                             return ret;
> +             }
> +
> +             next = NULL;
> +             while (1) {
> +                     ldev = netdev_next_lower_dev(now, &iter);
> +                     if (!ldev)
> +                             break;
> +
> +                     if (!next) {
> +                             next = ldev;
> +                             niter = &ldev->adj_list.lower;
> +                     } else {
> +                             dev_stack[cur] = ldev;
> +                             iter_stack[cur++] = &ldev->adj_list.lower;
> +                             break;
> +                     }
> +             }
> +
> +             if (!next) {
> +                     if (!cur)
> +                             return 0;

Hmm, I don't think this condition is correct: the walk can return
here before all of the lower devices have been visited.

If we have this topology:


                bridge0
                /  |  \
               /   |   \
              /    |    \
        dummy0   vlan1   vlan2
                   |       \
                 dummy1    dummy2

We end up with the expected lower/upper levels for all devices:

    | device  | upper | lower |
    |---------+-------+-------|
    | dummy0  |     2 |     1 |
    | dummy1  |     3 |     1 |
    | dummy2  |     3 |     1 |
    | vlan1   |     2 |     2 |
    | vlan2   |     2 |     2 |
    | bridge0 |     1 |     3 |


If we then add macvlan0 on top of bridge0:


                macvlan0
                   |
                   |
                bridge0
                /  |  \
               /   |   \
              /    |    \
        dummy0   vlan1   vlan2
                   |       \
                 dummy1    dummy2


we can observe that __netdev_update_upper_level is only called for
some of the devices under bridge0. I added a perf probe:

 # perf probe -a '__netdev_update_upper_level dev->name:string'

which gets hit for bridge0 (called directly by
__netdev_upper_dev_link) and then dummy0, vlan1, dummy1. It is never
called for vlan2 and dummy2.

After this, we have the following levels (*):

    | device   | upper | lower |
    |----------+-------+-------|
    | dummy0   |     3 |     1 |
    | dummy1   |     4 |     1 |
    | dummy2   |     3 |     1 |
    | vlan1    |     3 |     2 |
    | vlan2    |     2 |     2 |
    | bridge0  |     2 |     3 |
    | macvlan0 |     1 |     4 |

For dummy0, dummy1, vlan1, the upper level has increased by 1, as
expected. For dummy2 and vlan2, it's still the same, which is wrong.


(*) observed easily by adding another probe:

 # perf probe -a 'dev_get_stats dev->name:string dev->upper_level dev->lower_level'

and running "ip link"

Or you can just add prints and recompile, of course :)

> +                     next = dev_stack[--cur];
> +                     niter = iter_stack[cur];
> +             }
> +
> +             now = next;
> +             iter = niter;
>       }
>  
>       return 0;

-- 
Sabrina

Reply via email to