Eric W. Biederman wrote:
> There is no good reason to not support userspace specifying the
> network namespace during device creation and it seems a handy
> thing to do.
> 
> We have to be a little extra careful in this case to ensure that
> the network namespace exists through the point where we call
> register_netdevice.
> 
> In addition we need to pass the network namespace to the
> rtnl_link_ops.newlink method so we can properly create
> the new device in another namespace and have it be a vlan
> device of a device in our current network namespace.
> 
> In summary this patch makes ip link add somename netns NNN type sometype
> do the obvious thing instead of ignoring the network namespace parameter.
> 
> Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]>
> ---
>  drivers/net/macvlan.c    |    4 ++--
>  drivers/net/veth.c       |    5 +++--
>  include/net/rtnetlink.h  |    3 ++-
>  net/8021q/vlan_netlink.c |    4 ++--
>  net/core/rtnetlink.c     |   17 ++++++++++++++++-
>  5 files changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> index 4239450..fc5933b 100644
> --- a/drivers/net/macvlan.c
> +++ b/drivers/net/macvlan.c
> @@ -416,7 +416,7 @@ static int macvlan_validate(struct nlattr *tb[], struct 
> nlattr *data[])
>       return 0;
>  }
> 
> -static int macvlan_newlink(struct net_device *dev,
> +static int macvlan_newlink(struct net *net, struct net_device *dev,
>                          struct nlattr *tb[], struct nlattr *data[])
>  {
>       struct macvlan_dev *vlan = netdev_priv(dev);
> @@ -427,7 +427,7 @@ static int macvlan_newlink(struct net_device *dev,
>       if (!tb[IFLA_LINK])
>               return -EINVAL;
> 
> -     lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
> +     lowerdev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
>       if (lowerdev == NULL)
>               return -ENODEV;
> 
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 31cd817..3a2d818 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -335,7 +335,7 @@ static int veth_validate(struct nlattr *tb[], struct 
> nlattr *data[])
> 
>  static struct rtnl_link_ops veth_link_ops;
> 
> -static int veth_newlink(struct net_device *dev,
> +static int veth_newlink(struct net *net, struct net_device *dev,
>                        struct nlattr *tb[], struct nlattr *data[])
>  {
>       int err;
> @@ -375,7 +375,7 @@ static int veth_newlink(struct net_device *dev,
>       else
>               snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
> 
> -     peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp);
> +     peer = rtnl_create_link(net, ifname, &veth_link_ops, tbp);
>       if (IS_ERR(peer))
>               return PTR_ERR(peer);
> 
> @@ -383,6 +383,7 @@ static int veth_newlink(struct net_device *dev,
>               random_ether_addr(peer->dev_addr);
> 
>       err = register_netdevice(peer);
> +     put_net(peer->nd_net);
>       if (err < 0)
>               goto err_register_peer;
> 
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 3c1895e..dbf546f 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -55,7 +55,8 @@ struct rtnl_link_ops {
>       int                     (*validate)(struct nlattr *tb[],
>                                           struct nlattr *data[]);
> 
> -     int                     (*newlink)(struct net_device *dev,
> +     int                     (*newlink)(struct net *net,
> +                                        struct net_device *dev,
>                                          struct nlattr *tb[],
>                                          struct nlattr *data[]);
>       int                     (*changelink)(struct net_device *dev,
> diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
> index e9c91dc..e6190f7 100644
> --- a/net/8021q/vlan_netlink.c
> +++ b/net/8021q/vlan_netlink.c
> @@ -100,7 +100,7 @@ static int vlan_changelink(struct net_device *dev,
>       return 0;
>  }
> 
> -static int vlan_newlink(struct net_device *dev,
> +static int vlan_newlink(struct net *net, struct net_device *dev,
>                       struct nlattr *tb[], struct nlattr *data[])
>  {
>       struct vlan_dev_info *vlan = vlan_dev_info(dev);
> @@ -112,7 +112,7 @@ static int vlan_newlink(struct net_device *dev,
> 
>       if (!tb[IFLA_LINK])
>               return -EINVAL;
> -     real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
> +     real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));

Hmm, if the macvlan is created inside a namespace, the network namespace 
passed as the function parameter will not be the namespace to which the 
IFLA_LINK device belongs, and __dev_get_by_index will fail, no?

>       if (!real_dev)
>               return -ENODEV;
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 8862498..069b176 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -1002,6 +1002,19 @@ struct net_device *rtnl_create_link(struct net *net, 
> char *ifname,
>                       goto err_free;
>       }
> 
> +     /* To support userspace specifying a network namespace during
> +      * device creation we grab the network namespace here and hold
> +      * it until just after register_netdevice to prevent races.
> +      */
> +     if (!tb[IFLA_NET_NS_PID])
> +             get_net(net);
> +     else {
> +             net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
> +             if (IS_ERR(net)) {
> +                     err = PTR_ERR(net);
> +                     goto err_free;
> +             }
> +     }
>       dev_net_set(dev, net);
>       dev->rtnl_link_ops = ops;
> 
> @@ -1150,10 +1163,12 @@ replay:
>               if (IS_ERR(dev))
>                       err = PTR_ERR(dev);
>               else if (ops->newlink)
> -                     err = ops->newlink(dev, tb, data);
> +                     err = ops->newlink(net, dev, tb, data);
>               else
>                       err = register_netdevice(dev);
> 
> +             if (!IS_ERR(dev))
> +                     put_net(dev->nd_net);

If there is an error in ops->newlink or register_netdevice, we will exit 
without releasing the net refcount.

>               if (err < 0 && !IS_ERR(dev))
>                       free_netdev(dev);
>               return err;
_______________________________________________
Containers mailing list
[EMAIL PROTECTED]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to