On Thu, Jan 10, 2019 at 3:25 PM David Herrmann <dh.herrm...@gmail.com> wrote: > > This introduces a new generic SOL_SOCKET-level socket option called > SO_BINDTOIF. It behaves similar to SO_BINDTODEVICE, but takes a network > interface index as argument, rather than the network interface name. > > User-space often refers to network-interfaces via their index, but has > to temporarily resolve it to a name for a call into SO_BINDTODEVICE. > This might pose problems when the network-device is renamed > asynchronously by other parts of the system. When this happens, the > SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong > device. > > In most cases user-space only ever operates on devices which they > either manage themselves, or otherwise have a guarantee that the device > name will not change (e.g., devices that are UP cannot be renamed). > However, particularly in libraries this guarantee is non-obvious and it > would be nice if that race-condition would simply not exist. It would > make it easier for those libraries to operate even in situations where > the device-name might change under the hood. > > A real use-case that we recently hit is trying to start the network > stack early in the initrd but make it survive into the real system. > Existing distributions rename network-interfaces during the transition > from initrd into the real system. This, obviously, cannot affect > devices that are up and running (unless you also consider moving them > between network-namespaces). However, the network manager now has to > make sure its management engine for dormant devices will not run in > parallel to these renames. Particularly, when you offload operations > like DHCP into separate processes, these might setup their sockets > early, and thus have to resolve the device-name possibly running into > this race-condition. > > By avoiding a call to resolve the device-name, we no longer depend on > the name and can run network setup of dormant devices in parallel to > the transition off the initrd. The SO_BINDTOIF ioctl plugs this race. > > Signed-off-by: David Herrmann <dh.herrm...@gmail.com> Reviewed-by: Tom Gundersen <t...@jklm.no>
> --- > arch/alpha/include/uapi/asm/socket.h | 2 ++ > arch/ia64/include/uapi/asm/socket.h | 2 ++ > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/s390/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > arch/xtensa/include/uapi/asm/socket.h | 2 ++ > include/uapi/asm-generic/socket.h | 2 ++ > net/core/sock.c | 46 +++++++++++++++++++++------ > 9 files changed, 52 insertions(+), 10 deletions(-) > > diff --git a/arch/alpha/include/uapi/asm/socket.h > b/arch/alpha/include/uapi/asm/socket.h > index 065fb372e355..6e346e51eec7 100644 > --- a/arch/alpha/include/uapi/asm/socket.h > +++ b/arch/alpha/include/uapi/asm/socket.h > @@ -115,4 +115,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* _UAPI_ASM_SOCKET_H */ > diff --git a/arch/ia64/include/uapi/asm/socket.h > b/arch/ia64/include/uapi/asm/socket.h > index c872c4e6bafb..ece83ba17b9d 100644 > --- a/arch/ia64/include/uapi/asm/socket.h > +++ b/arch/ia64/include/uapi/asm/socket.h > @@ -117,4 +117,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* _ASM_IA64_SOCKET_H */ > diff --git a/arch/mips/include/uapi/asm/socket.h > b/arch/mips/include/uapi/asm/socket.h > index 71370fb3ceef..27f7f761ace5 100644 > --- a/arch/mips/include/uapi/asm/socket.h > +++ b/arch/mips/include/uapi/asm/socket.h > @@ -126,4 +126,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* _UAPI_ASM_SOCKET_H */ > diff --git a/arch/parisc/include/uapi/asm/socket.h > b/arch/parisc/include/uapi/asm/socket.h > index 061b9cf2a779..efd3917f23e1 100644 > --- a/arch/parisc/include/uapi/asm/socket.h > +++ b/arch/parisc/include/uapi/asm/socket.h > @@ -107,4 +107,6 @@ > #define SO_TXTIME 0x4036 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 0x4037 > + > #endif /* _UAPI_ASM_SOCKET_H */ > diff --git a/arch/s390/include/uapi/asm/socket.h > b/arch/s390/include/uapi/asm/socket.h > index 39d901476ee5..c8ba542e69e6 100644 > --- a/arch/s390/include/uapi/asm/socket.h > +++ b/arch/s390/include/uapi/asm/socket.h > @@ -114,4 +114,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* _ASM_SOCKET_H */ > diff --git a/arch/sparc/include/uapi/asm/socket.h > b/arch/sparc/include/uapi/asm/socket.h > index 7ea35e5601b6..50006bde7dc0 100644 > --- a/arch/sparc/include/uapi/asm/socket.h > +++ b/arch/sparc/include/uapi/asm/socket.h > @@ -104,6 +104,8 @@ > #define SO_TXTIME 0x003f > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 0x0040 > + > /* Security levels - as per NRL IPv6 - don't actually do anything */ > #define SO_SECURITY_AUTHENTICATION 0x5001 > #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 > diff --git a/arch/xtensa/include/uapi/asm/socket.h > b/arch/xtensa/include/uapi/asm/socket.h > index 1de07a7f7680..a36241ffbd86 100644 > --- a/arch/xtensa/include/uapi/asm/socket.h > +++ b/arch/xtensa/include/uapi/asm/socket.h > @@ -119,4 +119,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* _XTENSA_SOCKET_H */ > diff --git a/include/uapi/asm-generic/socket.h > b/include/uapi/asm-generic/socket.h > index a12692e5f7a8..31fb8414ea4c 100644 > --- a/include/uapi/asm-generic/socket.h > +++ b/include/uapi/asm-generic/socket.h > @@ -110,4 +110,6 @@ > #define SO_TXTIME 61 > #define SCM_TXTIME SO_TXTIME > > +#define SO_BINDTOIF 62 > + > #endif /* __ASM_GENERIC_SOCKET_H */ > diff --git a/net/core/sock.c b/net/core/sock.c > index 6aa2e7e0b4fb..df8f83bc22b3 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -520,20 +520,43 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 > cookie) > } > EXPORT_SYMBOL(sk_dst_check); > > -static int sock_setbindtodevice(struct sock *sk, char __user *optval, > - int optlen) > +static int sock_setbindtodevice_locked(struct sock *sk, int ifindex) > { > int ret = -ENOPROTOOPT; > #ifdef CONFIG_NETDEVICES > struct net *net = sock_net(sk); > - char devname[IFNAMSIZ]; > - int index; > > /* Sorry... */ > ret = -EPERM; > if (!ns_capable(net->user_ns, CAP_NET_RAW)) > goto out; > > + ret = -EINVAL; > + if (ifindex < 0) > + goto out; > + > + sk->sk_bound_dev_if = ifindex; > + if (sk->sk_prot->rehash) > + sk->sk_prot->rehash(sk); > + sk_dst_reset(sk); > + > + ret = 0; > + > +out: > +#endif > + > + return ret; > +} > + > +static int sock_setbindtodevice(struct sock *sk, char __user *optval, > + int optlen) > +{ > + int ret = -ENOPROTOOPT; > +#ifdef CONFIG_NETDEVICES > + struct net *net = sock_net(sk); > + char devname[IFNAMSIZ]; > + int index; > + > ret = -EINVAL; > if (optlen < 0) > goto out; > @@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char > __user *optval, > } > > lock_sock(sk); > - sk->sk_bound_dev_if = index; > - if (sk->sk_prot->rehash) > - sk->sk_prot->rehash(sk); > - sk_dst_reset(sk); > + ret = sock_setbindtodevice_locked(sk, index); > release_sock(sk); > > - ret = 0; > - > out: > #endif > > @@ -1055,6 +1073,10 @@ int sock_setsockopt(struct socket *sock, int level, > int optname, > } > break; > > + case SO_BINDTOIF: > + ret = sock_setbindtodevice_locked(sk, val); > + break; > + > default: > ret = -ENOPROTOOPT; > break; > @@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, > int optname, > SOF_TXTIME_REPORT_ERRORS : 0; > break; > > + case SO_BINDTOIF: > + v.val = sk->sk_bound_dev_if; > + break; > + > default: > /* We implement the SO_SNDLOWAT etc to not be settable > * (1003.1g 7). > -- > 2.20.1 >