This option makes it possible to programmatically bind sockets to netdevices.
With the help of this option, sockets of VRF-unaware applications
can be distributed between multiple VRFs by an eBPF sock_ops program.
This lets the applications benefit from the multiple possible routes.

Signed-off-by: Ferenc Fejes <fe...@inf.elte.hu>
---
 net/core/filter.c | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 822d662f97ef..25dac75bfc5d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                           char *optval, int optlen, u32 flags)
 {
+       char devname[IFNAMSIZ];
+       struct net *net;
+       int ifindex;
        int ret = 0;
        int val;
 
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
        sock_owned_by_me(sk);
 
        if (level == SOL_SOCKET) {
-               if (optlen != sizeof(int))
+               if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
                        return -EINVAL;
                val = *((int *)optval);
 
@@ -4298,6 +4301,40 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                                sk_dst_reset(sk);
                        }
                        break;
+               case SO_BINDTODEVICE:
+                       ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+                       net = sock_net(sk);
+                       strncpy(devname, optval,
+                               min_t(long, optlen, IFNAMSIZ-1));
+                       devname[IFNAMSIZ-1] = 0;
+                       ifindex = 0;
+                       if (devname[0] != '\0') {
+                               struct net_device *dev;
+
+                               rcu_read_lock();
+                               dev = dev_get_by_name_rcu(net, devname);
+                               if (dev)
+                                       ifindex = dev->ifindex;
+                               rcu_read_unlock();
+                               ret = -ENODEV;
+                               if (!dev)
+                                       break;
+                       }
+                       ret = -EPERM;
+                       if (sk->sk_bound_dev_if &&
+                               !ns_capable(net->user_ns, CAP_NET_RAW))
+                               break;
+                       ret = -EINVAL;
+                       if (ifindex < 0)
+                               break;
+                       sk->sk_bound_dev_if = ifindex;
+                       if (sk->sk_prot->rehash)
+                               sk->sk_prot->rehash(sk);
+                       sk_dst_reset(sk);
+                       ret = 0;
+#endif
+                       break;
                default:
                        ret = -EINVAL;
                }
-- 
2.17.1

Reply via email to