On Tue, Oct 31, 2017 at 06:32:18PM +0800, Jason Wang wrote: > This patch introduces an eBPF based queue selection method based on > the flow steering policy ops. Userspace could load an eBPF program > through TUNSETSTEERINGEBPF. This gives much more flexibility compare > to simple but hard coded policy in kernel. > > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > drivers/net/tun.c | 79 > ++++++++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/if_tun.h | 2 ++ > 2 files changed, 80 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index ab109ff..4bdde21 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -191,6 +191,20 @@ struct tun_steering_ops { > u32 data); > }; > > +void tun_steering_xmit_nop(struct tun_struct *tun, struct sk_buff *skb) > +{ > +} > + > +u32 tun_steering_pre_rx_nop(struct tun_struct *tun, struct sk_buff *skb) > +{ > + return 0; > +} > + > +void tun_steering_post_rx_nop(struct tun_struct *tun, struct tun_file *tfile, > + u32 data) > +{ > +} > + > struct tun_flow_entry { > struct hlist_node hash_link; > struct rcu_head rcu; > @@ -241,6 +255,7 @@ struct tun_struct { > u32 rx_batched; > struct tun_pcpu_stats __percpu *pcpu_stats; > struct bpf_prog __rcu *xdp_prog; > + struct bpf_prog __rcu *steering_prog; > struct tun_steering_ops *steering_ops; > }; > > @@ -576,6 +591,19 @@ static u16 tun_automq_select_queue(struct tun_struct > *tun, struct sk_buff *skb) > return txq; > } > > +static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) > +{ > + struct bpf_prog *prog; > + u16 ret = 0; > + > + rcu_read_lock(); > + prog = rcu_dereference(tun->steering_prog); > + if (prog) > + ret = bpf_prog_run_clear_cb(prog, skb); > + rcu_read_unlock(); > + > + return ret % tun->numqueues; > +} > static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, > void *accel_priv, select_queue_fallback_t fallback) > { > @@ -2017,6 +2045,20 @@ static ssize_t 
tun_chr_read_iter(struct kiocb *iocb, > struct iov_iter *to) > return ret; > } > > +static void __tun_set_steering_ebpf(struct tun_struct *tun, > + struct bpf_prog *new) > +{ > + struct bpf_prog *old; > + > + old = rtnl_dereference(tun->steering_prog); > + rcu_assign_pointer(tun->steering_prog, new); > + > + if (old) { > + synchronize_net(); > + bpf_prog_destroy(old); > + } > +} > +
Is this really called under rtnl? If not, then rtnl_dereference is wrong. If it is, I'm not sure you can call synchronize_net under rtnl. > static void tun_free_netdev(struct net_device *dev) > { > struct tun_struct *tun = netdev_priv(dev); > @@ -2025,6 +2067,7 @@ static void tun_free_netdev(struct net_device *dev) > free_percpu(tun->pcpu_stats); > tun_flow_uninit(tun); > security_tun_dev_free_security(tun->security); > + __tun_set_steering_ebpf(tun, NULL); > } > > static void tun_setup(struct net_device *dev) > @@ -2159,6 +2202,13 @@ static struct tun_steering_ops tun_automq_ops = { > .post_rx = tun_automq_post_rx, > }; > > +static struct tun_steering_ops tun_ebpf_ops = { > + .select_queue = tun_ebpf_select_queue, > + .xmit = tun_steering_xmit_nop, > + .pre_rx = tun_steering_pre_rx_nop, > + .post_rx = tun_steering_post_rx_nop, > +}; > + > static int tun_flags(struct tun_struct *tun) > { > return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); > @@ -2311,6 +2361,7 @@ static int tun_set_iff(struct net *net, struct file > *file, struct ifreq *ifr) > tun->filter_attached = false; > tun->sndbuf = tfile->socket.sk->sk_sndbuf; > tun->rx_batched = 0; > + RCU_INIT_POINTER(tun->steering_prog, NULL); > > tun->pcpu_stats = netdev_alloc_pcpu_stats(struct > tun_pcpu_stats); > if (!tun->pcpu_stats) { > @@ -2503,6 +2554,23 @@ static int tun_set_queue(struct file *file, struct > ifreq *ifr) > return ret; > } > > +static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data) > +{ > + struct bpf_prog *prog; > + u32 fd; > + > + if (copy_from_user(&fd, data, sizeof(fd))) > + return -EFAULT; > + > + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); > + if (IS_ERR(prog)) > + return PTR_ERR(prog); > + > + __tun_set_steering_ebpf(tun, prog); > + > + return 0; > +} > + > static long __tun_chr_ioctl(struct file *file, unsigned int cmd, > unsigned long arg, int ifreq_len) > { > @@ -2785,6 +2853,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned > 
int cmd, > case TUN_STEERING_AUTOMQ: > tun->steering_ops = &tun_automq_ops; > break; > + case TUN_STEERING_EBPF: > + tun->steering_ops = &tun_ebpf_ops; > + break; > default: > ret = -EFAULT; > } > @@ -2794,6 +2865,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned > int cmd, > ret = 0; > if (tun->steering_ops == &tun_automq_ops) > steering = TUN_STEERING_AUTOMQ; > + else if (tun->steering_ops == &tun_ebpf_ops) > + steering = TUN_STEERING_EBPF; > else > BUG(); > if (copy_to_user(argp, &steering, sizeof(steering))) > @@ -2802,11 +2875,15 @@ static long __tun_chr_ioctl(struct file *file, > unsigned int cmd, > > case TUNGETSTEERINGFEATURES: > ret = 0; > - steering = TUN_STEERING_AUTOMQ; > + steering = TUN_STEERING_AUTOMQ | TUN_STEERING_EBPF; > if (copy_to_user(argp, &steering, sizeof(steering))) > ret = -EFAULT; > break; > > + case TUNSETSTEERINGEBPF: > + ret = tun_set_steering_ebpf(tun, argp); > + break; > + > default: > ret = -EINVAL; > break; > diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h > index 109760e..927f7e4 100644 > --- a/include/uapi/linux/if_tun.h > +++ b/include/uapi/linux/if_tun.h > @@ -59,6 +59,7 @@ > #define TUNSETSTEERING _IOW('T', 224, unsigned int) > #define TUNGETSTEERING _IOR('T', 225, unsigned int) > #define TUNGETSTEERINGFEATURES _IOR('T', 226, unsigned int) > +#define TUNSETSTEERINGEBPF _IOR('T', 227, int) > > /* TUNSETIFF ifr flags */ > #define IFF_TUN 0x0001 > @@ -112,5 +113,6 @@ struct tun_filter { > }; > > #define TUN_STEERING_AUTOMQ 0x01 /* Automatic flow steering */ > +#define TUN_STEERING_EBPF 0x02 /* eBPF based flow steering */ > > #endif /* _UAPI__IF_TUN_H */ > -- > 2.7.4