Add a VXLAN multiplexer so that multiple VXLAN packet handlers can be registered. This is required for Open vSwitch VXLAN support.
Signed-off-by: Pravin B Shelar <pshe...@nicira.com> --- drivers/net/vxlan.c | 208 ++++++++++++++++++++++++++++++++++++++------------- include/net/vxlan.h | 17 ++++ 2 files changed, 172 insertions(+), 53 deletions(-) create mode 100644 include/net/vxlan.h diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 62a4438..bcfa933 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -42,6 +42,7 @@ #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/vxlan.h> #define VXLAN_VERSION "0.1" @@ -66,19 +67,24 @@ struct vxlanhdr { }; /* UDP port for VXLAN traffic. */ -static unsigned int vxlan_port __read_mostly = 8472; -module_param_named(udp_port, vxlan_port, uint, 0444); +static unsigned int vxlan_portno __read_mostly = 8472; +module_param_named(udp_port, vxlan_portno, uint, 0444); MODULE_PARM_DESC(udp_port, "Destination UDP port"); static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); +#define MAX_VXLAN_PORTS 8 + /* per-net private data for this module */ static unsigned int vxlan_net_id; + +static DEFINE_MUTEX(vxlan_mutex); struct vxlan_net { - struct socket *sock; /* UDP encap socket */ struct hlist_head vni_list[VNI_HASH_SIZE]; + struct vxlan_port __rcu *vxlan_ports[MAX_VXLAN_PORTS]; + struct vxlan_port port; }; struct vxlan_rdst { @@ -191,7 +197,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) goto nla_put_failure; - if (rdst->remote_port && rdst->remote_port != vxlan_port && + if (rdst->remote_port && rdst->remote_port != vxlan_portno && nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->vni && @@ -458,7 +464,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; port = nla_get_u32(tb[NDA_PORT]); } else - port = vxlan_port; + port = 
vxlan_portno; if (tb[NDA_VNI]) { if (nla_len(tb[NDA_VNI]) != sizeof(u32)) @@ -570,7 +576,7 @@ static void vxlan_snoop(struct net_device *dev, err = vxlan_fdb_create(vxlan, src_mac, src_ip, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, - vxlan_port, vxlan->vni, 0); + vxlan_portno, vxlan->vni, 0); spin_unlock(&vxlan->hash_lock); } } @@ -603,7 +609,7 @@ static int vxlan_join_group(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); - struct sock *sk = vn->sock->sk; + struct sock *sk = vn->port.sock->sk; struct ip_mreqn mreq = { .imr_multiaddr.s_addr = vxlan->gaddr, .imr_ifindex = vxlan->link, @@ -631,7 +637,7 @@ static int vxlan_leave_group(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); int err = 0; - struct sock *sk = vn->sock->sk; + struct sock *sk = vn->port.sock->sk; struct ip_mreqn mreq = { .imr_multiaddr.s_addr = vxlan->gaddr, .imr_ifindex = vxlan->link, @@ -654,12 +660,9 @@ static int vxlan_leave_group(struct net_device *dev) /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { - struct iphdr *oip; + struct vxlan_net *vn = net_generic(dev_net(skb->dev), vxlan_net_id); struct vxlanhdr *vxh; - struct vxlan_dev *vxlan; - struct pcpu_tstats *stats; - __u32 vni; - int err; + int i; /* pop off outer UDP header */ __skb_pull(skb, sizeof(struct udphdr)); @@ -678,13 +681,43 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } __skb_pull(skb, sizeof(struct vxlanhdr)); + rcu_read_lock(); + for (i = 0; i < MAX_VXLAN_PORTS; i++) { + struct vxlan_port *port = rcu_dereference(vn->vxlan_ports[i]); + int ret; + + if (!port) + continue; + if (port->portno != udp_hdr(skb)->dest) + continue; + ret = port->vx_rcv(port, skb, vxh->vx_vni); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); 
+error: + /* Put UDP header back */ + __skb_push(skb, sizeof(struct udphdr)); + return 1; +} + +static int vxlan_rcv(struct vxlan_port *port, struct sk_buff *skb, + __be32 _vni) +{ + struct vxlan_dev *vxlan; + struct net *net = dev_net(skb->dev); + struct iphdr *oip; + struct pcpu_tstats *stats; + int err; + int vni; /* Is this VNI defined? */ - vni = ntohl(vxh->vx_vni) >> 8; - vxlan = vxlan_find_vni(sock_net(sk), vni); + vni = ntohl(_vni) >> 8; + vxlan = vxlan_find_vni(net, vni); if (!vxlan) { - netdev_dbg(skb->dev, "unknown vni %d\n", vni); - goto drop; + return PACKET_REJECT; } if (!pskb_may_pull(skb, ETH_HLEN)) { @@ -741,16 +774,11 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) netif_rx(skb); - return 0; -error: - /* Put UDP header back */ - __skb_push(skb, sizeof(struct udphdr)); - - return 1; + return PACKET_RCVD; drop: /* Consume bad packet */ kfree_skb(skb); - return 0; + return PACKET_RCVD; } static int arp_reduce(struct net_device *dev, struct sk_buff *skb) @@ -869,10 +897,9 @@ static void vxlan_sock_free(struct sk_buff *skb) } /* On transmit, associate with the tunnel socket */ -static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb) +static void vxlan_set_owner(const struct vxlan_port *port, struct sk_buff *skb) { - struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); - struct sock *sk = vn->sock->sk; + struct sock *sk = port->sock->sk; skb_orphan(skb); sock_hold(sk); @@ -915,6 +942,7 @@ static int handle_offloads(struct sk_buff *skb) static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { + struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; @@ -929,7 +957,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, __be16 df = 0; __u8 tos, ttl; - dst_port = rdst->remote_port ? 
rdst->remote_port : vxlan_port; + dst_port = rdst->remote_port ? rdst->remote_port : vxlan_portno; vni = rdst->remote_vni; dst = rdst->remote_ip; @@ -1032,7 +1060,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, nf_reset(skb); - vxlan_set_owner(dev, skb); + vxlan_set_owner(&vn->port, skb); if (handle_offloads(skb)) goto drop; @@ -1077,7 +1105,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) f = vxlan_find_mac(vxlan, eth->h_dest); if (f == NULL) { did_rsc = false; - group.remote_port = vxlan_port; + group.remote_port = vxlan_portno; group.remote_vni = vxlan->vni; group.remote_ip = vxlan->gaddr; group.remote_ifindex = vxlan->link; @@ -1526,37 +1554,35 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .fill_info = vxlan_fill_info, }; -static __net_init int vxlan_init_net(struct net *net) +struct socket *vxlan_create_socket(struct net *net, __be16 portno) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct socket *sock; struct sock *sk; struct sockaddr_in vxlan_addr = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), }; int rc; - unsigned h; /* Create UDP socket for encapsulation receive. 
*/ - rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock); + rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (rc < 0) { pr_debug("UDP socket create failed\n"); - return rc; + return ERR_PTR(rc); } /* Put in proper namespace */ - sk = vn->sock->sk; + sk = sock->sk; sk_change_net(sk, net); - vxlan_addr.sin_port = htons(vxlan_port); + vxlan_addr.sin_port = portno; - rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr, + rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr, sizeof(vxlan_addr)); if (rc < 0) { pr_debug("bind for UDP socket %pI4:%u (%d)\n", &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); sk_release_kernel(sk); - vn->sock = NULL; - return rc; + return ERR_PTR(rc); } /* Disable multicast loopback */ @@ -1567,28 +1593,104 @@ static __net_init int vxlan_init_net(struct net *net) udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; udp_encap_enable(); - for (h = 0; h < VNI_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&vn->vni_list[h]); + return sock; +} - return 0; +int vxlan_add_handler(struct net *net, struct vxlan_port *new) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct socket *sock = NULL; + int i, p = -1; + int err; + + mutex_lock(&vxlan_mutex); + for (i = 0; i < MAX_VXLAN_PORTS; i++) { + struct vxlan_port *port = vn->vxlan_ports[i]; + + if (!port) { + if (p < 0) + p = i; + continue; + } + if (port->portno == new->portno) + sock = port->sock; + } + + if (p < 0) { + err = -EBUSY; + goto out; + } + + if (!sock) { + sock = vxlan_create_socket(net, new->portno); + if (IS_ERR(sock)) { + err = PTR_ERR(sock); + goto out; + } + } + + new->sock = sock; + rcu_assign_pointer(vn->vxlan_ports[p], new); + err = 0; +out: + mutex_unlock(&vxlan_mutex); + return err; } +EXPORT_SYMBOL_GPL(vxlan_add_handler); -static __net_exit void vxlan_exit_net(struct net *net) +void vxlan_del_handler(struct net *net, const struct vxlan_port *del) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + bool inuse = false; + int 
i; + + mutex_lock(&vxlan_mutex); + + /* check if sock is still used. */ + for (i = 0; i < MAX_VXLAN_PORTS; i++) { + struct vxlan_port *port = vn->vxlan_ports[i]; + + if (!port) + continue; + + if (port == del) { + RCU_INIT_POINTER(vn->vxlan_ports[i], NULL); + synchronize_net(); + continue; + } + if (port->portno == del->portno) + inuse = true; + } + + if (!inuse) + sk_release_kernel(del->sock->sk); + mutex_unlock(&vxlan_mutex); +} +EXPORT_SYMBOL_GPL(vxlan_del_handler); + +static __net_init int vxlan_init_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); - struct vxlan_dev *vxlan; unsigned h; + int err; + + vn->port.portno = htons(vxlan_portno); + vn->port.vx_rcv = vxlan_rcv; + + err = vxlan_add_handler(net, &vn->port); + if (err) + return err; - rtnl_lock(); for (h = 0; h < VNI_HASH_SIZE; ++h) - hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) - dev_close(vxlan->dev); - rtnl_unlock(); + INIT_HLIST_HEAD(&vn->vni_list[h]); - if (vn->sock) { - sk_release_kernel(vn->sock->sk); - vn->sock = NULL; - } + return 0; +} + +static __net_exit void vxlan_exit_net(struct net *net) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + vxlan_del_handler(net, &vn->port); } static struct pernet_operations vxlan_net_ops = { diff --git a/include/net/vxlan.h b/include/net/vxlan.h new file mode 100644 index 0000000..dae9619 --- /dev/null +++ b/include/net/vxlan.h @@ -0,0 +1,17 @@ +#ifndef __NET_IP_VXLAN_H +#define __NET_IP_VXLAN_H 1 + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/udp.h> + +struct vxlan_port { + int (*vx_rcv)(struct vxlan_port *port, struct sk_buff *skb, __be32 key); + void *user_data; + struct socket *sock; + __be16 portno; +}; + +int vxlan_add_handler(struct net *net, struct vxlan_port *); +void vxlan_del_handler(struct net *net, const struct vxlan_port *port); +#endif -- 1.7.1 _______________________________________________ dev mailing list dev@openvswitch.org 
http://openvswitch.org/mailman/listinfo/dev