Add a VXLAN multiplexer so that multiple packet handlers can be registered
for received VXLAN packets. This is required for Open vSwitch VXLAN support.

Signed-off-by: Pravin B Shelar <pshe...@nicira.com>
---
 drivers/net/vxlan.c |  208 ++++++++++++++++++++++++++++++++++++++-------------
 include/net/vxlan.h |   17 ++++
 2 files changed, 172 insertions(+), 53 deletions(-)
 create mode 100644 include/net/vxlan.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 62a4438..bcfa933 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/vxlan.h>
 
 #define VXLAN_VERSION  "0.1"
 
@@ -66,19 +67,24 @@ struct vxlanhdr {
 };
 
 /* UDP port for VXLAN traffic. */
-static unsigned int vxlan_port __read_mostly = 8472;
-module_param_named(udp_port, vxlan_port, uint, 0444);
+static unsigned int vxlan_portno __read_mostly = 8472;
+module_param_named(udp_port, vxlan_portno, uint, 0444);
 MODULE_PARM_DESC(udp_port, "Destination UDP port");
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
+#define MAX_VXLAN_PORTS        8
+
 /* per-net private data for this module */
 static unsigned int vxlan_net_id;
+
+static DEFINE_MUTEX(vxlan_mutex);
 struct vxlan_net {
-       struct socket     *sock;        /* UDP encap socket */
        struct hlist_head vni_list[VNI_HASH_SIZE];
+       struct vxlan_port __rcu *vxlan_ports[MAX_VXLAN_PORTS];
+       struct vxlan_port port;
 };
 
 struct vxlan_rdst {
@@ -191,7 +197,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
        if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
                goto nla_put_failure;
 
-       if (rdst->remote_port && rdst->remote_port != vxlan_port &&
+       if (rdst->remote_port && rdst->remote_port != vxlan_portno &&
            nla_put_be16(skb, NDA_PORT, rdst->remote_port))
                goto nla_put_failure;
        if (rdst->remote_vni != vxlan->vni &&
@@ -458,7 +464,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                        return -EINVAL;
                port = nla_get_u32(tb[NDA_PORT]);
        } else
-               port = vxlan_port;
+               port = vxlan_portno;
 
        if (tb[NDA_VNI]) {
                if (nla_len(tb[NDA_VNI]) != sizeof(u32))
@@ -570,7 +576,7 @@ static void vxlan_snoop(struct net_device *dev,
                err = vxlan_fdb_create(vxlan, src_mac, src_ip,
                                       NUD_REACHABLE,
                                       NLM_F_EXCL|NLM_F_CREATE,
-                                      vxlan_port, vxlan->vni, 0);
+                                      vxlan_portno, vxlan->vni, 0);
                spin_unlock(&vxlan->hash_lock);
        }
 }
@@ -603,7 +609,7 @@ static int vxlan_join_group(struct net_device *dev)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
-       struct sock *sk = vn->sock->sk;
+       struct sock *sk = vn->port.sock->sk;
        struct ip_mreqn mreq = {
                .imr_multiaddr.s_addr   = vxlan->gaddr,
                .imr_ifindex            = vxlan->link,
@@ -631,7 +637,7 @@ static int vxlan_leave_group(struct net_device *dev)
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
        int err = 0;
-       struct sock *sk = vn->sock->sk;
+       struct sock *sk = vn->port.sock->sk;
        struct ip_mreqn mreq = {
                .imr_multiaddr.s_addr   = vxlan->gaddr,
                .imr_ifindex            = vxlan->link,
@@ -654,12 +660,9 @@ static int vxlan_leave_group(struct net_device *dev)
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
-       struct iphdr *oip;
+       struct vxlan_net *vn = net_generic(dev_net(skb->dev), vxlan_net_id);
        struct vxlanhdr *vxh;
-       struct vxlan_dev *vxlan;
-       struct pcpu_tstats *stats;
-       __u32 vni;
-       int err;
+       int i;
 
        /* pop off outer UDP header */
        __skb_pull(skb, sizeof(struct udphdr));
@@ -678,13 +681,43 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
        }
 
        __skb_pull(skb, sizeof(struct vxlanhdr));
+       /* Offer the packet to every handler registered for this UDP port. */
+       rcu_read_lock();
+       for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+               struct vxlan_port *port = rcu_dereference(vn->vxlan_ports[i]);
+
+               if (!port || port->portno != udp_hdr(skb)->dest)
+                       continue;
+               if (port->vx_rcv(port, skb, vxh->vx_vni) == PACKET_RCVD) {
+                       rcu_read_unlock();
+                       return 0;
+               }
+       }
+       rcu_read_unlock();
+
+       /* Unclaimed: undo the VXLAN header pull before handing back to UDP. */
+       __skb_push(skb, sizeof(struct vxlanhdr));
+error:
+       /* Put UDP header back */
+       __skb_push(skb, sizeof(struct udphdr));
+       return 1;
+}
+
+static int vxlan_rcv(struct vxlan_port *port, struct sk_buff *skb,
+                    __be32 _vni)
+{
+       struct vxlan_dev *vxlan;
+       struct net *net = dev_net(skb->dev);
+       struct iphdr *oip;
+       struct pcpu_tstats *stats;
+       int err;
+       int vni;
 
        /* Is this VNI defined? */
-       vni = ntohl(vxh->vx_vni) >> 8;
-       vxlan = vxlan_find_vni(sock_net(sk), vni);
+       vni = ntohl(_vni) >> 8;
+       vxlan = vxlan_find_vni(net, vni);
        if (!vxlan) {
-               netdev_dbg(skb->dev, "unknown vni %d\n", vni);
-               goto drop;
+               return PACKET_REJECT;
        }
 
        if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -741,16 +774,11 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
        netif_rx(skb);
 
-       return 0;
-error:
-       /* Put UDP header back */
-       __skb_push(skb, sizeof(struct udphdr));
-
-       return 1;
+       return PACKET_RCVD;
 drop:
        /* Consume bad packet */
        kfree_skb(skb);
-       return 0;
+       return PACKET_RCVD;
 }
 
 static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@ -869,10 +897,9 @@ static void vxlan_sock_free(struct sk_buff *skb)
 }
 
 /* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
+static void vxlan_set_owner(const struct vxlan_port *port, struct sk_buff *skb)
 {
-       struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
-       struct sock *sk = vn->sock->sk;
+       struct sock *sk = port->sock->sk;
 
        skb_orphan(skb);
        sock_hold(sk);
@@ -915,6 +942,7 @@ static int handle_offloads(struct sk_buff *skb)
 static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                  struct vxlan_rdst *rdst, bool did_rsc)
 {
+       struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct rtable *rt;
        const struct iphdr *old_iph;
@@ -929,7 +957,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        __be16 df = 0;
        __u8 tos, ttl;
 
-       dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port;
+       dst_port = rdst->remote_port ? rdst->remote_port : vxlan_portno;
        vni = rdst->remote_vni;
        dst = rdst->remote_ip;
 
@@ -1032,7 +1060,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
        nf_reset(skb);
 
-       vxlan_set_owner(dev, skb);
+       vxlan_set_owner(&vn->port, skb);
 
        if (handle_offloads(skb))
                goto drop;
@@ -1077,7 +1105,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
        f = vxlan_find_mac(vxlan, eth->h_dest);
        if (f == NULL) {
                did_rsc = false;
-               group.remote_port = vxlan_port;
+               group.remote_port = vxlan_portno;
                group.remote_vni = vxlan->vni;
                group.remote_ip = vxlan->gaddr;
                group.remote_ifindex = vxlan->link;
@@ -1526,37 +1554,35 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
        .fill_info      = vxlan_fill_info,
 };
 
-static __net_init int vxlan_init_net(struct net *net)
+struct socket *vxlan_create_socket(struct net *net, __be16 portno)
 {
-       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       struct socket *sock;
        struct sock *sk;
        struct sockaddr_in vxlan_addr = {
                .sin_family = AF_INET,
                .sin_addr.s_addr = htonl(INADDR_ANY),
        };
        int rc;
-       unsigned h;
 
        /* Create UDP socket for encapsulation receive. */
-       rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
+       rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
        if (rc < 0) {
                pr_debug("UDP socket create failed\n");
-               return rc;
+               return ERR_PTR(rc);
        }
        /* Put in proper namespace */
-       sk = vn->sock->sk;
+       sk = sock->sk;
        sk_change_net(sk, net);
 
-       vxlan_addr.sin_port = htons(vxlan_port);
+       vxlan_addr.sin_port = portno;
 
-       rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
+       rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr,
                         sizeof(vxlan_addr));
        if (rc < 0) {
                pr_debug("bind for UDP socket %pI4:%u (%d)\n",
                         &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
                sk_release_kernel(sk);
-               vn->sock = NULL;
-               return rc;
+               return ERR_PTR(rc);
        }
 
        /* Disable multicast loopback */
@@ -1567,28 +1593,104 @@ static __net_init int vxlan_init_net(struct net *net)
        udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
        udp_encap_enable();
 
-       for (h = 0; h < VNI_HASH_SIZE; ++h)
-               INIT_HLIST_HEAD(&vn->vni_list[h]);
+       return sock;
+}
 
-       return 0;
+/* Register @new; reuses the UDP socket already bound to new->portno, if any.
+ * Returns 0, -EINVAL (no vx_rcv), -EBUSY (no free slot) or a socket error. */
+int vxlan_add_handler(struct net *net, struct vxlan_port *new)
+{
+       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       struct socket *sock = NULL;
+       int i, p = -1;
+       int err;
+
+       if (!new->vx_rcv)       /* the receive path calls it unchecked */
+               return -EINVAL;
+
+       mutex_lock(&vxlan_mutex);
+       for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+               struct vxlan_port *port = vn->vxlan_ports[i];
+
+               if (port && port->portno == new->portno)
+                       sock = port->sock;
+               else if (!port && p < 0)
+                       p = i;
+       }
+
+       err = -EBUSY;
+       if (p < 0)
+               goto out;
+
+       if (!sock) {
+               sock = vxlan_create_socket(net, new->portno);
+               err = PTR_ERR(sock);
+               if (IS_ERR(sock))
+                       goto out;
+       }
+
+       new->sock = sock;
+       rcu_assign_pointer(vn->vxlan_ports[p], new);
+       err = 0;
+out:
+       mutex_unlock(&vxlan_mutex);
+       return err;
 }
+EXPORT_SYMBOL_GPL(vxlan_add_handler);
 
-static __net_exit void vxlan_exit_net(struct net *net)
+/* Unregister @del and release its UDP socket once no other handler shares the
+ * port. A handler that is not registered in @net is left untouched. */
+void vxlan_del_handler(struct net *net, const struct vxlan_port *del)
+{
+       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       bool found = false, inuse = false;
+       int i;
+
+       mutex_lock(&vxlan_mutex);
+       for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+               struct vxlan_port *port = vn->vxlan_ports[i];
+
+               if (port == del) {
+                       RCU_INIT_POINTER(vn->vxlan_ports[i], NULL);
+                       found = true;
+               } else if (port && port->portno == del->portno) {
+                       inuse = true;
+               }
+       }
+
+       if (found) {
+               /* Let in-flight receivers leave del->vx_rcv first. */
+               synchronize_net();
+               if (!inuse)
+                       sk_release_kernel(del->sock->sk);
+       }
+       mutex_unlock(&vxlan_mutex);
+}
+EXPORT_SYMBOL_GPL(vxlan_del_handler);
+
+static __net_init int vxlan_init_net(struct net *net)
 {
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-       struct vxlan_dev *vxlan;
        unsigned h;
+       int err;
+
+       vn->port.portno = htons(vxlan_portno);
+       vn->port.vx_rcv = vxlan_rcv;
+
+       err = vxlan_add_handler(net, &vn->port);
+       if (err)
+               return err;
 
-       rtnl_lock();
        for (h = 0; h < VNI_HASH_SIZE; ++h)
-               hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist)
-                       dev_close(vxlan->dev);
-       rtnl_unlock();
+               INIT_HLIST_HEAD(&vn->vni_list[h]);
 
-       if (vn->sock) {
-               sk_release_kernel(vn->sock->sk);
-               vn->sock = NULL;
-       }
+       return 0;
+}
+
+static __net_exit void vxlan_exit_net(struct net *net)
+{
+       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       vxlan_del_handler(net, &vn->port);
 }
 
 static struct pernet_operations vxlan_net_ops = {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
new file mode 100644
index 0000000..dae9619
--- /dev/null
+++ b/include/net/vxlan.h
@@ -0,0 +1,17 @@
+#ifndef __NET_IP_VXLAN_H
+#define __NET_IP_VXLAN_H 1
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+
+struct vxlan_port {    /* one receive handler registered for one UDP port */
+       int (*vx_rcv)(struct vxlan_port *port, struct sk_buff *skb, __be32 key);
+       void *user_data;        /* not touched by vxlan.c in this patch */
+       struct socket *sock;    /* set by vxlan_add_handler() */
+       __be16 portno;          /* UDP port, network byte order */
+};
+
+int vxlan_add_handler(struct net *net, struct vxlan_port *port);
+void vxlan_del_handler(struct net *net, const struct vxlan_port *port);
+#endif
-- 
1.7.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to