On Mon, Jun 20, 2016 at 08:36:43PM +0800, Yi Yang wrote: > The current Linux kernel git tree already includes a VXLAN-GPE implementation: > > author Jiri Benc <jb...@redhat.com> > committer David S. Miller <da...@davemloft.net> > commit e1e5314de08ba6003b358125eafc9ad9e75a950c (patch) > tree 1e18cdabf1c9d9ef17e26c6480e629465447f77f /drivers/net/vxlan.c > parent a6d5bbf34efa8330af7b0b1dba0f38148516ed97 (diff) > vxlan: implement GPE > > This patch ports it to OVS so that people can also use VXLAN-GPE > even if they don't replace their kernels with the latest Linux kernel. > > Signed-off-by: Johnson Li <johnson...@intel.com> > Signed-off-by: Yi Yang <yi.y.y...@intel.com>
Hi, Yi Yang. Before adding the OVS_VXLAN_EXT_GPE extension to the out-of-tree module, you should send it to the mainline kernel. Besides, you need a very good justification why you can't wait for my patchset to be accepted and have VXLAN-GPE enabled using rtnetlink. Also, I would split any changes to the datapath and userspace parts of the code into multiple commits. Meanwhile, you could backport only the upstreamed portions of VXLAN-GPE and send that as a single commit, no userspace changes. Cascardo. > --- > datapath/linux/compat/include/linux/if_link.h | 4 + > datapath/linux/compat/include/linux/openvswitch.h | 1 + > datapath/linux/compat/include/net/vxlan.h | 73 ++++ > datapath/linux/compat/vxlan.c | 461 > ++++++++++++++++++++-- > lib/dpif-netlink.c | 5 + > lib/netdev-vport.c | 4 +- > 6 files changed, 512 insertions(+), 36 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/if_link.h > b/datapath/linux/compat/include/linux/if_link.h > index 6209dcb..de87769 100644 > --- a/datapath/linux/compat/include/linux/if_link.h > +++ b/datapath/linux/compat/include/linux/if_link.h > @@ -100,6 +100,10 @@ enum { > IFLA_VXLAN_REMCSUM_NOPARTIAL, > #define IFLA_VXLAN_COLLECT_METADATA rpl_IFLA_VXLAN_COLLECT_METADATA > IFLA_VXLAN_COLLECT_METADATA, > +#define IFLA_VXLAN_LABEL rpl_IFLA_VXLAN_LABEL > + IFLA_VXLAN_LABEL, > +#define IFLA_VXLAN_GPE rpl_IFLA_VXLAN_GPE > + IFLA_VXLAN_GPE, > #define __IFLA_VXLAN_MAX rpl___IFLA_VXLAN_MAX > __IFLA_VXLAN_MAX > }; > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index edfa7a1..761d9c6 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -287,6 +287,7 @@ enum ovs_vport_attr { > enum { > OVS_VXLAN_EXT_UNSPEC, > OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ > + OVS_VXLAN_EXT_GPE, /* Flag, Generic Protocol Extension */ > __OVS_VXLAN_EXT_MAX, > }; > > diff --git 
a/datapath/linux/compat/include/net/vxlan.h > b/datapath/linux/compat/include/net/vxlan.h > index 75a5a7a..b3f45c4 100644 > --- a/datapath/linux/compat/include/net/vxlan.h > +++ b/datapath/linux/compat/include/net/vxlan.h > @@ -84,6 +84,66 @@ struct vxlanhdr_gbp { > #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) > #define VXLAN_GBP_ID_MASK (0xFFFF) > > +/* > + * VXLAN Generic Protocol Extension (VXLAN_F_GPE): > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * | VXLAN Network Identifier (VNI) | Reserved | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * > + * Ver = Version. Indicates VXLAN GPE protocol version. > + * > + * P = Next Protocol Bit. The P bit is set to indicate that the > + * Next Protocol field is present. > + * > + * O = OAM Flag Bit. The O bit is set to indicate that the packet > + * is an OAM packet. > + * > + * Next Protocol = This 8 bit field indicates the protocol header > + * immediately following the VXLAN GPE header. > + * > + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 > + */ > + > +struct vxlanhdr_gpe { > +#if defined(__LITTLE_ENDIAN_BITFIELD) > + u8 oam_flag:1, > + reserved_flags1:1, > + np_applied:1, > + instance_applied:1, > + version:2, > +reserved_flags2:2; > +#elif defined(__BIG_ENDIAN_BITFIELD) > + u8 reserved_flags2:2, > + version:2, > + instance_applied:1, > + np_applied:1, > + reserved_flags1:1, > + oam_flag:1; > +#endif > + u8 reserved_flags3; > + u8 reserved_flags4; > + u8 next_protocol; > + __be32 vx_vni; > +}; > + > +/* VXLAN-GPE header flags. */ > +#define VXLAN_HF_VER (BIT(29) | BIT(28)) > +#define VXLAN_HF_NP (BIT(26)) > +#define VXLAN_HF_OAM (BIT(24)) > +#define VXLAN_HF_GPE (BIT(26)) > + > +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ > + (0xFF)) > + > +/* VXLAN-GPE header Next Protocol. 
*/ > +#define VXLAN_GPE_NP_IPV4 0x01 > +#define VXLAN_GPE_NP_IPV6 0x02 > +#define VXLAN_GPE_NP_ETHERNET 0x03 > +#define VXLAN_GPE_NP_NSH 0x04 > +#define ETH_P_NSH 0x894f > + > /* VXLAN protocol header: > * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > * |G|R|R|R|I|R|R|C| Reserved | > @@ -167,6 +227,7 @@ struct vxlan_config { > __u16 port_max; > __u8 tos; > __u8 ttl; > + __be32 label; > u32 flags; > unsigned long age_interval; > unsigned int addrmax; > @@ -205,15 +266,27 @@ struct vxlan_dev { > #define VXLAN_F_GBP 0x800 > #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 > #define VXLAN_F_COLLECT_METADATA 0x2000 > +#define VXLAN_F_GPE 0x4000 > +#define VXLAN_F_UDP_ZERO_CSUM_TX VXLAN_F_UDP_CSUM > > /* Flags that are used in the receive path. These flags must match in > * order for a socket to be shareable > */ > #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ > + VXLAN_F_GPE | \ > VXLAN_F_UDP_ZERO_CSUM6_RX | \ > VXLAN_F_REMCSUM_RX | \ > VXLAN_F_REMCSUM_NOPARTIAL | \ > VXLAN_F_COLLECT_METADATA) > + > +/* Flags that can be set together with VXLAN_F_GPE. 
*/ > +#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ > + VXLAN_F_IPV6 | \ > + VXLAN_F_UDP_CSUM | \ > + VXLAN_F_UDP_ZERO_CSUM6_TX | \ > + VXLAN_F_UDP_ZERO_CSUM6_RX | \ > + VXLAN_F_COLLECT_METADATA) > + > #define vxlan_dev_create rpl_vxlan_dev_create > struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name, > u8 name_assign_type, struct vxlan_config > *conf); > diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c > index 4faa18f..570d2d9 100644 > --- a/datapath/linux/compat/vxlan.c > +++ b/datapath/linux/compat/vxlan.c > @@ -812,6 +812,45 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff > *skb, struct vxlanhdr *vh, > } > #endif > > +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed, > + __be32 *protocol, > + struct sk_buff *skb, u32 vxflags) > +{ > + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed; > + > + /* Need to have Next Protocol set for interfaces in GPE mode. */ > + if (!gpe->np_applied) > + return false; > + /* "The initial version is 0. If a receiver does not support the > + * version indicated it MUST drop the packet. > + */ > + if (gpe->version != 0) > + return false; > + /* "When the O bit is set to 1, the packet is an OAM packet and OAM > + * processing MUST occur." However, we don't implement OAM > + * processing, thus drop the packet. 
> + */ > + if (gpe->oam_flag) > + return false; > + > + switch (gpe->next_protocol) { > + case VXLAN_GPE_NP_IPV4: > + *protocol = htons(ETH_P_IP); > + break; > + case VXLAN_GPE_NP_IPV6: > + *protocol = htons(ETH_P_IPV6); > + break; > + case VXLAN_GPE_NP_ETHERNET: > + *protocol = htons(ETH_P_TEB); > + break; > + default: > + return false; > + } > + > + unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS; > + return true; > +} > + > static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, > struct vxlan_metadata *md, u32 vni, > struct metadata_dst *tun_dst) > @@ -822,6 +861,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct > sk_buff *skb, > struct pcpu_sw_netstats *stats; > union vxlan_addr saddr; > int err = 0; > + struct vxlanhdr unparsed; > + __be32 protocol = htons(ETH_P_TEB); > + bool raw_proto = false; > > /* For flow based devices, map all packets to VNI 0 */ > if (vs->flags & VXLAN_F_COLLECT_METADATA) > @@ -832,14 +874,35 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct > sk_buff *skb, > if (!vxlan) > goto drop; > > - skb_reset_mac_header(skb); > - skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); > - skb->protocol = eth_type_trans(skb, vxlan->dev); > - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); > + /* For backwards compatibility, only allow reserved fields to be > + * used by VXLAN extensions if explicitly requested. 
> + */ > + if (vs->flags & VXLAN_F_GPE) { > + unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1); > + if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags)) > + goto drop; > + if (protocol != htons(ETH_P_TEB)) { > + raw_proto = true; > + } > + } > > - /* Ignore packet loops (and multicast echo) */ > - if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) > - goto drop; > + if (!raw_proto) { > + skb_reset_mac_header(skb); > + skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); > + skb->protocol = eth_type_trans(skb, vxlan->dev); > + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); > + > + /* Ignore packet loops (and multicast echo) */ > + if (ether_addr_equal(eth_hdr(skb)->h_source, > vxlan->dev->dev_addr)) > + goto drop; > + > + if ((vxlan->flags & VXLAN_F_LEARN) && > + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) > + goto drop; > + } else { > + skb->dev = vxlan->dev; > + skb->pkt_type = PACKET_HOST; > + } > > /* Get data from the outer IP header */ > if (vxlan_get_sk_family(vs) == AF_INET) { > @@ -861,10 +924,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct > sk_buff *skb, > goto drop; > } > > - if ((vxlan->flags & VXLAN_F_LEARN) && > - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) > - goto drop; > - > skb_reset_network_header(skb); > /* In flow-based mode, GBP is carried in dst_metadata */ > if (!(vs->flags & VXLAN_F_COLLECT_METADATA)) > @@ -908,6 +967,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct > sk_buff *skb) > struct metadata_dst dst; > char buf[sizeof(struct metadata_dst) + sizeof(*md)]; > } buf; > + struct vxlanhdr unparsed; > + __be32 protocol = htons(ETH_P_TEB); > > /* Need Vxlan and inner Ethernet header to be present */ > if (!pskb_may_pull(skb, VXLAN_HLEN)) > @@ -924,14 +985,25 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct > sk_buff *skb) > goto bad_flags; > } > > - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) > - goto drop; > - vxh = (struct 
vxlanhdr *)(udp_hdr(skb) + 1); > - > vs = rcu_dereference_sk_user_data(sk); > if (!vs) > goto drop; > > + /* For backwards compatibility, only allow reserved fields to be > + * used by VXLAN extensions if explicitly requested. > + */ > + if (vs->flags & VXLAN_F_GPE) { > + unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1); > + if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags)) > + goto drop; > + buf.dst.u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; > + flags &= ~VXLAN_GPE_USED_BITS; > + } > + > + if (iptunnel_pull_header(skb, VXLAN_HLEN, protocol)) > + goto drop; > + vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); > + > #ifdef HAVE_VXLAN_HF_RCO > if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { > vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, > @@ -1023,6 +1095,33 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, > u32 vxflags, > gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); > } > > +static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags, > + __be16 protocol) > +{ > + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh; > + > + vxh->vx_flags |= htonl(VXLAN_HF_GPE); > + gpe->np_applied = 1; > + gpe->version = 0; > + gpe->oam_flag = 0; > + > + switch (protocol) { > + case htons(ETH_P_IP): > + gpe->next_protocol = VXLAN_GPE_NP_IPV4; > + return 0; > + case htons(ETH_P_IPV6): > + gpe->next_protocol = VXLAN_GPE_NP_IPV6; > + return 0; > + case htons(ETH_P_TEB): > + gpe->next_protocol = VXLAN_GPE_NP_ETHERNET; > + return 0; > + case htons(ETH_P_NSH): > + gpe->next_protocol = VXLAN_GPE_NP_NSH; > + return 0; > + } > + return -EPFNOSUPPORT; > +} > + > #if IS_ENABLED(CONFIG_IPV6) > static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, > struct sk_buff *skb, > @@ -1036,6 +1135,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, > struct sock *sk, > int err; > bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); > int type = 0; > + __be16 inner_protocol = htons(ETH_P_TEB); > > if ((vxflags & 
VXLAN_F_REMCSUM_TX) && > skb->ip_summed == CHECKSUM_PARTIAL) { > @@ -1106,8 +1206,14 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, > struct sock *sk, > > if (vxflags & VXLAN_F_GBP) > vxlan_build_gbp_hdr(vxh, vxflags, md); > + if (vxflags & VXLAN_F_GPE) { > + err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol); > + if (err < 0) > + goto err; > + inner_protocol = skb->protocol; > + } > > - ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB)); > + ovs_skb_set_inner_protocol(skb, inner_protocol); > > udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, > ttl, src_port, dst_port, > @@ -1129,6 +1235,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct > sock *sk, struct sk_buff *sk > int err; > bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); > int type = 0; > + __be16 inner_protocol = htons(ETH_P_TEB); > > if ((vxflags & VXLAN_F_REMCSUM_TX) && > skb->ip_summed == CHECKSUM_PARTIAL) { > @@ -1191,8 +1298,14 @@ static int vxlan_xmit_skb(struct rtable *rt, struct > sock *sk, struct sk_buff *sk > } > if (vxflags & VXLAN_F_GBP) > vxlan_build_gbp_hdr(vxh, vxflags, md); > + if (vxflags & VXLAN_F_GPE) { > + err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol); > + if (err < 0) > + return err; > + inner_protocol = skb->protocol; > + } > > - ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB)); > + ovs_skb_set_inner_protocol(skb, inner_protocol); > > return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, > ttl, df, src_port, dst_port, xnet, > @@ -1419,7 +1532,7 @@ tx_free: > * > * Outer IP header inherits ECN and DF from inner header. > * Outer UDP destination is the VXLAN assigned port. 
> - * source port is based on hash of flow > + * source port is based on hash of flow > */ > netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb) > { > @@ -1648,7 +1761,7 @@ static netdev_tx_t vxlan_dev_xmit(struct sk_buff *skb, > struct net_device *dev) > return NETDEV_TX_OK; > } > > -static const struct net_device_ops vxlan_netdev_ops = { > +static const struct net_device_ops vxlan_netdev_ether_ops = { > .ndo_init = vxlan_init, > .ndo_uninit = vxlan_uninit, > .ndo_get_stats64 = ip_tunnel_get_stats64, > @@ -1661,6 +1774,16 @@ static const struct net_device_ops vxlan_netdev_ops = { > .ndo_set_mac_address = eth_mac_addr, > }; > > +static const struct net_device_ops vxlan_netdev_raw_ops = { > + .ndo_init = vxlan_init, > + .ndo_uninit = vxlan_uninit, > + .ndo_get_stats64 = ip_tunnel_get_stats64, > + .ndo_open = vxlan_open, > + .ndo_stop = vxlan_stop, > + .ndo_start_xmit = vxlan_dev_xmit, > + .ndo_change_mtu = vxlan_change_mtu, > +}; > + > /* Info for udev, that this is a virtual tunnel endpoint */ > static struct device_type vxlan_type = { > .name = "vxlan", > @@ -1675,7 +1798,7 @@ static void vxlan_setup(struct net_device *dev) > eth_hw_addr_random(dev); > ether_setup(dev); > > - dev->netdev_ops = &vxlan_netdev_ops; > + dev->netdev_ops = &vxlan_netdev_ether_ops; > dev->destructor = free_netdev; > SET_NETDEV_DEVTYPE(dev, &vxlan_type); > > @@ -1712,8 +1835,51 @@ static void vxlan_setup(struct net_device *dev) > INIT_HLIST_HEAD(&vxlan->fdb_head[h]); > } > > +static void vxlan_ether_setup(struct net_device *dev) > +{ > + dev->priv_flags &= ~IFF_TX_SKB_SHARING; > + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; > + dev->netdev_ops = &vxlan_netdev_ether_ops; > +} > + > +static void vxlan_raw_setup(struct net_device *dev) > +{ > + dev->header_ops = NULL; > + dev->type = ARPHRD_NONE; > + dev->hard_header_len = 0; > + dev->addr_len = 0; > + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; > + dev->netdev_ops = &vxlan_netdev_raw_ops; > +} > + > static const struct nla_policy 
vxlan_policy[IFLA_VXLAN_MAX + 1] = { > - [IFLA_VXLAN_PORT] = { .type = NLA_U16 }, > + [IFLA_VXLAN_ID] = { .type = NLA_U32 }, > + [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, > + [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) }, > + [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, > + [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, > + [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) }, > + [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, > + [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, > + [IFLA_VXLAN_LABEL] = { .type = NLA_U32 }, > + [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, > + [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, > + [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, > + [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct > ifla_vxlan_port_range) }, > + [IFLA_VXLAN_PROXY] = { .type = NLA_U8 }, > + [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, > + [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, > + [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, > + [IFLA_VXLAN_COLLECT_METADATA] = { .type = NLA_U8 }, > + [IFLA_VXLAN_PORT] = { .type = NLA_U16 }, > + [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 }, > + [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, > + [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, > + [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, > + [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, > + [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, > + [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, }, > + [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, > }; > > static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) > @@ -1897,6 +2063,21 @@ static int vxlan_dev_configure(struct net *src_net, > struct net_device *dev, > __be16 default_port = vxlan->cfg.dst_port; > struct net_device *lowerdev = NULL; > > + if (conf->flags & VXLAN_F_GPE) { > + if (conf->flags & ~VXLAN_F_ALLOWED_GPE) > + return -EINVAL; > + /* For now, allow GPE only together with COLLECT_METADATA. 
> + * This can be relaxed later; in such case, the other side > + * of the PtP link will have to be provided. > + */ > + if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) > + return -EINVAL; > + > + vxlan_raw_setup(dev); > + } else { > + vxlan_ether_setup(dev); > + } > + > vxlan->net = src_net; > > dst->remote_vni = conf->vni; > @@ -2023,7 +2204,136 @@ static int vxlan_newlink(struct net_device *dev, > struct nlattr *tb[], struct nlattr *data[]) > #endif > { > - return -EINVAL; > + struct vxlan_config conf; > + int err; > + > + memset(&conf, 0, sizeof(conf)); > + > + if (data[IFLA_VXLAN_ID]) > + conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID])); > + > + if (data[IFLA_VXLAN_GROUP]) { > + conf.remote_ip.sin.sin_addr.s_addr = > nla_get_in_addr(data[IFLA_VXLAN_GROUP]); > + } else if (data[IFLA_VXLAN_GROUP6]) { > + if (!IS_ENABLED(CONFIG_IPV6)) > + return -EPFNOSUPPORT; > + > + conf.remote_ip.sin6.sin6_addr = > nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]); > + conf.remote_ip.sa.sa_family = AF_INET6; > + } > + > + if (data[IFLA_VXLAN_LOCAL]) { > + conf.saddr.sin.sin_addr.s_addr = > nla_get_in_addr(data[IFLA_VXLAN_LOCAL]); > + conf.saddr.sa.sa_family = AF_INET; > + } else if (data[IFLA_VXLAN_LOCAL6]) { > + if (!IS_ENABLED(CONFIG_IPV6)) > + return -EPFNOSUPPORT; > + > + /* TODO: respect scope id */ > + conf.saddr.sin6.sin6_addr = > nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]); > + conf.saddr.sa.sa_family = AF_INET6; > + } > + > + if (data[IFLA_VXLAN_LINK]) > + conf.remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]); > + > + if (data[IFLA_VXLAN_TOS]) > + conf.tos = nla_get_u8(data[IFLA_VXLAN_TOS]); > + > + if (data[IFLA_VXLAN_TTL]) > + conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); > + > + if (data[IFLA_VXLAN_LABEL]) > + conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & > + IPV6_FLOWLABEL_MASK; > + > + if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) > + conf.flags |= VXLAN_F_LEARN; > + > + if (data[IFLA_VXLAN_AGEING]) > + conf.age_interval = 
nla_get_u32(data[IFLA_VXLAN_AGEING]); > + > + if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY])) > + conf.flags |= VXLAN_F_PROXY; > + > + if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC])) > + conf.flags |= VXLAN_F_RSC; > + > + if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS])) > + conf.flags |= VXLAN_F_L2MISS; > + > + if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS])) > + conf.flags |= VXLAN_F_L3MISS; > + > + if (data[IFLA_VXLAN_LIMIT]) > + conf.addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); > + > + if (data[IFLA_VXLAN_COLLECT_METADATA] && > + nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA])) > + conf.flags |= VXLAN_F_COLLECT_METADATA; > + > + if (data[IFLA_VXLAN_PORT_RANGE]) { > + const struct ifla_vxlan_port_range *p > + = nla_data(data[IFLA_VXLAN_PORT_RANGE]); > + conf.port_min = ntohs(p->low); > + conf.port_max = ntohs(p->high); > + } > + > + if (data[IFLA_VXLAN_PORT]) > + conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); > + > + if (data[IFLA_VXLAN_UDP_CSUM] && > + !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) > + conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX; > + > + if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && > + nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) > + conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; > + > + if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && > + nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) > + conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; > + > + if (data[IFLA_VXLAN_REMCSUM_TX] && > + nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX])) > + conf.flags |= VXLAN_F_REMCSUM_TX; > + > + if (data[IFLA_VXLAN_REMCSUM_RX] && > + nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) > + conf.flags |= VXLAN_F_REMCSUM_RX; > + > + if (data[IFLA_VXLAN_GBP]) > + conf.flags |= VXLAN_F_GBP; > + > + if (data[IFLA_VXLAN_GPE]) > + conf.flags |= VXLAN_F_GPE; > + > + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) > + conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; > + > + if (tb[IFLA_MTU]) > + conf.mtu = nla_get_u32(tb[IFLA_MTU]); > + > + err = 
vxlan_dev_configure(src_net, dev, &conf); > + switch (err) { > + case -ENODEV: > + pr_info("ifindex %d does not exist\n", conf.remote_ifindex); > + break; > + > + case -EPERM: > + pr_info("IPv6 is disabled via sysctl\n"); > + break; > + > + case -EEXIST: > + pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); > + break; > + > + case -EINVAL: > + pr_info("unsupported combination of extensions\n"); > + break; > + } > + > + return err; > } > > #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) > @@ -2047,20 +2357,21 @@ static void vxlan_dellink(struct net_device *dev) > static size_t vxlan_get_size(const struct net_device *dev) > { > > - return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ > + return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ > nla_total_size(sizeof(struct in6_addr)) + /* > IFLA_VXLAN_GROUP{6} */ > - nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ > + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ > nla_total_size(sizeof(struct in6_addr)) + /* > IFLA_VXLAN_LOCAL{6} */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ > - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA > */ > - nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ > - nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ > + nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ > + nla_total_size(sizeof(__u8)) 
+ /* IFLA_VXLAN_L2MISS */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ > + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA > */ > + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ > + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ > nla_total_size(sizeof(struct ifla_vxlan_port_range)) + > nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */ > nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */ > @@ -2074,8 +2385,88 @@ static size_t vxlan_get_size(const struct net_device > *dev) > static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) > { > const struct vxlan_dev *vxlan = netdev_priv(dev); > + const struct vxlan_rdst *dst = &vxlan->default_dst; > + struct ifla_vxlan_port_range ports = { > + .low = htons(vxlan->cfg.port_min), > + .high = htons(vxlan->cfg.port_max), > + }; > + > + if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni))) > + goto nla_put_failure; > + > + if (!vxlan_addr_any(&dst->remote_ip)) { > + if (dst->remote_ip.sa.sa_family == AF_INET) { > + if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP, > + dst->remote_ip.sin.sin_addr.s_addr)) > + goto nla_put_failure; > +#if IS_ENABLED(CONFIG_IPV6) > + } else { > + if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6, > + &dst->remote_ip.sin6.sin6_addr)) > + goto nla_put_failure; > +#endif > + } > + } > + > + if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, > dst->remote_ifindex)) > + goto nla_put_failure; > + > + if (!vxlan_addr_any(&vxlan->cfg.saddr)) { > + if (vxlan->cfg.saddr.sa.sa_family == AF_INET) { > + if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL, > + > vxlan->cfg.saddr.sin.sin_addr.s_addr)) > + goto nla_put_failure; > +#if IS_ENABLED(CONFIG_IPV6) > + } else { > + if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6, > + &vxlan->cfg.saddr.sin6.sin6_addr)) > + goto nla_put_failure; > +#endif > + } > + } > + > + if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || > + nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || > + 
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || > + nla_put_u8(skb, IFLA_VXLAN_LEARNING, > + !!(vxlan->flags & VXLAN_F_LEARN)) || > + nla_put_u8(skb, IFLA_VXLAN_PROXY, > + !!(vxlan->flags & VXLAN_F_PROXY)) || > + nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) || > + nla_put_u8(skb, IFLA_VXLAN_L2MISS, > + !!(vxlan->flags & VXLAN_F_L2MISS)) || > + nla_put_u8(skb, IFLA_VXLAN_L3MISS, > + !!(vxlan->flags & VXLAN_F_L3MISS)) || > + nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA, > + !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) || > + nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) || > + nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || > + nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || > + nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, > + !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || > + nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, > + !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || > + nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, > + !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || > + nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, > + !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) || > + nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, > + !!(vxlan->flags & VXLAN_F_REMCSUM_RX))) > + goto nla_put_failure; > + > + if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) > + goto nla_put_failure; > + > + if (vxlan->flags & VXLAN_F_GBP && > + nla_put_flag(skb, IFLA_VXLAN_GBP)) > + goto nla_put_failure; > + > + if (vxlan->flags & VXLAN_F_GPE && > + nla_put_flag(skb, IFLA_VXLAN_GPE)) > + goto nla_put_failure; > > - if (nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port)) > + if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && > + nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) > goto nla_put_failure; > > return 0; > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index 1e88c13..2b07e54 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -988,6 +988,8 @@ netdev_geneve_destroy(const char *name) > #define 
IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20 > #define IFLA_VXLAN_GBP 23 > #define IFLA_VXLAN_COLLECT_METADATA 25 > +#define IFLA_VXLAN_LABEL 26 > +#define IFLA_VXLAN_GPE 27 > #endif > > #if IFLA_GRE_MAX < 18 > @@ -1037,6 +1039,9 @@ netdev_vxlan_create(struct netdev *netdev) > if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) { > nl_msg_put_flag(&request, IFLA_VXLAN_GBP); > } > + else if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) { > + nl_msg_put_flag(&request, IFLA_VXLAN_GPE); > + } > nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port); > nl_msg_end_nested(&request, infodata_off); > nl_msg_end_nested(&request, linkinfo_off); > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c > index ec5c44e..fa56af5 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -541,7 +541,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap > *args) > while (ext) { > if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) { > tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP); > - } else { > + } else if (!strcmp(type, "vxlan") && !strcmp(ext, "gpe")) { > + tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GPE); > + } else { > VLOG_WARN("%s: unknown extension '%s'", name, ext); > } > > -- > 1.9.3 > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev