Hi, The LISP tunneling support as of now is not yet ready for upstreaming, for reasons outlined in this message:
http://openvswitch.org/pipermail/dev/2013-February/025459.html One solution to the above issues is to make OVS less Ethernet-specific, meaning that it should accept and work with packets/flows without an Ethernet header. At a high level, we would introduce layer 3 (tunnel) vports, and LISP would be such a vport. Whenever a packet that ingressed on an L2 vport needs to egress on an L3 vport, we apply the internal pop_eth action automatically. For packets going from L3 vports to L2 vports, a push_eth action would add a MAC header, with addresses determined by ARP resolution in user space. I attached a patch to this email with proposed changes to the datapath to make this happen. I didn't use git-send-email since it is still early work, and I don't expect anyone to apply it; I just wanted to get some early feedback on some of the design decisions. One such decision is how to handle the flow key. I set all fields in key->eth to 0, except the type, because we still need to know what kind of L3 packet we have. Since a lot of code is accessing key->eth.type, this is easier than having this information in a different place, although it would be more elegant to set this field to 0 as well. Now, in order to differentiate between flows with MAC addresses set to 0 and flows without an Ethernet header, I added a boolean field to tun_key, to mark L3 flows. However, if we expect to have non-tunneled L3 ports (I couldn't find a good reason for this) then we should move that field out into the main flow key structure. Let me know what you think. Thanks, -Lori
diff --git a/datapath/actions.c b/datapath/actions.c index 0dac658..fcb80a5 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -138,6 +138,26 @@ static int set_eth_addr(struct sk_buff *skb, return 0; } +static int pop_eth(struct sk_buff *skb) +{ + skb_pull(skb, skb_network_offset(skb)); + return 0; +} + +static int push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh) +{ + int err; + + skb_push(skb, ETH_HLEN); + + err = set_eth_addr(skb, &ethh->addresses); + if (unlikely(err)) + return err; + + eth_hdr(skb)->h_proto = ethh->eth_type; + return 0; +} + static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, __be32 *addr, __be32 new_addr) { @@ -504,6 +524,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb); break; + case OVS_ACTION_ATTR_PUSH_ETH: + err = push_eth(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_ETH: + err = pop_eth(skb); + break; + case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, nla_data(a)); break; diff --git a/datapath/flow.c b/datapath/flow.c index 7f897bd..98346d5 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -628,24 +628,27 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, skb_reset_mac_header(skb); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. - */ - eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + /* Link layer. 
*/ + if (key->tun_key.is_layer3) { + /* The L3 tunnel should set the inner packet protocol on the skb */ + key->eth.type = skb->protocol; + } else { + eth = eth_hdr(skb); + memcpy(key->eth.src, eth->h_source, ETH_ALEN); + memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); - __skb_pull(skb, 2 * ETH_ALEN); + __skb_pull(skb, 2 * ETH_ALEN); - if (vlan_tx_tag_present(skb)) - key->eth.tci = htons(vlan_get_tci(skb)); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + if (vlan_tx_tag_present(skb)) + key->eth.tci = htons(vlan_get_tci(skb)); + else if (eth->h_proto == htons(ETH_P_8021Q)) + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) + return -ENOMEM; + } skb_reset_network_header(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); @@ -1034,6 +1037,9 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_key->tun_flags |= OVS_TNL_F_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_LAYER3: + tun_key->is_layer3 = true; + break; default: return -EINVAL; @@ -1079,6 +1085,9 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb, if ((tun_key->tun_flags & OVS_TNL_F_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if ((tun_key->is_layer3) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_LAYER3)) + return -EMSGSIZE; nla_nest_end(skb, nla); return 0; @@ -1140,13 +1149,13 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, } /* Data attributes. 
*/ - if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); + if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { + eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); + memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); + memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); - memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); - memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); + attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); + } if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) { @@ -1335,7 +1344,7 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; + struct nlattr *nla, *encap = NULL; if (swkey->phy.priority && nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) @@ -1353,6 +1362,9 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) goto nla_put_failure; + if (swkey->tun_key.is_layer3) + goto noethernet; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1374,6 +1386,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) if (swkey->eth.type == htons(ETH_P_802_2)) goto unencap; +noethernet: if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) goto nla_put_failure; diff --git a/datapath/flow.h b/datapath/flow.h index dba66cf..0033fcd 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -57,6 +57,7 @@ struct ovs_key_ipv4_tunnel { u16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; + bool is_layer3; }; struct sw_flow_key { diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index 3d0a315..a1a5d22 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -197,8 
+197,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) struct iphdr *iph, *inner_iph; struct ovs_key_ipv4_tunnel tun_key; __be64 key; - struct ethhdr *ethh; - __be16 protocol; lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); if (unlikely(!lisp_port)) @@ -224,22 +222,15 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) inner_iph = (struct iphdr *)(lisph + 1); switch (inner_iph->version) { case 4: - protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); break; case 6: - protocol = htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); break; default: goto error; } - /* Add Ethernet header */ - ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); - memset(ethh, 0, ETH_HLEN); - ethh->h_dest[0] = 0x02; - ethh->h_source[0] = 0x02; - ethh->h_proto = protocol; - ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); goto out; @@ -361,9 +352,6 @@ error: static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) { - int tnl_len; - int network_offset = skb_network_offset(skb); - if (unlikely(!OVS_CB(skb)->tun_key)) return -EINVAL; @@ -371,11 +359,8 @@ static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) switch (skb->protocol) { case htons(ETH_P_IP): case htons(ETH_P_IPV6): - /* Pop off "inner" Ethernet header */ - skb_pull(skb, network_offset); - tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP, + return ovs_tnl_send(vport, skb, IPPROTO_UDP, LISP_HLEN, lisp_build_header); - return tnl_len > 0 ? tnl_len + network_offset : tnl_len; default: kfree_skb(skb); return 0; diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index e890fd8..dbff58f 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -303,6 +303,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + OVS_TUNNEL_KEY_ATTR_LAYER3, /* No argument. Layer 3 tunnel. 
*/ __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -494,6 +495,16 @@ struct ovs_action_push_vlan { }; /** + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. + * @addresses: Source and destination MAC addresses. + * @eth_type: Ethernet type + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; + __be16 eth_type; +}; + +/** * enum ovs_action_attr - Action types. * * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. @@ -532,6 +543,8 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX };
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev