Implement the pop_eth and push_eth actions in the kernel datapath, and add layer 3 flow support. Jesse Gross provided feedback on a previous version of this RFC patch; all of those comments are addressed here.
Signed-off-by: Lorand Jakab <loja...@cisco.com> --- datapath/actions.c | 32 ++++++++++++++++++++++++++++++++ datapath/datapath.h | 1 + datapath/flow.c | 43 ++++++++++++++++++++++++------------------- datapath/flow.h | 1 + datapath/flow_netlink.c | 20 ++++++++++++++++++-- datapath/vport-gre.c | 1 + datapath/vport-lisp.c | 17 +++-------------- datapath/vport-netdev.c | 1 + datapath/vport-vxlan.c | 1 + 9 files changed, 82 insertions(+), 35 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 30ea1d2..b90e715 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -143,6 +143,30 @@ static int set_eth_addr(struct sk_buff *skb, return 0; } +static int pop_eth(struct sk_buff *skb) +{ + skb_pull(skb, skb_network_offset(skb)); + return 0; +} + +static int push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh) +{ + int err; + + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + err = set_eth_addr(skb, &ethh->addresses); + if (unlikely(err)) + return err; + + eth_hdr(skb)->h_proto = ethh->eth_type; + + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + + return 0; +} + static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, __be32 *addr, __be32 new_addr) { @@ -546,6 +570,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb); break; + case OVS_ACTION_ATTR_PUSH_ETH: + err = push_eth(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_ETH: + err = pop_eth(skb); + break; + case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, nla_data(a)); break; diff --git a/datapath/datapath.h b/datapath/datapath.h index b3ae7cd..f470f14 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -105,6 +105,7 @@ struct ovs_skb_cb { struct sw_flow *flow; struct sw_flow_key *pkt_key; struct ovs_key_ipv4_tunnel *tun_key; + bool is_layer3; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/datapath/flow.c b/datapath/flow.c index 8be3801..e31fbed 100644 --- a/datapath/flow.c +++ 
b/datapath/flow.c @@ -458,26 +458,31 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) skb_reset_mac_header(skb); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. - */ - eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); - - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. */ - - if (vlan_tx_tag_present(skb)) - key->eth.tci = htons(vlan_get_tci(skb)); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) + /* Link layer. */ + if (OVS_CB(skb)->is_layer3) { + /* The receiving L3 vport should set the inner packet protocol + * on the skb. We use that here to set eth.type */ + key->noeth = true; + key->eth.type = skb->protocol; + } else { + eth = eth_hdr(skb); + memcpy(key->eth.src, eth->h_source, ETH_ALEN); + memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. */ + __skb_pull(skb, 2 * ETH_ALEN); + + if (vlan_tx_tag_present(skb)) + key->eth.tci = htons(vlan_get_tci(skb)); + else if (eth->h_proto == htons(ETH_P_8021Q)) + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) return -ENOMEM; - - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + } skb_reset_network_header(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); diff --git a/datapath/flow.h b/datapath/flow.h index eafcfd8..df2fb05 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -69,6 +69,7 @@ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, struct sw_flow_key { struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. 
*/ + bool noeth; /* Packet has no Ethernet header */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 9b26528..8f71c49 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -106,7 +106,7 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs) { - u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET; + u64 key_expected = 0; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they @@ -125,6 +125,7 @@ static bool match_validate(const struct sw_flow_match *match, /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) | (1ULL << OVS_KEY_ATTR_IN_PORT) + | (1ULL << OVS_KEY_ATTR_ETHERNET) | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); /* Check key attributes. */ @@ -521,6 +522,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_MEMCPY(match, eth.dst, eth_key->eth_dst, ETH_ALEN, is_mask); attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET); + } else { + SW_FLOW_KEY_PUT(match, noeth, true, is_mask); } if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) { @@ -951,7 +954,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; + struct nlattr *nla, *encap = NULL; bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) @@ -977,6 +980,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; + if (swkey->noeth) + goto noethernet; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1011,6 +1017,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto 
unencap; } +noethernet: if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; @@ -1512,6 +1519,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), + [OVS_ACTION_ATTR_POP_ETH] = 0, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -1543,6 +1552,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr, break; + case OVS_ACTION_ATTR_POP_ETH: + break; + + case OVS_ACTION_ATTR_PUSH_ETH: + /* TODO May need to validate eth_type? */ + break; + case OVS_ACTION_ATTR_POP_VLAN: break; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 8737b63..2737cd2 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -112,6 +112,7 @@ static int gre_rcv(struct sk_buff *skb, key = key_to_tunnel_id(tpi->key, tpi->seq); ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags)); + OVS_CB(skb)->is_layer3 = false; ovs_vport_receive(vport, skb, &tun_key); return PACKET_RCVD; } diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index c2698ae..b56eec7 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -218,8 +218,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) struct iphdr *iph, *inner_iph; struct ovs_key_ipv4_tunnel tun_key; __be64 key; - struct ethhdr *ethh; - __be16 protocol; lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); if (unlikely(!lisp_port)) @@ -243,25 +241,16 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) inner_iph = (struct iphdr *)(lisph + 1); switch (inner_iph->version) { case 4: - protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); break; case 6: - protocol = htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); break; default: goto 
error; } - skb->protocol = protocol; - - /* Add Ethernet header */ - ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); - memset(ethh, 0, ETH_HLEN); - ethh->h_dest[0] = 0x02; - ethh->h_source[0] = 0x02; - ethh->h_proto = protocol; - - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + OVS_CB(skb)->is_layer3 = true; ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key); goto out; diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c index c15923b..14430e0 100644 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@ -209,6 +209,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + OVS_CB(skb)->is_layer3 = false; ovs_vport_receive(vport, skb, NULL); return; diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index ab2b6f7..dfac617 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -70,6 +70,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) key = cpu_to_be64(ntohl(vx_vni) >> 8); ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); + OVS_CB(skb)->is_layer3 = false; ovs_vport_receive(vport, skb, &tun_key); } -- 1.8.3.4 (Apple Git-47) _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev