Implement the pop_eth and push_eth actions in the kernel datapath, and add layer 3 flow support.
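
This makes it possible to match and forward packets that carry no Ethernet
header: the flow key gains a phy.noeth flag, vports pass an is_layer3 flag
to ovs_vport_receive(), and the new OVS_ACTION_ATTR_POP_ETH and
OVS_ACTION_ATTR_PUSH_ETH actions strip or prepend an Ethernet header when
moving packets between layer 2 and layer 3 ports.

As a rough illustration only (not part of this patch; the function name and
error handling are made up), a layer 3 tunnel vport's receive path is now
expected to set skb->protocol to the inner ethertype and pass
is_layer3 = true, much like the vport-lisp.c change below:

    static void example_l3_vport_rcv(struct vport *vport, struct sk_buff *skb,
                                     struct ovs_key_ipv4_tunnel *tun_key)
    {
            /* Tell flow extraction which L3 protocol follows, since there
             * is no Ethernet header to take an ethertype from. */
            switch (ip_hdr(skb)->version) {
            case 4:
                    skb->protocol = htons(ETH_P_IP);
                    break;
            case 6:
                    skb->protocol = htons(ETH_P_IPV6);
                    break;
            default:
                    kfree_skb(skb);
                    return;
            }

            /* The new last argument marks the packet as layer 3, so
             * ovs_flow_extract() sets phy.noeth instead of parsing
             * Ethernet/VLAN headers. */
            ovs_vport_receive(vport, skb, tun_key, true);
    }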

Signed-off-by: Lorand Jakab <loja...@cisco.com>
---
 datapath/actions.c            | 35 +++++++++++++++++++++
 datapath/datapath.h           |  1 +
 datapath/flow.c               | 43 ++++++++++++++------------
 datapath/flow.h               |  1 +
 datapath/flow_netlink.c       | 72 ++++++++++++++++++++++++++++++++++++-------
 datapath/vport-gre.c          |  5 ++-
 datapath/vport-internal_dev.c |  5 ++-
 datapath/vport-lisp.c         | 26 ++++------------
 datapath/vport-netdev.c       |  5 ++-
 datapath/vport-vxlan.c        |  7 ++++-
 datapath/vport.c              |  5 ++-
 datapath/vport.h              |  2 +-
 12 files changed, 151 insertions(+), 56 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 7fe2f54..cd0c91d 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -143,6 +143,33 @@ static int set_eth_addr(struct sk_buff *skb,
 	return 0;
 }
 
+static int pop_eth(struct sk_buff *skb)
+{
+	skb_pull_rcsum(skb, skb_network_offset(skb));
+	skb_reset_mac_header(skb);
+	vlan_set_tci(skb, 0);
+
+	OVS_CB(skb)->is_layer3 = true;
+
+	return 0;
+}
+
+static void push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh)
+{
+	skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	ether_addr_copy(eth_hdr(skb)->h_source, ethh->addresses.eth_src);
+	ether_addr_copy(eth_hdr(skb)->h_dest, ethh->addresses.eth_dst);
+
+	eth_hdr(skb)->h_proto = ethh->eth_type;
+	skb->protocol = ethh->eth_type;
+
+	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+	OVS_CB(skb)->is_layer3 = false;
+}
+
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 			__be32 *addr, __be32 new_addr)
 {
@@ -585,6 +612,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = pop_vlan(skb);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			push_eth(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_ETH:
+			err = pop_eth(skb);
+			break;
+
 		case OVS_ACTION_ATTR_RECIRC: {
 			struct sk_buff *recirc_skb;
 			const bool last_action = (a->nla_len == rem);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index a847bd9..b8dd33c 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -107,6 +107,7 @@ struct ovs_skb_cb {
 	struct sw_flow_key	*pkt_key;
 	struct ovs_key_ipv4_tunnel *tun_key;
 	struct vport		*input_vport;
+	bool is_layer3;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/datapath/flow.c b/datapath/flow.c
index c52081b..a31c529 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -458,26 +458,31 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
 	skb_reset_mac_header(skb);
 
-	/* Link layer. We are guaranteed to have at least the 14 byte Ethernet
-	 * header in the linear data area.
-	 */
-	eth = eth_hdr(skb);
-	ether_addr_copy(key->eth.src, eth->h_source);
-	ether_addr_copy(key->eth.dst, eth->h_dest);
-
-	__skb_pull(skb, 2 * ETH_ALEN);
-	/* We are going to push all headers that we pull, so no need to
-	 * update skb->csum here. */
-
-	if (vlan_tx_tag_present(skb))
-		key->eth.tci = htons(vlan_get_tci(skb));
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
+	/* Link layer. */
+	if (OVS_CB(skb)->is_layer3) {
+		/* The receiving L3 vport should set the inner packet protocol
+		 * on the skb. We use that here to set eth.type */
+		key->phy.noeth = true;
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
+
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		 * update skb->csum here. */
+
+		if (vlan_tx_tag_present(skb))
+			key->eth.tci = htons(vlan_get_tci(skb));
+		else if (eth->h_proto == htons(ETH_P_8021Q))
+			if (unlikely(parse_vlan(skb, key)))
+				return -ENOMEM;
+
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
 			return -ENOMEM;
-
-	key->eth.type = parse_ethertype(skb);
-	if (unlikely(key->eth.type == htons(0)))
-		return -ENOMEM;
+	}
 
 	skb_reset_network_header(skb);
 	__skb_push(skb, skb->data - skb_mac_header(skb));
diff --git a/datapath/flow.h b/datapath/flow.h
index 2018691..7c0407a 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -73,6 +73,7 @@ struct sw_flow_key {
 		u32	priority;	/* Packet QoS priority. */
 		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
+		bool	noeth;		/* Packet has no Ethernet header */
 	} __packed phy; /* Safe when right after 'tun_key'. */
 	u32 ovs_flow_hash;		/* Datapath computed hash value. */
 	u32 recirc_id;			/* Recirculation ID. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 803a94c..849f415 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -109,14 +109,12 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
 static bool match_validate(const struct sw_flow_match *match,
 			   u64 key_attrs, u64 mask_attrs)
 {
-	u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+	u64 key_expected = 0;
 	u64 mask_allowed = key_attrs;	/* At most allow all key attributes */
 
 	/* The following mask attributes allowed only if they
 	 * pass the validation tests. */
-	mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
-			| (1ULL << OVS_KEY_ATTR_IPV6)
-			| (1ULL << OVS_KEY_ATTR_TCP)
+	mask_allowed &= ~((1ULL << OVS_KEY_ATTR_TCP)
 			| (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
 			| (1ULL << OVS_KEY_ATTR_UDP)
 			| (1ULL << OVS_KEY_ATTR_SCTP)
@@ -128,7 +126,10 @@ static bool match_validate(const struct sw_flow_match *match,
 	/* Always allowed mask fields. */
 	mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
 			| (1ULL << OVS_KEY_ATTR_IN_PORT)
-			| (1ULL << OVS_KEY_ATTR_ETHERTYPE));
+			| (1ULL << OVS_KEY_ATTR_ETHERNET)
+			| (1ULL << OVS_KEY_ATTR_ETHERTYPE)
+			| (1ULL << OVS_KEY_ATTR_IPV4)
+			| (1ULL << OVS_KEY_ATTR_IPV6));
 
 	/* Check key attributes. */
 	if (match->key->eth.type == htons(ETH_P_ARP)
@@ -524,8 +525,10 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 				eth_key->eth_src, ETH_ALEN, is_mask);
 		SW_FLOW_KEY_MEMCPY(match, eth.dst,
 				eth_key->eth_dst, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_PUT(match, phy.noeth, false, is_mask);
 		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
-	}
+	} else if (!is_mask)
+		SW_FLOW_KEY_PUT(match, phy.noeth, true, is_mask);
 
 	if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
 		__be16 tci;
@@ -567,6 +570,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 	if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
 		const struct ovs_key_ipv4 *ipv4_key;
 
+		/* Add eth.type value for layer 3 flows */
+		if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+			__be16 eth_type;
+
+			if (is_mask) {
+				eth_type = htons(0xffff);
+			} else {
+				eth_type = htons(ETH_P_IP);
+			}
+			SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		}
+
 		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
 		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
 			OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
@@ -591,6 +606,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 	if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
 		const struct ovs_key_ipv6 *ipv6_key;
 
+		/* Add eth.type value for layer 3 flows */
+		if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+			__be16 eth_type;
+
+			if (is_mask) {
+				eth_type = htons(0xffff);
+			} else {
+				eth_type = htons(ETH_P_IPV6);
+			}
+			SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		}
+
 		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
 		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
 			OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
@@ -897,7 +924,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 			  const struct sw_flow_key *output, struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
+	struct nlattr *nla, *encap = NULL;
 	bool is_mask = (swkey != output);
 
 	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
@@ -929,6 +956,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 		goto nla_put_failure;
 
+	if (swkey->phy.noeth)
+		goto noethernet;
+
 	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
@@ -946,8 +976,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 		if (!swkey->eth.tci)
 			goto unencap;
-	} else
-		encap = NULL;
+	}
 
 	if (swkey->eth.type == htons(ETH_P_802_2)) {
 		/*
@@ -966,6 +995,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 		goto nla_put_failure;
 
+noethernet:
 	if (swkey->eth.type == htons(ETH_P_IP)) {
 		struct ovs_key_ipv4 *ipv4_key;
 
@@ -1301,7 +1331,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun)
+			bool *set_tun,
+			bool noeth)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
@@ -1322,7 +1353,11 @@ static int validate_set(const struct nlattr *a,
 
 	case OVS_KEY_ATTR_PRIORITY:
 	case OVS_KEY_ATTR_SKB_MARK:
+		break;
+
 	case OVS_KEY_ATTR_ETHERNET:
+		if (noeth)
+			return -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL:
@@ -1434,6 +1469,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 {
 	const struct nlattr *a;
 	int rem, err;
+	bool noeth = key->phy.noeth;
 
 	if (depth >= SAMPLE_ACTION_DEPTH)
 		return -EOVERFLOW;
@@ -1444,6 +1480,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
 			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+			[OVS_ACTION_ATTR_POP_ETH] = 0,
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1488,10 +1526,22 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 			break;
 		}
 
+		case OVS_ACTION_ATTR_POP_ETH:
+			if (noeth)
+				return -EINVAL;
+			noeth = true;
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			noeth = false;
+			break;
+
 		case OVS_ACTION_ATTR_POP_VLAN:
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
+			if (noeth)
+				return -EINVAL;
 			vlan = nla_data(a);
 			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
 				return -EINVAL;
@@ -1503,7 +1553,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa, &skip_copy);
+			err = validate_set(a, key, sfa, &skip_copy, noeth);
 			if (err)
 				return err;
 			break;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 5d5090c..5115b3a 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -112,7 +112,7 @@ static int gre_rcv(struct sk_buff *skb,
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
 	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
 			      filter_tnl_flags(tpi->flags));
-	ovs_vport_receive(vport, skb, &tun_key);
+	ovs_vport_receive(vport, skb, &tun_key, false);
 
 	return PACKET_RCVD;
 }
@@ -289,6 +289,9 @@ static int gre_send(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
+	if (unlikely(OVS_CB(skb)->is_layer3))
+		return -EINVAL;
+
 	hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
 
 	return __send(vport, skb, hlen, 0, 0);
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index 637d712..afb9a67 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -76,7 +76,7 @@ static struct net_device_stats *internal_dev_sys_stats(struct net_device *netdev
 static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	rcu_read_lock();
-	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
+	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL, false);
 	rcu_read_unlock();
 	return 0;
 }
@@ -236,6 +236,9 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
 	struct net_device *netdev = netdev_vport_priv(vport)->dev;
 	int len;
 
+	if (unlikely(OVS_CB(skb)->is_layer3))
+		return -EINVAL;
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
 	if (vlan_tx_tag_present(skb)) {
 		if (unlikely(!__vlan_put_tag(skb,
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index a1e2b2b..46256dd 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -219,8 +219,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	struct iphdr *iph, *inner_iph;
 	struct ovs_key_ipv4_tunnel tun_key;
 	__be64 key;
-	struct ethhdr *ethh;
-	__be16 protocol;
 
 	lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
 	if (unlikely(!lisp_port))
@@ -244,26 +242,16 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	inner_iph = (struct iphdr *)(lisph + 1);
 	switch (inner_iph->version) {
 	case 4:
-		protocol = htons(ETH_P_IP);
+		skb->protocol = htons(ETH_P_IP);
 		break;
 	case 6:
-		protocol = htons(ETH_P_IPV6);
+		skb->protocol = htons(ETH_P_IPV6);
 		break;
 	default:
 		goto error;
 	}
 
-	skb->protocol = protocol;
-	/* Add Ethernet header */
-	ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
-	memset(ethh, 0, ETH_HLEN);
-	ethh->h_dest[0] = 0x02;
-	ethh->h_source[0] = 0x02;
-	ethh->h_proto = protocol;
-
-	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
-
-	ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key);
+	ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key, true);
 	goto out;
 
 error:
@@ -429,6 +417,9 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
+	if (unlikely(!OVS_CB(skb)->is_layer3))
+		return -EINVAL;
+
 	if (skb->protocol != htons(ETH_P_IP) &&
 	    skb->protocol != htons(ETH_P_IPV6)) {
 		kfree_skb(skb);
@@ -462,11 +453,6 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
 		goto err_free_rt;
 	}
 
-	/* Reset l2 headers. */
-	skb_pull(skb, network_offset);
-	skb_reset_mac_header(skb);
-	vlan_set_tci(skb, 0);
-
 	skb_reset_inner_headers(skb);
 	__skb_push(skb, LISP_HLEN);
 
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index c15923b..1afef3f 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -209,7 +209,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 	skb_push(skb, ETH_HLEN);
 	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 
-	ovs_vport_receive(vport, skb, NULL);
+	ovs_vport_receive(vport, skb, NULL, false);
 	return;
 
 error:
@@ -232,6 +232,9 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
 	int mtu = netdev_vport->dev->mtu;
 	int len;
 
+	if (unlikely(OVS_CB(skb)->is_layer3))
+		return -EINVAL;
+
 	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
 		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
 				     netdev_vport->dev->name,
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index cc9477d..9e79a9b 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -70,7 +70,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 	key = cpu_to_be64(ntohl(vx_vni) >> 8);
 	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
-	ovs_vport_receive(vport, skb, &tun_key);
+	ovs_vport_receive(vport, skb, &tun_key, false);
 }
 
 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
@@ -155,6 +155,11 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 		goto error;
 	}
 
+	if (unlikely(OVS_CB(skb)->is_layer3)) {
+		err = -EINVAL;
+		goto error;
+	}
+
 	/* Route lookup */
 	saddr = OVS_CB(skb)->tun_key->ipv4_src;
 	rt = find_route(ovs_dp_get_net(vport->dp),
diff --git a/datapath/vport.c b/datapath/vport.c
index 0dcecd0..a3fdeac 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -461,13 +461,15 @@ u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
  * @vport: vport that received the packet
  * @skb: skb that was received
  * @tun_key: tunnel (if any) that carried packet
+ * @is_layer3: packet is layer 3
  *
  * Must be called with rcu_read_lock. The packet cannot be shared and
  * skb->data should point to the Ethernet header. The caller must have already
  * called compute_ip_summed() to initialize the checksumming fields.
  */
 void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       struct ovs_key_ipv4_tunnel *tun_key)
+		       struct ovs_key_ipv4_tunnel *tun_key,
+		       bool is_layer3)
 {
 	struct pcpu_sw_netstats *stats;
 
@@ -478,6 +480,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 	u64_stats_update_end(&stats->syncp);
 
 	OVS_CB(skb)->tun_key = tun_key;
+	OVS_CB(skb)->is_layer3 = is_layer3;
 	ovs_dp_process_received_packet(vport, skb);
 }
 
diff --git a/datapath/vport.h b/datapath/vport.h
index cfaea09..cd2c7ba 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -211,7 +211,7 @@ static inline struct vport *vport_from_priv(void *priv)
 }
 
 void ovs_vport_receive(struct vport *, struct sk_buff *,
-		       struct ovs_key_ipv4_tunnel *);
+		       struct ovs_key_ipv4_tunnel *, bool);
 
 /* List of statically compiled vport implementations. Don't forget to also
  * add yours to the list at the top of vport.c. */
-- 
1.8.5.2 (Apple Git-48)
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev