Allow datapath to recognize and extract MPLS labels into flow keys and execute actions which push, pop, and set labels on packets.
Based heavily on work by Leo Alterman and Ravi K. Cc: Ravi K <rke...@gmail.com> Cc: Leo Alterman <lalter...@nicira.com> Reviewed-by: Isaku Yamahata <yamah...@valinux.co.jp> Signed-off-by: Simon Horman <ho...@verge.net.au> --- v2.18 * No change v2.17 * As suggested by Ben Pfaff - Use consistent terminology for MPLS. + Consistently refer to the MPLS component of a packet as the MPLS label stack and entries in the stack as MPLS label stack entries (LSE). An MPLS label is a component of an MPLS label stack entry. The other components are the traffic class (TC), time to live (TTL) and bottom of stack (BoS) bit. - Rename compose_.*mpls_ functions as execute_.*mpls_ v2.16 * No change v2.15 * As suggested by Ben Pfaff - Use OVS_ACTION_SET to set OVS_KEY_ATTR_MPLS instead of OVS_ACTION_ATTR_SET_MPLS v2.14 * Remove include/linux/openvswitch.h portion which added new key and action attributes. This is now present in "User-Space MPLS actions and matches" which is now a dependency of this patch v2.13 * As suggested by Jarno Rajahalme - Rename mpls_bos element of ovs_skb_cb as l2_size as it is set and used regardless of if an MPLS stack is present or not. Update the name of helper functions and documentation accordingly. - Ensure that skb_cb_mpls_bos() never returns NULL * Correct endianness in eth_p_mpls() v2.12 * Update skb and network header on MPLS extraction in ovs_flow_extract() * Use NULL in skb_cb_mpls_bos() * Add eth_p_mpls helper v2.10 - v2.11 * No change v2.9 * datapath: Always update the mpls bos if vlan_pop is successful Regardless of the details of how a successful vlan_pop is achieved, the mpls bos needs to be updated. Without this fix it has been observed that the following results in malformed packets v2.8 * No change v2.7 * Rebase v2.6 * As suggested by Yamahata-san - Do not guard against label == 0 for OVS_ACTION_ATTR_SET_MPLS in validate_actions(). 
A label of 0 is valid - Remove comment stipulating that if the top_label element of struct sw_flow_key is 0 then there is no MPLS label. An MPLS label of 0 is valid and the correct check is whether the ethertype is ntohs(ETH_TYPE_MPLS) or ntohs(ETH_TYPE_MPLS_MCAST) v2.4 - v2.5 * No change v2.3 * s/mpls_stack/mpls_bos/ This is in keeping with the naming used in the OpenFlow 1.3 specification v2.2 * Call skb_reset_mac_header() in skb_cb_set_mpls_stack() eth_hdr(skb) is non-NULL when called in skb_cb_set_mpls_stack(). * Add a call to skb_cb_set_mpls_stack() in ovs_packet_cmd_execute(). I apologise that I have mislaid my notes on this but it avoids a kernel panic. I can investigate again if necessary. * Use struct ovs_action_push_mpls instead of __be16 to decode OVS_ACTION_ATTR_PUSH_MPLS in validate_actions(). This is consistent with the data format for the attribute. * Indentation fix in skb_cb_mpls_stack(). [cosmetic] v2.1 * Manual rebase --- datapath/actions.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++ datapath/datapath.c | 57 +++++++++++++++++++++++++++++++++++++ datapath/datapath.h | 9 ++++++ datapath/flow.c | 31 ++++++++++++++++++++ datapath/flow.h | 13 +++++++++ datapath/vport.c | 2 ++ 6 files changed, 191 insertions(+) diff --git a/datapath/actions.c b/datapath/actions.c index f638ffc..60522be 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -49,6 +49,64 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } +static __be16 get_ethertype(const struct sk_buff *skb) +{ + struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN); + return hdr->h_proto; +} + +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype) +{ + struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN); + hdr->h_proto = ethertype; +} + +static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls *mpls) +{ + u32 l2_size; + __be32 *new_mpls_lse; + + if 
(skb_cow_head(skb, MPLS_HLEN) < 0) { + kfree_skb(skb); + return -ENOMEM; + } + + l2_size = skb_cb_l2_size(skb); + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size); + skb_reset_mac_header(skb); + + new_mpls_lse = (__be32 *)(skb_mac_header(skb) + l2_size); + *new_mpls_lse = mpls->mpls_lse; + + set_ethertype(skb, mpls->mpls_ethertype); + return 0; +} + +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype) +{ + __be16 current_ethertype = get_ethertype(skb); + if (eth_p_mpls(current_ethertype)) { + u32 l2_size = skb_cb_l2_size(skb); + + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), l2_size); + + skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + + set_ethertype(skb, *ethertype); + } + return 0; +} + +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +{ + __be16 current_ethertype = get_ethertype(skb); + if (eth_p_mpls(current_ethertype)) + memcpy(skb_cb_mpls_bos(skb), mpls_lse, sizeof(__be32)); + return 0; +} + /* remove VLAN header from packet and update csum accordingly. 
*/ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { @@ -73,6 +131,9 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) skb->mac_header += VLAN_HLEN; skb_reset_mac_len(skb); + /* update pointer to MPLS label stack */ + OVS_CB(skb)->l2_size -= VLAN_HLEN; + return 0; } @@ -102,6 +163,7 @@ static int pop_vlan(struct sk_buff *skb) return err; __vlan_hwaccel_put_tag(skb, ntohs(tci)); + return 0; } @@ -116,6 +178,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (!__vlan_put_tag(skb, current_tag)) return -ENOMEM; + /* update pointer to MPLS label stack */ + OVS_CB(skb)->l2_size += VLAN_HLEN; + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data + ETH_HLEN, VLAN_HLEN, 0)); @@ -478,6 +543,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_UDP: err = set_udp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_MPLS: + err = set_mpls(skb, nla_data(nested_attr)); + break; } return err; @@ -514,6 +583,16 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, output_userspace(dp, skb, a); break; + case OVS_ACTION_ATTR_PUSH_MPLS: + err = push_mpls(skb, nla_data(a)); + if (unlikely(err)) /* skb already freed. */ + return err; + break; + + case OVS_ACTION_ATTR_POP_MPLS: + err = pop_mpls(skb, nla_data(a)); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. 
*/ diff --git a/datapath/datapath.c b/datapath/datapath.c index 04a5e7f..897024d 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -71,6 +71,45 @@ static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); int ovs_net_id __read_mostly; +int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); +EXPORT_SYMBOL(ovs_dp_ioctl_hook); + +void skb_cb_set_l2_size(struct sk_buff *skb) +{ + struct ethhdr *eth; + int nh_ofs; + __be16 dl_type = 0; + + skb_reset_mac_header(skb); + + eth = eth_hdr(skb); + nh_ofs = sizeof(struct ethhdr); + if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) { + dl_type = eth->h_proto; + + while (dl_type == htons(ETH_P_8021Q) && + skb->len >= nh_ofs + sizeof(struct vlan_hdr)) { + struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs); + dl_type = vh->h_vlan_encapsulated_proto; + nh_ofs += sizeof(struct vlan_hdr); + } + + OVS_CB(skb)->l2_size = nh_ofs; + } else { + OVS_CB(skb)->l2_size = 0; + } +} + +unsigned char *skb_cb_mpls_bos(const struct sk_buff *skb) +{ + return skb_mac_header(skb) + OVS_CB(skb)->l2_size; +} + +ptrdiff_t skb_cb_l2_size(const struct sk_buff *skb) +{ + return OVS_CB(skb)->l2_size; +} + /** * DOC: Locking: * @@ -667,6 +706,11 @@ static int validate_set(const struct nlattr *a, return validate_tp_port(flow_key); + case OVS_KEY_ATTR_MPLS: + if (!eth_p_mpls(flow_key->eth.type)) + return -EINVAL; + break; + default: return -EINVAL; } @@ -725,6 +769,8 @@ static int validate_and_copy_actions(const struct nlattr *attr, static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), + [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -755,6 +801,15 @@ static int validate_and_copy_actions(const struct nlattr *attr, return -EINVAL; 
break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nla_data(a); + if (!eth_p_mpls(mpls->mpls_ethertype)) + return -EINVAL; + break; + } + + case OVS_ACTION_ATTR_POP_MPLS: + break; case OVS_ACTION_ATTR_POP_VLAN: break; @@ -870,6 +925,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->priority = flow->key.phy.priority; skb_set_mark(packet, flow->key.phy.skb_mark); + skb_cb_set_l2_size(packet); + rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; diff --git a/datapath/datapath.h b/datapath/datapath.h index 2b93348..11c908e 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -95,6 +95,10 @@ struct datapath { * @flow: The flow associated with this packet. May be %NULL if no flow. * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the * packet is not being tunneled. + * @l2_size: Length of the packet's Ethernet header, including any VLAN headers. + * This is the offset from the beginning of the ethernet frame where MPLS + * stack would be, if one is present. It is 0 when there is no L2 header. + * ethernet frame. It is 0 if no MPLS stack is present. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. 
* @csum_start: Stores the offset from which to start checksumming independent @@ -106,6 +110,7 @@ struct datapath { struct ovs_skb_cb { struct sw_flow *flow; struct ovs_key_ipv4_tunnel *tun_key; + ptrdiff_t l2_size; #ifdef NEED_CSUM_NORMALIZE enum csum_type ip_summed; u16 csum_start; @@ -189,4 +194,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); + +void skb_cb_set_l2_size(struct sk_buff *skb); +unsigned char *skb_cb_mpls_bos(const struct sk_buff *skb); +ptrdiff_t skb_cb_l2_size(const struct sk_buff *skb); #endif /* datapath.h */ diff --git a/datapath/flow.c b/datapath/flow.c index fad9e19..27e1920 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -728,6 +728,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); } + } else if (eth_p_mpls(key->eth.type)) { + error = check_header(skb, MPLS_HLEN); + if (unlikely(error)) + goto out; + + key_len = SW_FLOW_KEY_OFFSET(mpls.top_lse); + memcpy(&key->mpls.top_lse, skb_network_header(skb), MPLS_HLEN); + + /* Update network header */ + skb_set_network_header(skb, skb_network_header(skb) - + skb->data + MPLS_HLEN); } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ @@ -838,6 +849,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), + [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), @@ -1274,6 +1286,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->ip.proto = ntohs(arp_key->arp_op); memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); 
memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); + } else if (eth_p_mpls(swkey->eth.type)) { + const struct ovs_key_mpls *mpls_key; + + if (!(attrs & (1ULL << OVS_KEY_ATTR_MPLS))) + return -EINVAL; + attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); + + key_len = SW_FLOW_KEY_OFFSET(mpls.top_lse); + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); + swkey->mpls.top_lse = mpls_key->mpls_top_lse; } if (attrs) @@ -1473,6 +1495,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) arp_key->arp_op = htons(swkey->ip.proto); memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); + } else if (eth_p_mpls(swkey->eth.type)) { + struct ovs_key_mpls *mpls_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + if (!nla) + goto nla_put_failure; + mpls_key = nla_data(nla); + memset(mpls_key, 0, sizeof(struct ovs_key_mpls)); + mpls_key->mpls_top_lse = swkey->mpls.top_lse; } if ((swkey->eth.type == htons(ETH_P_IP) || diff --git a/datapath/flow.h b/datapath/flow.h index 6949640..d8e350c 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -73,6 +73,9 @@ struct sw_flow_key { __be16 type; /* Ethernet frame type. */ } eth; struct { + __be32 top_lse; /* top label stack entry */ + } mpls; + struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ u8 tos; /* IP ToS. */ u8 ttl; /* IP TTL/hop limit. 
*/ @@ -143,6 +146,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +#define ETH_TYPE_MIN 0x600 + +#define MPLS_HLEN 4 + int ovs_flow_init(void); void ovs_flow_exit(void); @@ -234,4 +241,10 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr, int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *tun_key); +static inline bool eth_p_mpls(__be16 eth_type) +{ + return eth_type == htons(ETH_P_MPLS_UC) || + eth_type == htons(ETH_P_MPLS_MC); +} + #endif /* flow.h */ diff --git a/datapath/vport.c b/datapath/vport.c index 9c0942b..82aba8c 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -422,6 +422,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) if (!(vport->ops->flags & VPORT_F_TUN_ID)) OVS_CB(skb)->tun_key = NULL; + skb_cb_set_l2_size(skb); + ovs_dp_process_received_packet(vport, skb); } -- 1.7.10.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev