Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman.

Cc: Leo Alterman <lalter...@nicira.com>
Signed-off-by: Simon Horman <ho...@verge.net.au>

---

v2.2
* Call skb_reset_mac_header() in skb_cb_set_mpls_stack()
  eth_hdr(skb) is non-NULL when called in skb_cb_set_mpls_stack().
* Add a call to skb_cb_set_mpls_stack() in ovs_packet_cmd_execute().
  I apologise that I have mislaid my notes on this but
  it avoids a kernel panic. I can investigate again if necessary.
* Use struct ovs_action_push_mpls instead of
  __be16 to decode OVS_ACTION_ATTR_PUSH_MPLS in validate_actions(). This is
  consistent with the data format for the attribute.
* Indentation fix in skb_cb_mpls_stack(). [cosmetic]

v2.1
* Manual rebase

Previous version(s) by Leo Alterman
---
 datapath/actions.c          |   81 +++++++++++++++++++++++++++++++++++++++++++
 datapath/datapath.c         |   61 ++++++++++++++++++++++++++++++++
 datapath/datapath.h         |    8 +++++
 datapath/flow.c             |   30 ++++++++++++++++
 datapath/flow.h             |    7 ++++
 datapath/vport.c            |    2 ++
 include/linux/openvswitch.h |   32 +++++++++++++++++
 7 files changed, 221 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index ec9b595..c1d1a7f 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -47,6 +47,67 @@ static int make_writable(struct sk_buff *skb, int write_len)
        return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 }
 
+static __be16 get_ethertype(const struct sk_buff *skb)
+{
+       struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - 
ETH_HLEN);
+       return hdr->h_proto;
+}
+
+static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
+{
+       struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - 
ETH_HLEN);
+       hdr->h_proto = ethertype;
+}
+
+static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls 
*mpls)
+{
+       u32 l2_size;
+       __be32 *new_mpls_label;
+
+       if (skb_cow_head(skb, MPLS_HLEN) < 0) {
+               kfree_skb(skb);
+               return -ENOMEM;
+       }
+
+       l2_size = skb_cb_mpls_stack_offset(skb);
+       skb_push(skb, MPLS_HLEN);
+       memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
+       skb_reset_mac_header(skb);
+
+       new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
+       *new_mpls_label = mpls->mpls_label;
+
+       set_ethertype(skb, mpls->mpls_ethertype);
+       return 0;
+}
+
+static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
+{
+       __be16 current_ethertype = get_ethertype(skb);
+       if (current_ethertype == htons(ETH_P_MPLS_UC) ||
+               current_ethertype == htons(ETH_P_MPLS_MC)) {
+               u32 l2_size = skb_cb_mpls_stack_offset(skb);
+
+               memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), 
l2_size);
+
+               skb_pull(skb, MPLS_HLEN);
+               skb_reset_mac_header(skb);
+
+               set_ethertype(skb, *ethertype);
+       }
+       return 0;
+}
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
+{
+       __be16 current_ethertype = get_ethertype(skb);
+       if (current_ethertype == htons(ETH_P_MPLS_UC) ||
+               current_ethertype == htons(ETH_P_MPLS_MC)) {
+               memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
+       }
+       return 0;
+}
+
 /* remove VLAN header from packet and update csum accordingly. */
 static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 {
@@ -100,6 +161,9 @@ static int pop_vlan(struct sk_buff *skb)
                return err;
 
        __vlan_hwaccel_put_tag(skb, ntohs(tci));
+
+       /* update pointer to MPLS label stack */
+       skb_cb_set_mpls_stack(skb);
        return 0;
 }
 
@@ -120,6 +184,9 @@ static int push_vlan(struct sk_buff *skb, const struct 
ovs_action_push_vlan *vla
 
        }
        __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+
+       /* update pointer to MPLS label stack */
+       skb_cb_set_mpls_stack(skb);
        return 0;
 }
 
@@ -396,6 +463,20 @@ static int do_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
                        output_userspace(dp, skb, a);
                        break;
 
+               case OVS_ACTION_ATTR_PUSH_MPLS:
+                       err = push_mpls(skb, nla_data(a));
+                       if (unlikely(err)) /* skb already freed. */
+                               return err;
+                       break;
+
+               case OVS_ACTION_ATTR_POP_MPLS:
+                       err = pop_mpls(skb, nla_data(a));
+                       break;
+
+               case OVS_ACTION_ATTR_SET_MPLS:
+                       err = set_mpls(skb, nla_data(a));
+                       break;
+
                case OVS_ACTION_ATTR_PUSH_VLAN:
                        err = push_vlan(skb, nla_data(a));
                        if (unlikely(err)) /* skb already freed. */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a6915fb..8ff244c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -74,6 +74,43 @@ int ovs_net_id __read_mostly;
 int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
 EXPORT_SYMBOL(ovs_dp_ioctl_hook);
 
+void skb_cb_set_mpls_stack(struct sk_buff *skb)
+{
+       struct ethhdr *eth;
+       int nh_ofs;
+       __be16 dl_type = 0;
+
+       skb_reset_mac_header(skb);
+
+       eth = eth_hdr(skb);
+       nh_ofs = sizeof(struct ethhdr);
+       if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) {
+               dl_type = eth->h_proto;
+
+               while (dl_type == htons(ETH_P_8021Q) &&
+                               skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
+                       struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + 
nh_ofs);
+                       dl_type = vh->h_vlan_encapsulated_proto;
+                       nh_ofs += sizeof(struct vlan_hdr);
+               }
+
+               OVS_CB(skb)->mpls_stack = nh_ofs;
+       } else {
+               OVS_CB(skb)->mpls_stack = 0;
+       }
+}
+
+unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb)
+{
+       return OVS_CB(skb)->mpls_stack ?
+               skb_mac_header(skb) + OVS_CB(skb)->mpls_stack : 0;
+}
+
+ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb)
+{
+       return OVS_CB(skb)->mpls_stack;
+}
+
 /**
  * DOC: Locking:
  *
@@ -663,12 +700,17 @@ static int validate_actions(const struct nlattr *attr,
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+                       [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct 
ovs_action_push_mpls),
+                       [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
+                       [OVS_ACTION_ATTR_SET_MPLS] = sizeof(__be32),
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct 
ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
+               __be16 mpls_ethertype;
+               __be32 mpls_label;
                int type = nla_type(a);
 
                if (type > OVS_ACTION_ATTR_MAX ||
@@ -691,6 +733,23 @@ static int validate_actions(const struct nlattr *attr,
                                return -EINVAL;
                        break;
 
+               case OVS_ACTION_ATTR_PUSH_MPLS: {
+                       const struct ovs_action_push_mpls *mpls = nla_data(a);
+                       if (mpls->mpls_ethertype != htons(ETH_P_MPLS_UC) &&
+                           mpls->mpls_ethertype != htons(ETH_P_MPLS_MC))
+                               return -EINVAL;
+                       break;
+               }
+
+               case OVS_ACTION_ATTR_POP_MPLS:
+                       break;
+
+               case OVS_ACTION_ATTR_SET_MPLS:
+                       mpls_label = nla_get_be32(a);
+                       if (mpls_label == htonl(0)) {
+                               return -EINVAL;
+                       }
+                       break;
 
                case OVS_ACTION_ATTR_POP_VLAN:
                        break;
@@ -805,6 +864,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
 
+       skb_cb_set_mpls_stack(packet);
+
        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index affbf0e..1801ccd 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -97,6 +97,9 @@ struct datapath {
  * struct ovs_skb_cb - OVS data in skb CB
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
  * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if the
+ * packet is not being tunneled.
+ * @mpls_stack: Offset of the packet's MPLS stack from the beginning of the
+ * ethernet frame.  It is 0 if no MPLS stack is present.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @csum_start: Stores the offset from which to start checksumming independent
@@ -108,6 +111,7 @@ struct datapath {
 struct ovs_skb_cb {
        struct sw_flow          *flow;
        __be64                  tun_id;
+       ptrdiff_t       mpls_stack;
 #ifdef NEED_CSUM_NORMALIZE
        enum csum_type          ip_summed;
        u16                     csum_start;
@@ -192,4 +196,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, 
u32 pid, u32 seq,
                                         u8 cmd);
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+
+void skb_cb_set_mpls_stack(struct sk_buff *skb);
+unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb);
+ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb);
 #endif /* datapath.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index d07337c..40df8ff 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -739,6 +739,14 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, 
struct sw_flow_key *key,
                                key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
                        }
                }
+       } else if (key->eth.type == htons(ETH_P_MPLS_UC) ||
+                               key->eth.type == htons(ETH_P_MPLS_MC)) {
+               error = check_header(skb, MPLS_HLEN);
+               if (unlikely(error))
+                       goto out;
+
+               key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
+               memcpy(&key->mpls.top_label, skb_network_header(skb), 
MPLS_HLEN);
        } else if (key->eth.type == htons(ETH_P_IPV6)) {
                int nh_len;             /* IPv6 Header + Extensions */
 
@@ -836,6 +844,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
        [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
        [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+       [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
        [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
        [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
        [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
@@ -1141,6 +1150,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int 
*key_lenp,
                swkey->ip.proto = ntohs(arp_key->arp_op);
                memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
                memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+       } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
+                               swkey->eth.type == htons(ETH_P_MPLS_MC)) {
+               const struct ovs_key_mpls *mpls_key;
+
+               if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
+                       return -EINVAL;
+               attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+
+               key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
+               mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+               swkey->mpls.top_label = mpls_key->mpls_top_label;
        }
 
        if (attrs)
@@ -1284,6 +1304,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, 
struct sk_buff *skb)
                arp_key->arp_op = htons(swkey->ip.proto);
                memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
                memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+       } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
+                               swkey->eth.type == htons(ETH_P_MPLS_MC)) {
+               struct ovs_key_mpls *mpls_key;
+
+               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+               if (!nla)
+                       goto nla_put_failure;
+               mpls_key = nla_data(nla);
+               memset(mpls_key, 0, sizeof(struct ovs_key_mpls));
+               mpls_key->mpls_top_label = swkey->mpls.top_label;
        }
 
        if ((swkey->eth.type == htons(ETH_P_IP) ||
diff --git a/datapath/flow.h b/datapath/flow.h
index 02c563a..eb3fe49 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -53,6 +53,9 @@ struct sw_flow_key {
                __be16 type;            /* Ethernet frame type. */
        } eth;
        struct {
+               __be32 top_label;       /* 0 if no MPLS, top label from stack 
otherwise */
+       } mpls;
+       struct {
                u8     proto;           /* IP protocol or lower 8 bits of ARP 
opcode. */
                u8     tos;             /* IP ToS. */
                u8     ttl;             /* IP TTL/hop limit. */
@@ -126,6 +129,10 @@ struct arp_eth_header {
        unsigned char       ar_tip[4];          /* target IP address        */
 } __packed;
 
+#define ETH_TYPE_MIN 0x600
+
+#define MPLS_HLEN 4
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
diff --git a/datapath/vport.c b/datapath/vport.c
index dc7adfa..0ae4801 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -464,6 +464,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff 
*skb)
        if (!(vport->ops->flags & VPORT_F_TUN_ID))
                OVS_CB(skb)->tun_id = 0;
 
+       skb_cb_set_mpls_stack(skb);
+
        ovs_dp_process_received_packet(vport, skb);
 }
 
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index f5c9cca..78960d1 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -278,6 +278,7 @@ enum ovs_key_attr {
        OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
        OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
        OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+       OVS_KEY_ATTR_MPLS,      /* struct ovs_key_mpls */
        OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
        __OVS_KEY_ATTR_MAX
 };
@@ -307,6 +308,10 @@ struct ovs_key_ethernet {
        __u8     eth_dst[6];
 };
 
+struct ovs_key_mpls {
+       __be32 mpls_top_label;
+};
+
 struct ovs_key_ipv4 {
        __be32 ipv4_src;
        __be32 ipv4_dst;
@@ -437,6 +442,20 @@ enum ovs_userspace_attr {
 #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
 
 /**
+ * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
+ * @mpls_label: MPLS label to push.
+ * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
+ *
+ * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
+ * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Any other values would
+ * produce a corrupt packet.
+ */
+struct ovs_action_push_mpls {
+       __be32 mpls_label;
+       __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ 
+};
+
+/**
  * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
  * @vlan_tpid: Tag protocol identifier (TPID) to push.
  * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
@@ -461,6 +480,16 @@ struct ovs_action_push_vlan {
  * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
  * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
  * value.
+ * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label onto the top of the packet
+ * MPLS stack. Set the ethertype of the encapsulating frame to either
+ * %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to indicate the new packet contents.
+ * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label off of the packet's MPLS stack.
+ * Set the encapsulating frame's ethertype to indicate the new packet contents
+ * (this could potentially still be %ETH_P_MPLS_* if there are remaining MPLS
+ * labels).  If there are no MPLS labels as determined by ethertype, no action
+ * is taken.
+ * @OVS_ACTION_ATTR_SET_MPLS: Set the value of the top-most label in the MPLS
+ * label stack.  If there are no MPLS labels in the packet, no action is taken.
  * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
  * packet.
  * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
@@ -477,6 +506,9 @@ enum ovs_action_attr {
        OVS_ACTION_ATTR_OUTPUT,       /* u32 port number. */
        OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
        OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
+       OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
+       OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
+       OVS_ACTION_ATTR_SET_MPLS,     /* __be32 MPLS label */
        OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
        OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
        OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
-- 
1.7.10.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to