On Wed, Oct 03, 2012 at 12:17:16PM -0700, Jesse Gross wrote:
> From: Leo Alterman <lalter...@nicira.com>
> 
> Allow datapath to recognize and extract MPLS labels into flow keys
> and execute actions which push, pop, and set labels on packets.

Hi,

I now have this code working in conjunction with a modified
version of Ravi's user-space code. I will post a patchset which
works under for my light testing shortly.

What follows are details of changes I made to this patch in
order for it to work in my setup.

Firstly,

I needed the following to unsure that 

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 79c50af..8ff244c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -861,6 +864,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
 
+       skb_cb_set_mpls_stack(packet);
+
        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;

The rest of my proposed changes are inline below.

> 
> Signed-off-by: Leo Alterman <lalter...@nicira.com>
> --
> I haven't reviewed this but hopefully it can be used a good starting point.
> ---
>  datapath/actions.c          |   81 
> +++++++++++++++++++++++++++++++++++++++++++
>  datapath/datapath.c         |   56 ++++++++++++++++++++++++++++++
>  datapath/datapath.h         |    8 +++++
>  datapath/flow.c             |   30 ++++++++++++++++
>  datapath/flow.h             |    7 ++++
>  datapath/vport.c            |    2 ++
>  include/linux/openvswitch.h |   32 +++++++++++++++++
>  7 files changed, 216 insertions(+)
> 
> diff --git a/datapath/actions.c b/datapath/actions.c
> index 208f260..a199125 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -47,6 +47,67 @@ static int make_writable(struct sk_buff *skb, int 
> write_len)
>       return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
>  }
>  
> +static __be16 get_ethertype(const struct sk_buff *skb)
> +{
> +     struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - 
> ETH_HLEN);
> +     return hdr->h_proto;
> +}
> +
> +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
> +{
> +     struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - 
> ETH_HLEN);
> +     hdr->h_proto = ethertype;
> +}
> +
> +static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls 
> *mpls)
> +{
> +     u32 l2_size;
> +     __be32 *new_mpls_label;
> +
> +     if (skb_cow_head(skb, MPLS_HLEN) < 0) {
> +             kfree_skb(skb);
> +             return -ENOMEM;
> +     }
> +
> +     l2_size = skb_cb_mpls_stack_offset(skb);
> +     skb_push(skb, MPLS_HLEN);
> +     memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
> +     skb_reset_mac_header(skb);
> +
> +     new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
> +     *new_mpls_label = mpls->mpls_label;
> +
> +     set_ethertype(skb, mpls->mpls_ethertype);
> +     return 0;
> +}
> +
> +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> +{
> +     __be16 current_ethertype = get_ethertype(skb);
> +     if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> +             current_ethertype == htons(ETH_P_MPLS_MC)) {
> +             u32 l2_size = skb_cb_mpls_stack_offset(skb);
> +
> +             memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), 
> l2_size);
> +
> +             skb_pull(skb, MPLS_HLEN);
> +             skb_reset_mac_header(skb);
> +
> +             set_ethertype(skb, *ethertype);
> +     }
> +     return 0;
> +}
> +
> +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
> +{
> +     __be16 current_ethertype = get_ethertype(skb);
> +     if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> +             current_ethertype == htons(ETH_P_MPLS_MC)) {
> +             memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
> +     }
> +     return 0;
> +}
> +
>  /* remove VLAN header from packet and update csum accrodingly. */
>  static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
>  {
> @@ -100,6 +161,9 @@ static int pop_vlan(struct sk_buff *skb)
>               return err;
>  
>       __vlan_hwaccel_put_tag(skb, ntohs(tci));
> +
> +     /* update pointer to MPLS label stack */
> +     skb_cb_set_mpls_stack(skb);
>       return 0;
>  }
>  
> @@ -120,6 +184,9 @@ static int push_vlan(struct sk_buff *skb, const struct 
> ovs_action_push_vlan *vla
>  
>       }
>       __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
> +
> +     /* update pointer to MPLS label stack */
> +     skb_cb_set_mpls_stack(skb);
>       return 0;
>  }
>  
> @@ -396,6 +463,20 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
>                       output_userspace(dp, skb, a);
>                       break;
>  
> +             case OVS_ACTION_ATTR_PUSH_MPLS:
> +                     err = push_mpls(skb, nla_data(a));
> +                     if (unlikely(err)) /* skb already freed. */
> +                             return err;
> +                     break;
> +
> +             case OVS_ACTION_ATTR_POP_MPLS:
> +                     err = pop_mpls(skb, nla_data(a));
> +                     break;
> +
> +             case OVS_ACTION_ATTR_SET_MPLS:
> +                     err = set_mpls(skb, nla_data(a));
> +                     break;
> +
>               case OVS_ACTION_ATTR_PUSH_VLAN:
>                       err = push_vlan(skb, nla_data(a));
>                       if (unlikely(err)) /* skb already freed. */
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index c83ce16..91b70bb 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -74,6 +74,41 @@ int ovs_net_id __read_mostly;
>  int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
>  EXPORT_SYMBOL(ovs_dp_ioctl_hook);
>  
> +void skb_cb_set_mpls_stack(struct sk_buff *skb)
> +{
> +     struct ethhdr *eth;
> +     int nh_ofs;
> +     __be16 dl_type = 0;
> +
> +     eth = eth_hdr(skb);
> +     nh_ofs = sizeof(struct ethhdr);
> +     if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) {
> +             dl_type = eth->h_proto;
> +
> +             while (dl_type == htons(ETH_P_8021Q) &&
> +                             skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
> +                     struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + 
> nh_ofs);
> +                     dl_type = vh->h_vlan_encapsulated_proto;
> +                     nh_ofs += sizeof(struct vlan_hdr);
> +             }
> +
> +             OVS_CB(skb)->mpls_stack = nh_ofs;
> +     } else {
> +             OVS_CB(skb)->mpls_stack = 0;
> +     }
> +}
> +
> +unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb)
> +{
> +     return OVS_CB(skb)->mpls_stack ?
> +                                     skb_mac_header(skb) + 
> OVS_CB(skb)->mpls_stack : 0;

The indentation on the line above is suboptimal.

> +}
> +
> +ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb)
> +{
> +     return OVS_CB(skb)->mpls_stack;
> +}
> +
>  /**
>   * DOC: Locking:
>   *
> @@ -663,12 +698,17 @@ static int validate_actions(const struct nlattr *attr,
>               static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
>                       [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
>                       [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
> +                     [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct 
> ovs_action_push_mpls),
> +                     [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
> +                     [OVS_ACTION_ATTR_SET_MPLS] = sizeof(__be32),
>                       [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct 
> ovs_action_push_vlan),
>                       [OVS_ACTION_ATTR_POP_VLAN] = 0,
>                       [OVS_ACTION_ATTR_SET] = (u32)-1,
>                       [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
>               };
>               const struct ovs_action_push_vlan *vlan;
> +             __be16 mpls_ethertype;
> +             __be32 mpls_label;
>               int type = nla_type(a);
>  
>               if (type > OVS_ACTION_ATTR_MAX ||
> @@ -691,6 +731,22 @@ static int validate_actions(const struct nlattr *attr,
>                               return -EINVAL;
>                       break;
>  
> +             case OVS_ACTION_ATTR_PUSH_MPLS:
> +                     mpls_ethertype = nla_get_be16(a);
> +                     if (mpls_ethertype != htons(ETH_P_MPLS_UC) &&
> +                             mpls_ethertype != htons(ETH_P_MPLS_MC))
> +                             return -EINVAL;
> +                     break;

The case above does not seem to function because the attribute
is a struct struct ovs_action_push_mpls where the ether_type is
preceded by a 32bit label.

The following seems to work for me:


               case OVS_ACTION_ATTR_PUSH_MPLS: {
                       const struct ovs_action_push_mpls *mpls = nla_data(a);
                       if (mpls->mpls_ethertype != htons(ETH_P_MPLS_UC) &&
                           mpls->mpls_ethertype != htons(ETH_P_MPLS_MC))
                               return -EINVAL;
                       break;
               }

> +
> +             case OVS_ACTION_ATTR_POP_MPLS:
> +                     break;
> +
> +             case OVS_ACTION_ATTR_SET_MPLS:
> +                     mpls_label = nla_get_be32(a);
> +                     if (mpls_label == htonl(0)) {
> +                             return -EINVAL;
> +                     }
> +                     break;
>  
>               case OVS_ACTION_ATTR_POP_VLAN:
>                       break;
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index affbf0e..1801ccd 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -97,6 +97,9 @@ struct datapath {
>   * struct ovs_skb_cb - OVS data in skb CB
>   * @flow: The flow associated with this packet.  May be %NULL if no flow.
>   * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if the
> + * packet is not being tunneled.
> + * @mpls_stack: Offset of the packet's MPLS stack from the beginning of the
> + * ethernet frame.  It is 0 if no MPLS stack is present.
>   * @ip_summed: Consistently stores L4 checksumming status across different
>   * kernel versions.
>   * @csum_start: Stores the offset from which to start checksumming 
> independent
> @@ -108,6 +111,7 @@ struct datapath {
>  struct ovs_skb_cb {
>       struct sw_flow          *flow;
>       __be64                  tun_id;
> +     ptrdiff_t       mpls_stack;
>  #ifdef NEED_CSUM_NORMALIZE
>       enum csum_type          ip_summed;
>       u16                     csum_start;
> @@ -192,4 +196,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, 
> u32 pid, u32 seq,
>                                        u8 cmd);
>  
>  int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
> +
> +void skb_cb_set_mpls_stack(struct sk_buff *skb);
> +unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb);
> +ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb);
>  #endif /* datapath.h */
> diff --git a/datapath/flow.c b/datapath/flow.c
> index d07337c..40df8ff 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -739,6 +739,14 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, 
> struct sw_flow_key *key,
>                               key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
>                       }
>               }
> +     } else if (key->eth.type == htons(ETH_P_MPLS_UC) ||
> +                             key->eth.type == htons(ETH_P_MPLS_MC)) {
> +             error = check_header(skb, MPLS_HLEN);
> +             if (unlikely(error))
> +                     goto out;
> +
> +             key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> +             memcpy(&key->mpls.top_label, skb_network_header(skb), 
> MPLS_HLEN);
>       } else if (key->eth.type == htons(ETH_P_IPV6)) {
>               int nh_len;             /* IPv6 Header + Extensions */
>  
> @@ -836,6 +844,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
>       [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
>       [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
>       [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
> +     [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
>       [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
>       [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
>       [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
> @@ -1141,6 +1150,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, 
> int *key_lenp,
>               swkey->ip.proto = ntohs(arp_key->arp_op);
>               memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
>               memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
> +     } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
> +                             swkey->eth.type == htons(ETH_P_MPLS_MC)) {
> +             const struct ovs_key_mpls *mpls_key;
> +
> +             if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
> +                     return -EINVAL;
> +             attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> +
> +             key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> +             mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> +             swkey->mpls.top_label = mpls_key->mpls_top_label;
>       }
>  
>       if (attrs)
> @@ -1284,6 +1304,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key 
> *swkey, struct sk_buff *skb)
>               arp_key->arp_op = htons(swkey->ip.proto);
>               memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
>               memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
> +     } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
> +                             swkey->eth.type == htons(ETH_P_MPLS_MC)) {
> +             struct ovs_key_mpls *mpls_key;
> +
> +             nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> +             if (!nla)
> +                     goto nla_put_failure;
> +             mpls_key = nla_data(nla);
> +             memset(mpls_key, 0, sizeof(struct ovs_key_mpls));
> +             mpls_key->mpls_top_label = swkey->mpls.top_label;
>       }
>  
>       if ((swkey->eth.type == htons(ETH_P_IP) ||
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 5be481e..c18183b 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -53,6 +53,9 @@ struct sw_flow_key {
>               __be16 type;            /* Ethernet frame type. */
>       } eth;
>       struct {
> +             __be32 top_label;       /* 0 if no MPLS, top label from stack 
> otherwise */
> +     } mpls;
> +     struct {
>               u8     proto;           /* IP protocol or lower 8 bits of ARP 
> opcode. */
>               u8     tos;             /* IP ToS. */
>               u8     ttl;             /* IP TTL/hop limit. */
> @@ -126,6 +129,10 @@ struct arp_eth_header {
>       unsigned char       ar_tip[4];          /* target IP address        */
>  } __packed;
>  
> +#define ETH_TYPE_MIN 0x600
> +
> +#define MPLS_HLEN 4
> +
>  int ovs_flow_init(void);
>  void ovs_flow_exit(void);
>  
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 172261a..0664e4c 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -464,6 +464,8 @@ void ovs_vport_receive(struct vport *vport, struct 
> sk_buff *skb)
>       if (!(vport->ops->flags & VPORT_F_TUN_ID))
>               OVS_CB(skb)->tun_id = 0;

I have encountered situations where skb_cb_set_mpls_stack() panics
because eth_hdr(skb) returns NULL.

I'm unsure if this is the best fix, but adding the following here
seems to resolve the problem that I observed.

        skb_cb_set_mpls_stack(skb);

>  
> +     skb_cb_set_mpls_stack(skb);
> +
>       ovs_dp_process_received_packet(vport, skb);
>  }
>  
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index f5c9cca..78960d1 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -278,6 +278,7 @@ enum ovs_key_attr {
>       OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
>       OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
>       OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
> +     OVS_KEY_ATTR_MPLS,      /* struct ovs_key_mpls */
>       OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
>       __OVS_KEY_ATTR_MAX
>  };
> @@ -307,6 +308,10 @@ struct ovs_key_ethernet {
>       __u8     eth_dst[6];
>  };
>  
> +struct ovs_key_mpls {
> +     __be32 mpls_top_label;
> +};
> +
>  struct ovs_key_ipv4 {
>       __be32 ipv4_src;
>       __be32 ipv4_dst;
> @@ -437,6 +442,20 @@ enum ovs_userspace_attr {
>  #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
>  
>  /**
> + * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
> + * @mpls_label: MPLS label to push.
> + * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
> + *
> + * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC 
> and
> + * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Any other values 
> would
> + * produce a corrupt packet.
> + */
> +struct ovs_action_push_mpls {
> +     __be32 mpls_label;
> +     __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ 

I believe that this structure will be padded out to be 64 bytes
so perhaps it would be worth adding a pad or zero element value
to make it clear that some care needs to be taken there?

        __be16 zero;

> +};
> +
> +/**
>   * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
>   * @vlan_tpid: Tag protocol identifier (TPID) to push.
>   * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
> @@ -461,6 +480,16 @@ struct ovs_action_push_vlan {
>   * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
>   * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and 
> its
>   * value.
> + * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label onto the top of the 
> packet
> + * MPLS stack. Set the ethertype of the encapsulating frame to either
> + * %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to indicate the new packet contents.
> + * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label off of the packet's MPLS 
> stack.
> + * Set the encapsulating frame's ethertype to indicate the new packet 
> contents
> + * (this could potentially still be %ETH_P_MPLS_* if there are remaining MPLS
> + * labels).  If there are no MPLS labels as determined by ethertype, no 
> action
> + * is taken.
> + * @OVS_ACTION_ATTR_SET_MPLS: Set the value of the top-most label in the MPLS
> + * label stack.  If there are no MPLS labels in the packet, no action is 
> taken.
>   * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
>   * packet.
>   * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
> @@ -477,6 +506,9 @@ enum ovs_action_attr {
>       OVS_ACTION_ATTR_OUTPUT,       /* u32 port number. */
>       OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
>       OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
> +     OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
> +     OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
> +     OVS_ACTION_ATTR_SET_MPLS,     /* __be32 MPLS label */
>       OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
>       OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
>       OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
> -- 
> 1.7.9.5
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
> 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to