On Sat, Apr 20, 2013 at 06:25:03AM +0000, Rajahalme, Jarno (NSN - FI/Espoo) 
wrote:
> 
> On Apr 19, 2013, at 10:41 , ext Simon Horman wrote:
> 
> > diff --git a/datapath/actions.c b/datapath/actions.c
> > index 0dac658..2c923be 100644
> > --- a/datapath/actions.c
> > +++ b/datapath/actions.c
> > @@ -38,6 +38,7 @@
> > #include "vport.h"
> > 
> > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> > +                         unsigned *mpls_stack_depth,
> >                           const struct nlattr *attr, int len, bool 
> > keep_skb);
> > 
> > static int make_writable(struct sk_buff *skb, int write_len)
> > @@ -48,6 +49,89 @@ static int make_writable(struct sk_buff *skb, int 
> > write_len)
> >     return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
> > }
> > 
> > +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
> > +{
> > +   struct ethhdr *hdr = (struct ethhdr *)skb_mac_header(skb);
> > +   if (hdr->h_proto == ethertype)
> > +           return;
> > +   hdr->h_proto = ethertype;
> 
> Will this work properly if the skb has VLAN headers? I recall there was an 
> earlier version that used the l2_size (now mac_len) to locate the actual 
> "h_proto" to update?

Thanks, I believe you are correct and that the version above is wrong.
I will revert to a version that makes use of mac_len.

> 
> > +   if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
> > +           __be16 diff[] = { ~hdr->h_proto, ethertype };
> > +           skb->csum = ~csum_partial((char *)diff, sizeof(diff),
> > +                                     ~skb->csum);
> > +   }
> > +}
> > +
> > +static int push_mpls(struct sk_buff *skb, const struct 
> > ovs_action_push_mpls *mpls)
> > +{
> > +   __be32 *new_mpls_lse;
> > +   int err;
> > +
> > +   err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> > +   if (unlikely(err))
> > +           return err;
> > +
> > +   skb_push(skb, MPLS_HLEN);
> > +   memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
> > +           skb->mac_len);
> > +   skb_reset_mac_header(skb);
> > +   skb_set_network_header(skb, skb->mac_len);
> > +
> > +   new_mpls_lse = (__be32 *)skb_network_header(skb);
> > +   *new_mpls_lse = mpls->mpls_lse;
> > +
> > +   if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> > +           skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
> > +                                                        MPLS_HLEN, 0));
> > +
> > +   set_ethertype(skb, mpls->mpls_ethertype);
> > +   return 0;
> > +}
> > +
> > +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> > +{
> > +   int err;
> > +
> > +   err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> > +   if (unlikely(err))
> > +           return err;
> > +
> > +   if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> > +           skb->csum = csum_sub(skb->csum,
> > +                                csum_partial(skb_network_header(skb),
> > +                                             MPLS_HLEN, 0));
> > +
> > +   memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
> > +           skb->mac_len);
> > +
> > +   skb_pull(skb, MPLS_HLEN);
> > +   skb_reset_mac_header(skb);
> > +   skb_set_network_header(skb, skb->mac_len);
> > +
> > +   set_ethertype(skb, *ethertype);
> > +   return 0;
> > +}
> > +
> > +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
> > +{
> > +   __be32 *stack = (__be32 *)skb_network_header(skb);
> > +   int err;
> > +
> > +   err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> > +   if (unlikely(err))
> > +           return err;
> > +
> > +   if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
> > +           __be32 diff[] = { ~(*stack), *mpls_lse };
> > +           skb->csum = ~csum_partial((char *)diff, sizeof(diff),
> > +                                     ~skb->csum);
> > +   }
> > +
> > +   *stack = *mpls_lse;
> > +
> > +   return 0;
> > +}
> > +
> > /* remove VLAN header from packet and update csum accordingly. */
> > static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
> > {
> > @@ -115,6 +199,9 @@ static int push_vlan(struct sk_buff *skb, const struct 
> > ovs_action_push_vlan *vla
> >             if (!__vlan_put_tag(skb, current_tag))
> >                     return -ENOMEM;
> > 
> > +           /* update mac_len for MPLS functions */
> > +           skb_reset_mac_len(skb);
> > +
> >             if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> >                     skb->csum = csum_add(skb->csum, csum_partial(skb->data
> >                                     + (2 * ETH_ALEN), VLAN_HLEN, 0));
> > @@ -352,13 +439,26 @@ static int set_tcp(struct sk_buff *skb, const struct 
> > ovs_key_tcp *tcp_port_key)
> >     return 0;
> > }
> > 
> > -static int do_output(struct datapath *dp, struct sk_buff *skb, int 
> > out_port)
> > +static int do_output(struct datapath *dp, struct sk_buff *skb, int 
> > out_port,
> > +                unsigned mpls_stack_depth)
> > {
> >     struct vport *vport;
> > 
> >     if (unlikely(!skb))
> >             return -ENOMEM;
> > 
> > +   /* The mpls_stack_depth is only non zero if a non-MPLS packet is
> > +    * turned into an MPLS packet via an MPLS push action. In this case
> > +    * the skb may be GSO so update skb->mac_len and skb's
> > +    * network_header to correspond to the bottom of the MPLS label
> > +    * stack rather than the end of the original L2 data which is now
> > +    * the top of the MPLS label stack.  */
> 
> It is not clear to me that "bottom of the MPLS label stack" necessarily 
> refers to the start of L3 header, "Bottom of stack" having a special meaning 
> with MPLS.
> It might be clearer to state that "during action execution network_header, 
> and mac_len, correspondingly, have tracked the end of the L2 frame (including 
> any VLAN headers), but proper skb output processing (e.g., GSO) requires the 
> network_header (and mac_len) to track the start of the L3 header instead. 
> These differ in the presence of MPLS headers."

Thanks, I will update the wording.

> > +   if (mpls_stack_depth) {
> > +           skb->mac_len += MPLS_HLEN * mpls_stack_depth;
> > +           skb_set_network_header(skb, skb->mac_len);
> > +           skb_set_encapsulation_features(skb);
> > +   }
> > +
> >     vport = ovs_vport_rcu(dp, out_port);
> >     if (unlikely(!vport)) {
> >             kfree_skb(skb);
> > @@ -398,7 +498,7 @@ static int output_userspace(struct datapath *dp, struct 
> > sk_buff *skb,
> > }
> > 
> > static int sample(struct datapath *dp, struct sk_buff *skb,
> > -             const struct nlattr *attr)
> > +             unsigned *mpls_stack_depth, const struct nlattr *attr)
> > {
> >     const struct nlattr *acts_list = NULL;
> >     const struct nlattr *a;
> > @@ -418,8 +518,9 @@ static int sample(struct datapath *dp, struct sk_buff 
> > *skb,
> >             }
> >     }
> > 
> > -   return do_execute_actions(dp, skb, nla_data(acts_list),
> > -                             nla_len(acts_list), true);
> > +   return do_execute_actions(dp, skb, mpls_stack_depth,
> > +                             nla_data(acts_list), nla_len(acts_list),
> > +                             true);
> > }
> > 
> > static int execute_set_action(struct sk_buff *skb,
> > @@ -459,13 +560,23 @@ static int execute_set_action(struct sk_buff *skb,
> >     case OVS_KEY_ATTR_UDP:
> >             err = set_udp(skb, nla_data(nested_attr));
> >             break;
> > +
> > +   case OVS_KEY_ATTR_MPLS:
> > +           err = set_mpls(skb, nla_data(nested_attr));
> > +           break;
> >     }
> > 
> >     return err;
> > }
> > 
> > -/* Execute a list of actions against 'skb'. */
> > +/* Execute a list of actions against 'skb'.
> > + *
> > + * The stack depth is only tracked in the case of a non-MPLS packet
> > + * that becomes MPLS via an MPLS push action. The stack depth
> > + * is passed to do_output() in order to allow it to prepare the
> > + * skb for possible GSO segmentation. */
> > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> > +                   unsigned *mpls_stack_depth,
> >                     const struct nlattr *attr, int len, bool keep_skb)
> > {
> >     /* Every output action needs a separate clone of 'skb', but the common
> > @@ -481,7 +592,8 @@ static int do_execute_actions(struct datapath *dp, 
> > struct sk_buff *skb,
> >             int err = 0;
> > 
> >             if (prev_port != -1) {
> > -                   do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
> > +                   do_output(dp, skb_clone(skb, GFP_ATOMIC),
> > +                             prev_port, *mpls_stack_depth);
> >                     prev_port = -1;
> >             }
> > 
> > @@ -494,6 +606,18 @@ static int do_execute_actions(struct datapath *dp, 
> > struct sk_buff *skb,
> >                     output_userspace(dp, skb, a);
> >                     break;
> > 
> > +           case OVS_ACTION_ATTR_PUSH_MPLS:
> > +                   err = push_mpls(skb, nla_data(a));
> > +                   if (!eth_p_mpls(skb->protocol))
> > +                           (*mpls_stack_depth)++;
> > +                   break;
> > +
> > +           case OVS_ACTION_ATTR_POP_MPLS:
> > +                   err = pop_mpls(skb, nla_data(a));
> > +                   if (!eth_p_mpls(skb->protocol))
> > +                           (*mpls_stack_depth)--;
> > +                   break;
> > +
> 
> In both cases the stack is changed whenever err == 0, but it is not 
> immediately clear to me whether the '!eth_p_mpls(skb->protocol)' is true if 
> and only if the label stack has changed.

Thanks, I will change the code to something like this:

        if (!err && !eth_p_mpls(skb->protocol))
                ...
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to