On Dec 27, 2012, at 7:23 , ext Simon Horman wrote:
> diff --git a/datapath/actions.c b/datapath/actions.c
> index f638ffc..3278e37 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -49,6 +49,64 @@ static int make_writable(struct sk_buff *skb, int 
> write_len)
>       return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
> }
> 
> +static __be16 get_ethertype(const struct sk_buff *skb)
> +{
> +     struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN);
> +     return hdr->h_proto;
> +}
> +
> +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
> +{
> +     struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN);
> +     hdr->h_proto = ethertype;
> +}
> +

These would fail if skb_cb_mpls_bos() ever returned NULL. According to the
documented semantics this would happen for every non-MPLS packet. However,
it seems that OVS_CB(skb)->mpls_bos is actually set for every packet, and always
contains the length of the Ethernet headers, including possible VLAN headers. It
would be good to change the comment in datapath.h accordingly (see below), and
rename the field. See more detailed comments below in context.

> +static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls 
> *mpls)
> +{
> +     u32 l2_size;
> +     __be32 *new_mpls_label;
> +
> +     if (skb_cow_head(skb, MPLS_HLEN) < 0) {
> +             kfree_skb(skb);
> +             return -ENOMEM;
> +     }
> +
> +     l2_size = skb_cb_mpls_bos_offset(skb);
> +     skb_push(skb, MPLS_HLEN);
> +     memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
> +     skb_reset_mac_header(skb);
> +
> +     new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
> +     *new_mpls_label = mpls->mpls_label;
> +
> +     set_ethertype(skb, mpls->mpls_ethertype);
> +     return 0;
> +}
> +
> +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> +{
> +     __be16 current_ethertype = get_ethertype(skb);
> +     if (eth_p_mpls(current_ethertype)) {
> +             u32 l2_size = skb_cb_mpls_bos_offset(skb);
> +
> +             memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), 
> l2_size);
> +
> +             skb_pull(skb, MPLS_HLEN);
> +             skb_reset_mac_header(skb);
> +
> +             set_ethertype(skb, *ethertype);
> +     }
> +     return 0;
> +}
> +
> +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
> +{
> +     __be16 current_ethertype = get_ethertype(skb);
> +     if (eth_p_mpls(current_ethertype))
> +             memcpy(skb_cb_mpls_bos(skb), mpls_label, sizeof(__be32));
> +     return 0;
> +}
> +
> /* remove VLAN header from packet and update csum accordingly. */
> static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
> {
> @@ -95,13 +153,17 @@ static int pop_vlan(struct sk_buff *skb)
>       /* move next vlan tag to hw accel tag */
>       if (likely(skb->protocol != htons(ETH_P_8021Q) ||
>                  skb->len < VLAN_ETH_HLEN))
> -             return 0;
> +             goto update_mpls_bos;
> 
>       err = __pop_vlan_tci(skb, &tci);
>       if (unlikely(err))
>               return err;
> 
>       __vlan_hwaccel_put_tag(skb, ntohs(tci));
> +
> +update_mpls_bos:
> +     /* update pointer to MPLS label stack */
> +     skb_cb_set_mpls_bos(skb);
>       return 0;
> }
> 
> @@ -122,6 +184,9 @@ static int push_vlan(struct sk_buff *skb, const struct 
> ovs_action_push_vlan *vla
> 
>       }
>       __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
> +
> +     /* update pointer to MPLS label stack */
> +     skb_cb_set_mpls_bos(skb);
>       return 0;
> }
> 

I haven't checked the details, but wouldn't it be far more efficient to just fix
the "mpls_bos" offset by the amount of bytes removed or added, rather than
setting the "mpls_bos" offset from scratch?

This would be easier to grasp if the "mpls_bos" field would be renamed as 
"l2_size".

> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index deb32ee..747dbb6 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -74,6 +74,43 @@ int ovs_net_id __read_mostly;
> int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
> EXPORT_SYMBOL(ovs_dp_ioctl_hook);
> 
> +void skb_cb_set_mpls_bos(struct sk_buff *skb)
> +{
> +     struct ethhdr *eth;
> +     int nh_ofs;
> +     __be16 dl_type = 0;
> +
> +     skb_reset_mac_header(skb);
> +
> +     eth = eth_hdr(skb);
> +     nh_ofs = sizeof(struct ethhdr);
> +     if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) {
> +             dl_type = eth->h_proto;
> +
> +             while (dl_type == htons(ETH_P_8021Q) &&
> +                             skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
> +                     struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + 
> nh_ofs);
> +                     dl_type = vh->h_vlan_encapsulated_proto;
> +                     nh_ofs += sizeof(struct vlan_hdr);
> +             }
> +
> +             OVS_CB(skb)->mpls_bos = nh_ofs;
> +     } else {
> +             OVS_CB(skb)->mpls_bos = 0;
> +     }
> +}

This function definition has nothing to that refers to MPLS. As it is called
for every packet that enters the datapath, "mpls_bos" seems a misnormer.

The condition for setting "mpls_bos" to 0 seems to be non-presense of a
valid Ethernet header, rather than non-presense of the MPLS stack.

Again, using this for updates when VLAN headers are pushed or popped
seems a bit excessive.

> +
> +unsigned char *skb_cb_mpls_bos(const struct sk_buff *skb)
> +{
> +     return OVS_CB(skb)->mpls_bos ?
> +             skb_mac_header(skb) + OVS_CB(skb)->mpls_bos : NULL;
> +}

Note returning NULL here v.s. the use in set/get_ethertype().

In the current apparent semantics (see below) it would be sensible to
just "return skb_mac_header(skb) + OVS_CB(skb)->mpls_bos;".

> +
> +ptrdiff_t skb_cb_mpls_bos_offset(const struct sk_buff *skb)
> +{
> +     return OVS_CB(skb)->mpls_bos;
> +}
> +

This function could be renamed as "skb_cb_l2_size()".

> @@ -835,6 +885,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
> struct genl_info *info)
>       packet->priority = flow->key.phy.priority;
>       skb_set_mark(packet, flow->key.phy.skb_mark);
> 
> +     skb_cb_set_mpls_bos(packet);
> +
>       rcu_read_lock();
>       dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>       err = -ENODEV;
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index af2e5a1..e44d053 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -98,6 +98,8 @@ struct datapath {
>  * @flow: The flow associated with this packet.  May be %NULL if no flow.
>  * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
>  * packet is not being tunneled.
> + * @mpls_bos: Offset of the packet's MPLS stack from the beginning of the
> + * ethernet frame.  It is 0 if no MPLS stack is present.

This is not correct. Current semantics would seem to be:

"Length of the packet's Ethernet header, including any VLAN headers.
This is the offset from the beginning of the ethernet frame where MPLS
stack would be, if one is present. It is 0 when there is no L2 header."

Therefore, and taking a cue from the actual use, it would be somewhat
clearer if "mpls_bos" be renamed as "l2_size".

>  * @ip_summed: Consistently stores L4 checksumming status across different
>  * kernel versions.
>  * @csum_start: Stores the offset from which to start checksumming independent
> @@ -109,6 +111,7 @@ struct datapath {
> struct ovs_skb_cb {
>       struct sw_flow          *flow;
>       struct ovs_key_ipv4_tunnel  *tun_key;
> +     ptrdiff_t       mpls_bos;
> #ifdef NEED_CSUM_NORMALIZE
>       enum csum_type          ip_summed;
>       u16                     csum_start;

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to