Implement the pop_eth and push_eth actions in the kernel datapath, and add
layer 3 flow support.  Jesse Gross provided feedback on a previous version
of this RFC patch; all of those comments are addressed here.

Signed-off-by: Lorand Jakab <loja...@cisco.com>
---
 datapath/actions.c      | 32 ++++++++++++++++++++++++++++++++
 datapath/datapath.h     |  1 +
 datapath/flow.c         | 43 ++++++++++++++++++++++++-------------------
 datapath/flow.h         |  1 +
 datapath/flow_netlink.c | 20 ++++++++++++++++++--
 datapath/vport-gre.c    |  1 +
 datapath/vport-lisp.c   | 17 +++--------------
 datapath/vport-netdev.c |  1 +
 datapath/vport-vxlan.c  |  1 +
 9 files changed, 82 insertions(+), 35 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 30ea1d2..b90e715 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -143,6 +143,30 @@ static int set_eth_addr(struct sk_buff *skb,
        return 0;
 }
 
+static int pop_eth(struct sk_buff *skb) /* drop all bytes before the L3 header */
+{
+       skb_pull_rcsum(skb, skb_network_offset(skb)); /* keeps skb->csum valid */
+       return 0;
+}
+
+static int push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh)
+{
+       int err;
+
+       /* Prepend an Ethernet header built from 'ethh'.  The skb may lack
+        * headroom (or share its header), so make room before skb_push(). */
+       if (skb_cow_head(skb, ETH_HLEN) < 0)
+               return -ENOMEM;
+       skb_push(skb, ETH_HLEN);
+       skb_reset_mac_header(skb);
+       err = set_eth_addr(skb, &ethh->addresses);
+       if (unlikely(err))
+               return err;
+       eth_hdr(skb)->h_proto = ethh->eth_type;
+       ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+       return 0;
+}
+
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
                                __be32 *addr, __be32 new_addr)
 {
@@ -546,6 +570,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        err = pop_vlan(skb);
                        break;
 
+               case OVS_ACTION_ATTR_PUSH_ETH:
+                       err = push_eth(skb, nla_data(a));
+                       break;
+
+               case OVS_ACTION_ATTR_POP_ETH:
+                       err = pop_eth(skb);
+                       break;
+
                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, nla_data(a));
                        break;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index b3ae7cd..f470f14 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -105,6 +105,7 @@ struct ovs_skb_cb {
        struct sw_flow          *flow;
        struct sw_flow_key      *pkt_key;
        struct ovs_key_ipv4_tunnel  *tun_key;
+       bool is_layer3;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/datapath/flow.c b/datapath/flow.c
index 8be3801..e31fbed 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -458,26 +458,31 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
        skb_reset_mac_header(skb);
 
-       /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-        * header in the linear data area.
-        */
-       eth = eth_hdr(skb);
-       memcpy(key->eth.src, eth->h_source, ETH_ALEN);
-       memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
-
-       __skb_pull(skb, 2 * ETH_ALEN);
-       /* We are going to push all headers that we pull, so no need to
-        * update skb->csum here. */
-
-       if (vlan_tx_tag_present(skb))
-               key->eth.tci = htons(vlan_get_tci(skb));
-       else if (eth->h_proto == htons(ETH_P_8021Q))
-               if (unlikely(parse_vlan(skb, key)))
+       /* Link layer. */
+       if (OVS_CB(skb)->is_layer3) {
+               /* The receiving L3 vport should set the inner packet protocol
+                * on the skb.  We use that here to set eth.type */
+               key->noeth = true;
+               key->eth.type = skb->protocol;
+       } else {
+               eth = eth_hdr(skb);
+               memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+               memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+
+               /* We are going to push all headers that we pull, so no need to
+                * update skb->csum here. */
+               __skb_pull(skb, 2 * ETH_ALEN);
+
+               if (vlan_tx_tag_present(skb))
+                       key->eth.tci = htons(vlan_get_tci(skb));
+               else if (eth->h_proto == htons(ETH_P_8021Q))
+                       if (unlikely(parse_vlan(skb, key)))
+                               return -ENOMEM;
+
+               key->eth.type = parse_ethertype(skb);
+               if (unlikely(key->eth.type == htons(0)))
                        return -ENOMEM;
-
-       key->eth.type = parse_ethertype(skb);
-       if (unlikely(key->eth.type == htons(0)))
-               return -ENOMEM;
+       }
 
        skb_reset_network_header(skb);
        __skb_push(skb, skb->data - skb_mac_header(skb));
diff --git a/datapath/flow.h b/datapath/flow.h
index eafcfd8..df2fb05 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -69,6 +69,7 @@ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 
 struct sw_flow_key {
        struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+       bool noeth;                     /* Packet has no Ethernet header */
        struct {
                u32     priority;       /* Packet QoS priority. */
                u32     skb_mark;       /* SKB mark. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 9b26528..8f71c49 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -106,7 +106,7 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
 static bool match_validate(const struct sw_flow_match *match,
                           u64 key_attrs, u64 mask_attrs)
 {
-       u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+       u64 key_expected = 0;
        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 
        /* The following mask attributes allowed only if they
@@ -125,6 +125,7 @@ static bool match_validate(const struct sw_flow_match *match,
        /* Always allowed mask fields. */
        mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
                       | (1ULL << OVS_KEY_ATTR_IN_PORT)
+                      | (1ULL << OVS_KEY_ATTR_ETHERNET)
                       | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
 
        /* Check key attributes. */
@@ -521,6 +522,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  bool *exact_5tuple
                SW_FLOW_KEY_MEMCPY(match, eth.dst,
                                eth_key->eth_dst, ETH_ALEN, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
+       } else {
+               SW_FLOW_KEY_PUT(match, noeth, true, is_mask);
        }
 
        if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
@@ -951,7 +954,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
                     const struct sw_flow_key *output, struct sk_buff *skb)
 {
        struct ovs_key_ethernet *eth_key;
-       struct nlattr *nla, *encap;
+       struct nlattr *nla, *encap = NULL;
        bool is_mask = (swkey != output);
 
        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
@@ -977,6 +980,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
        if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
                goto nla_put_failure;
 
+       if (swkey->noeth)
+               goto noethernet;
+
        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
        if (!nla)
                goto nla_put_failure;
@@ -1011,6 +1017,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
                goto unencap;
        }
 
+noethernet:
        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
                goto nla_put_failure;
 
@@ -1512,6 +1519,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+                       [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct 
ovs_action_push_eth),
+                       [OVS_ACTION_ATTR_POP_ETH] = 0,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct 
ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1543,6 +1552,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                        break;
 
 
+               case OVS_ACTION_ATTR_POP_ETH:
+                       break;
+
+               case OVS_ACTION_ATTR_PUSH_ETH:
+                       /* TODO May need to validate eth_type? */
+                       break;
+
                case OVS_ACTION_ATTR_POP_VLAN:
                        break;
 
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 8737b63..2737cd2 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -112,6 +112,7 @@ static int gre_rcv(struct sk_buff *skb,
        key = key_to_tunnel_id(tpi->key, tpi->seq);
        ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 
filter_tnl_flags(tpi->flags));
 
+       OVS_CB(skb)->is_layer3 = false;
        ovs_vport_receive(vport, skb, &tun_key);
        return PACKET_RCVD;
 }
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index c2698ae..b56eec7 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -218,8 +218,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
        struct iphdr *iph, *inner_iph;
        struct ovs_key_ipv4_tunnel tun_key;
        __be64 key;
-       struct ethhdr *ethh;
-       __be16 protocol;
 
        lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
        if (unlikely(!lisp_port))
@@ -243,25 +241,16 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
        inner_iph = (struct iphdr *)(lisph + 1);
        switch (inner_iph->version) {
        case 4:
-               protocol = htons(ETH_P_IP);
+               skb->protocol = htons(ETH_P_IP);
                break;
        case 6:
-               protocol = htons(ETH_P_IPV6);
+               skb->protocol = htons(ETH_P_IPV6);
                break;
        default:
                goto error;
        }
-       skb->protocol = protocol;
-
-       /* Add Ethernet header */
-       ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
-       memset(ethh, 0, ETH_HLEN);
-       ethh->h_dest[0] = 0x02;
-       ethh->h_source[0] = 0x02;
-       ethh->h_proto = protocol;
-
-       ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 
+       OVS_CB(skb)->is_layer3 = true;
        ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key);
        goto out;
 
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index c15923b..14430e0 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -209,6 +209,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
        skb_push(skb, ETH_HLEN);
        ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 
+       OVS_CB(skb)->is_layer3 = false;
        ovs_vport_receive(vport, skb, NULL);
        return;
 
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index ab2b6f7..dfac617 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -70,6 +70,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
        key = cpu_to_be64(ntohl(vx_vni) >> 8);
        ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
+       OVS_CB(skb)->is_layer3 = false;
        ovs_vport_receive(vport, skb, &tun_key);
 }
 
-- 
1.8.3.4 (Apple Git-47)

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to