Hi,

LISP tunneling support is not yet ready for upstreaming, for the reasons
outlined in this message:

http://openvswitch.org/pipermail/dev/2013-February/025459.html

One solution to the above issues is to make OVS less Ethernet specific,
meaning that it should accept and work with packets/flows without an
Ethernet header.

At a high level, we would introduce layer 3 (tunnel) vports, and LISP
would be such a vport.  Whenever a packet that ingressed on an L2 vport
needs to egress on an L3 vport, we apply the internal pop_eth action
automatically.  For packets going from L3 vports to L2 vports, a
push_eth action would add a MAC header, with addresses determined by ARP
resolution in user space.

I attached a patch to this email with proposed changes to the datapath
to make this happen.  I didn't use git-send-email since it is still
early work and I don't expect anyone to apply it; I just wanted to get
some early feedback on some of the design decisions.

One such decision is how to handle the flow key.  I set all fields in
key->eth to 0, except the type, because we still need to know what kind
of L3 packet we have.  Since a lot of code accesses key->eth.type, this
is easier than having this information in a different place, although
it would be more elegant to set this field to 0 as well.  Now, in order
to differentiate flows with MAC addresses set to 0 from flows without an
Ethernet header, I added a boolean field to tun_key to mark L3 flows.
However, if we expect to have non-tunneled L3 ports (I couldn't find a
good reason for this), then we should move it out into the main flow key
structure.

Let me know what you think.

Thanks,
-Lori
diff --git a/datapath/actions.c b/datapath/actions.c
index 0dac658..fcb80a5 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -138,6 +138,55 @@ static int set_eth_addr(struct sk_buff *skb,
 	return 0;
 }
 
+/*
+ * pop_eth - strip the Ethernet header so the packet starts at layer 3.
+ * @skb: packet whose network header offset has already been set.
+ *
+ * Pulls everything in front of the network header and re-anchors the MAC
+ * header at the new start of data, so that a later eth_hdr() does not refer
+ * to the bytes that were just removed.  Always returns 0.
+ *
+ * NOTE(review): skb->csum is not adjusted here; confirm whether
+ * CHECKSUM_COMPLETE packets can reach this action.
+ */
+static int pop_eth(struct sk_buff *skb)
+{
+	skb_pull(skb, skb_network_offset(skb));
+	skb_reset_mac_header(skb);
+	return 0;
+}
+
+/*
+ * push_eth - prepend an Ethernet header to a layer 3 packet.
+ * @skb: packet to modify; its data is expected to start at the L3 header.
+ * @ethh: source/destination MAC addresses and Ethertype for the new header.
+ *
+ * Returns 0 on success, or the negative error from skb_cow_head() or
+ * set_eth_addr().
+ */
+static int push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh)
+{
+	int err;
+
+	/* skb_push() panics when headroom is short, so make room first. */
+	err = skb_cow_head(skb, ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	skb_push(skb, ETH_HLEN);
+	/* eth_hdr() locates the header through skb->mac_header, which must
+	 * point at the freshly pushed bytes before any field is written.
+	 */
+	skb_reset_mac_header(skb);
+
+	err = set_eth_addr(skb, &ethh->addresses);
+	if (unlikely(err))
+		return err;
+
+	eth_hdr(skb)->h_proto = ethh->eth_type;
+	return 0;
+}
+
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 				__be32 *addr, __be32 new_addr)
 {
@@ -504,6 +553,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = pop_vlan(skb);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			err = push_eth(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_ETH:
+			err = pop_eth(skb);
+			break;
+
 		case OVS_ACTION_ATTR_SET:
 			err = execute_set_action(skb, nla_data(a));
 			break;
diff --git a/datapath/flow.c b/datapath/flow.c
index 7f897bd..98346d5 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -628,24 +628,27 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
 	skb_reset_mac_header(skb);
 
-	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-	 * header in the linear data area.
-	 */
-	eth = eth_hdr(skb);
-	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
-	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+	/* Link layer. */
+	if (key->tun_key.is_layer3) {
+		/* The L3 tunnel should set the inner packet protocol on the skb */
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+		memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
 
-	__skb_pull(skb, 2 * ETH_ALEN);
+		__skb_pull(skb, 2 * ETH_ALEN);
 
-	if (vlan_tx_tag_present(skb))
-		key->eth.tci = htons(vlan_get_tci(skb));
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
+		if (vlan_tx_tag_present(skb))
+			key->eth.tci = htons(vlan_get_tci(skb));
+		else if (eth->h_proto == htons(ETH_P_8021Q))
+			if (unlikely(parse_vlan(skb, key)))
+				return -ENOMEM;
 
-	key->eth.type = parse_ethertype(skb);
-	if (unlikely(key->eth.type == htons(0)))
-		return -ENOMEM;
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
+			return -ENOMEM;
+	}
 
 	skb_reset_network_header(skb);
 	__skb_push(skb, skb->data - skb_mac_header(skb));
@@ -1034,6 +1037,9 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
 			tun_key->tun_flags |= OVS_TNL_F_CSUM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_LAYER3:
+			tun_key->is_layer3 = true;
+			break;
 		default:
 			return -EINVAL;
 
@@ -1079,6 +1085,9 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
+	if ((tun_key->is_layer3) &&
+		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_LAYER3))
+		return -EMSGSIZE;
 
 	nla_nest_end(skb, nla);
 	return 0;
@@ -1140,13 +1149,13 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 	}
 
 	/* Data attributes. */
-	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
-		return -EINVAL;
-	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+		memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
+		memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
 
-	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
-	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
-	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+	}
 
 	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
 	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
@@ -1335,7 +1344,7 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru
 int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
+	struct nlattr *nla, *encap = NULL;
 
 	if (swkey->phy.priority &&
 	    nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
@@ -1353,6 +1362,9 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
 		goto nla_put_failure;
 
+	if (swkey->tun_key.is_layer3)
+		goto noethernet;
+
 	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
@@ -1374,6 +1386,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	if (swkey->eth.type == htons(ETH_P_802_2))
 		goto unencap;
 
+noethernet:
 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
 		goto nla_put_failure;
 
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..0033fcd 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -57,6 +57,7 @@ struct ovs_key_ipv4_tunnel {
 	u16  tun_flags;
 	u8   ipv4_tos;
 	u8   ipv4_ttl;
+	bool is_layer3;
 };
 
 struct sw_flow_key {
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 3d0a315..a1a5d22 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -197,8 +197,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	struct iphdr *iph, *inner_iph;
 	struct ovs_key_ipv4_tunnel tun_key;
 	__be64 key;
-	struct ethhdr *ethh;
-	__be16 protocol;
 
 	lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
 	if (unlikely(!lisp_port))
@@ -224,22 +222,15 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	inner_iph = (struct iphdr *)(lisph + 1);
 	switch (inner_iph->version) {
 	case 4:
-		protocol = htons(ETH_P_IP);
+		skb->protocol = htons(ETH_P_IP);
 		break;
 	case 6:
-		protocol = htons(ETH_P_IPV6);
+		skb->protocol = htons(ETH_P_IPV6);
 		break;
 	default:
 		goto error;
 	}
 
-	/* Add Ethernet header */
-	ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
-	memset(ethh, 0, ETH_HLEN);
-	ethh->h_dest[0] = 0x02;
-	ethh->h_source[0] = 0x02;
-	ethh->h_proto = protocol;
-
 	ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key);
 	goto out;
 
@@ -361,9 +352,6 @@ error:
 
 static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
-	int tnl_len;
-	int network_offset = skb_network_offset(skb);
-
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
@@ -371,11 +359,8 @@ static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 	case htons(ETH_P_IPV6):
-		/* Pop off "inner" Ethernet header */
-		skb_pull(skb, network_offset);
-		tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP,
+		return ovs_tnl_send(vport, skb, IPPROTO_UDP,
 				LISP_HLEN, lisp_build_header);
-		return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
 	default:
 		kfree_skb(skb);
 		return 0;
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e890fd8..dbff58f 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -303,6 +303,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_TTL,		/* u8 Tunnel IP TTL. */
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,	/* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,		/* No argument. CSUM packet. */
+	OVS_TUNNEL_KEY_ATTR_LAYER3,		/* No argument. Layer 3 tunnel. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
@@ -494,6 +495,16 @@ struct ovs_action_push_vlan {
 };
 
 /**
+ * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
+ * @addresses: Source and destination MAC addresses for the pushed header.
+ * @eth_type: Ethertype for the pushed header, in network byte order.
+ */
+struct ovs_action_push_eth {
+	struct ovs_key_ethernet addresses;
+	__be16	 eth_type;
+};
+
+/**
  * enum ovs_action_attr - Action types.
  *
  * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -532,6 +543,8 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
 	OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
 	OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
+	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
+	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
 	__OVS_ACTION_ATTR_MAX
 };
 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to