Hello,

Currently, OVS GRE tunnels use Ethertype 0x6558 (Transparent Ethernet 
Bridging), so the GRE packets produced by OVS are always 
xxx-over-Ethernet-over-GRE. Symmetrically, OVS expects received GRE 
packets to have the same ethertype and to carry an Ethernet payload.

I have written the included patch, which does the following:
- add a "noeth" option for a GRE tunnel, so that the Ethernet header is 
stripped before GRE encapsulation (the option, of course, defaults to 
the current behavior)
- on reception, recreate a fake Ethernet header if the ethertype of the 
received packet is not 0x6558 -- this behavior is similar to what the 
LISP tunneling code does

Note that, in both cases, the code takes care of preserving correct 
ethertypes.

As a result, this patch makes it possible to set up GRE tunnels that 
interconnect OVS with network devices using the usual x-over-GRE 
approach, such as IP/MPLS routers.

The patch builds and passes unit tests.
It has been tested to confirm with a packet capture that:
- with the option set on a tunnel, the GRE packets from this tunnel are 
sent and received as expected
- for a tunnel without this option, or with the option disabled, the 
current behavior is preserved

Caveats:
- documentation and unit test are missing, but I'll be happy to 
contribute these later
- I am not an experienced C coder, please be kind :)
- some parts of the code are a bit of guesswork on my part: I'm unsure 
what needs to be done for GSO (possibly it can be done later?), or 
whether re-checksumming is really needed after recreating a fake 
Ethernet header

I'd be happy to have comments on the approach and on the patch and to 
take them into account.

Thank you,

-Thomas

  datapath/datapath.c                            |    1
  datapath/flow_netlink.c                        |    7 +++
  datapath/linux/compat/include/net/gre.h        |    4 +
  datapath/linux/compat/include/net/ip_tunnels.h |    1
  datapath/vport-gre.c                           |   57 
++++++++++++++++++++++++-
  include/linux/openvswitch.h                    |    1
  lib/flow.c                                     |    2
  lib/flow.h                                     |    1
  lib/netdev-vport.c                             |    9 +++
  lib/netdev.h                                   |    1
  lib/odp-util.c                                 |   11 ++++
  lib/odp-util.h                                 |    3 -
  ofproto/tunnel.c                               |    7 ++-
  13 files changed, 101 insertions(+), 4 deletions(-)

_________________________________________________________________________________________________________________________

Ce message et ses pieces jointes peuvent contenir des informations 
confidentielles ou privilegiees et ne doivent donc
pas etre diffuses, exploites ou copies sans autorisation. Si vous avez recu ce 
message par erreur, veuillez le signaler
a l'expediteur et le detruire ainsi que les pieces jointes. Les messages 
electroniques etant susceptibles d'alteration,
Orange decline toute responsabilite si ce message a ete altere, deforme ou 
falsifie. Merci.

This message and its attachments may contain confidential or privileged 
information that may be protected by law;
they should not be distributed, used or copied without authorisation.
If you have received this email in error, please notify the sender and delete 
this message and its attachments.
As emails may be altered, Orange is not liable for messages that have been 
modified, changed or falsified.
Thank you.

diff --git a/datapath/datapath.c b/datapath/datapath.c
index f7c3391..cc01eb0 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -361,6 +361,7 @@ static size_t key_attr_size(void)
 		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_NOETH */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 40751cb..8c09deb 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -344,6 +344,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+			[OVS_TUNNEL_KEY_ATTR_NOETH] = 0,
 		};
 
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -388,6 +389,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
 			tun_flags |= TUNNEL_CSUM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_NOETH:
+			tun_flags |= TUNNEL_NOETH;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -445,6 +449,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_CSUM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_NOETH) &&
+		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_NOETH))
+		return -EMSGSIZE;
 
 	nla_nest_end(skb, nla);
 	return 0;
diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
index a6f29c4..19ea24c 100644
--- a/datapath/linux/compat/include/net/gre.h
+++ b/datapath/linux/compat/include/net/gre.h
@@ -50,6 +50,8 @@ static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
 		tflags |= TUNNEL_REC;
 	if (flags & GRE_VERSION)
 		tflags |= TUNNEL_VERSION;
+	if (flags & GRE_NOETH)
+		tflags |= TUNNEL_NOETH; 
 
 	return tflags;
 }
@@ -72,6 +74,8 @@ static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
 		flags |= GRE_REC;
 	if (tflags & TUNNEL_VERSION)
 		flags |= GRE_VERSION;
+	if (tflags & TUNNEL_NOETH)
+		flags |= GRE_NOETH;
 
 	return flags;
 }
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index a786aa9..1b55076 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -20,6 +20,7 @@
 #define TUNNEL_VERSION	__cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
+#define TUNNEL_NOETH	__cpu_to_be16(0x0200)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 8737b63..4992f93 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -75,7 +75,40 @@ static struct sk_buff *__build_header(struct sk_buff *skb,
 
 	tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
 
-	tpi.proto = htons(ETH_P_TEB);
+	if (! (OVS_CB(skb)->tun_key->tun_flags & TUNNEL_NOETH)) {
+        	tpi.proto = htons(ETH_P_TEB);
+	} else {
+		/* if TUNNEL_NOETH option was set, we strip the Ethernet and VLAN 
+		headers, and extract the correct protocol type, which will be used 
+		as GRE ethertype. We can't use skb_pull(skb, skb_network_offset(skb)), 
+                which would remove the MPLS header too (not what we want here). 
+                The code does a similar loop as in skb_network_protocol. */
+                __be16 type = skb->protocol;
+		int strip_len = ETH_HLEN;
+
+		/* increase strip_len and remember ethertype for
+		   each intermediate VLAN header */
+		while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+			struct vlan_hdr *vh;
+
+			if (unlikely(!pskb_may_pull(skb, strip_len + VLAN_HLEN)))
+				return 0;
+
+			vh = (struct vlan_hdr *)(skb->data + strip_len);
+			type = vh->h_vlan_encapsulated_proto;
+			strip_len += VLAN_HLEN;
+		}
+
+		tpi.proto = type;
+
+		__skb_pull(skb, strip_len);
+		
+		/* TM: this is based on vport-lisp.c, unsure if it really applies here, and more is possibly needed for GSO ?? */
+                skb_reset_mac_header(skb);
+		vlan_set_tci(skb, 0);
+		skb_reset_inner_headers(skb); 
+	}
+
 	tpi.key = be64_get_low32(tun_key->tun_id);
 	tpi.seq = seq;
 	gre_build_header(skb, &tpi, tunnel_hlen);
@@ -112,6 +145,28 @@ static int gre_rcv(struct sk_buff *skb,
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
 	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
 
+	if (tpi->proto != htons(ETH_P_TEB)) {
+		/* if the remote tunnel endpoint is an OVS configured with NOETH, or 
+		any other GRE implementation not using Transparent Ethernet Bridging, 
+		we need to create a fake Ethernet header for further processing by OVS */
+		struct ethhdr *ethh;
+		int err;
+
+		err = pskb_expand_head(skb, 14, 0, GFP_ATOMIC);
+		if (unlikely(err))
+			return PACKET_REJECT;
+
+		ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+		memset(ethh, 0, ETH_HLEN);
+		ethh->h_dest[0] = 0x02;
+		ethh->h_dest[5] = 0x01;
+		ethh->h_source[0] = 0x02;
+		ethh->h_source[5] = 0x02;
+		ethh->h_proto = tpi->proto;
+
+		ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); /* needed ?*/
+ 	}
+
 	ovs_vport_receive(vport, skb, &tun_key);
 	return PACKET_RCVD;
 }
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index d1ff5ec..b032a07 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -328,6 +328,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_TTL,		/* u8 Tunnel IP TTL. */
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,	/* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,		/* No argument. CSUM packet. */
+	OVS_TUNNEL_KEY_ATTR_NOETH,		/* No argument. For GRE: do not use TEB ethertype and strip the Eth header. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/lib/flow.c b/lib/flow.c
index e7fe4d3..699ed46 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -563,6 +563,8 @@ flow_tun_flag_to_string(uint32_t flags)
         return "csum";
     case FLOW_TNL_F_KEY:
         return "key";
+    case FLOW_TNL_F_NOETH:
+        return "noeth";
     default:
         return NULL;
     }
diff --git a/lib/flow.h b/lib/flow.h
index 3109a84..f172a1b 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -56,6 +56,7 @@ BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER);
 #define FLOW_TNL_F_DONT_FRAGMENT (1 << 0)
 #define FLOW_TNL_F_CSUM (1 << 1)
 #define FLOW_TNL_F_KEY (1 << 2)
+#define FLOW_TNL_F_NOETH (1 << 3)
 
 const char *flow_tun_flag_to_string(uint32_t flags);
 
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 165c1c6..3198cc4 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -328,6 +328,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
     needs_dst_port = netdev_vport_needs_dst_port(dev_);
     tnl_cfg.ipsec = strstr(type, "ipsec");
     tnl_cfg.dont_fragment = true;
+    tnl_cfg.noeth = false;
 
     SMAP_FOR_EACH (node, args) {
         if (!strcmp(node->key, "remote_ip")) {
@@ -383,6 +384,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
             if (!strcmp(node->value, "false")) {
                 tnl_cfg.dont_fragment = false;
             }
+        } else if (!strcmp(node->key, "noeth")) {
+            if (!strcmp(node->value, "true")) {
+                tnl_cfg.noeth = true;
+            }
         } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) {
             if (smap_get(args, "certificate")) {
                 ipsec_mech_set = true;
@@ -562,6 +567,10 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
     if (!tnl_cfg.dont_fragment) {
         smap_add(args, "df_default", "false");
     }
+    
+    if (tnl_cfg.noeth) {
+        smap_add(args, "noeth", "true");
+    }
 
     return 0;
 }
diff --git a/lib/netdev.h b/lib/netdev.h
index 410c35b..88d7771 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -126,6 +126,7 @@ struct netdev_tunnel_config {
     bool csum;
     bool ipsec;
     bool dont_fragment;
+    bool noeth;
 };
 
 void netdev_run(void);
diff --git a/lib/odp-util.c b/lib/odp-util.c
index e20564f..666953e 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -801,6 +801,7 @@ tunnel_key_attr_len(int type)
     case OVS_TUNNEL_KEY_ATTR_TTL: return 1;
     case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0;
     case OVS_TUNNEL_KEY_ATTR_CSUM: return 0;
+    case OVS_TUNNEL_KEY_ATTR_NOETH: return 0;
     case __OVS_TUNNEL_KEY_ATTR_MAX:
         return -1;
     }
@@ -848,6 +849,9 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
         case OVS_TUNNEL_KEY_ATTR_CSUM:
             tun->flags |= FLOW_TNL_F_CSUM;
             break;
+        case OVS_TUNNEL_KEY_ATTR_NOETH:
+            tun->flags |= FLOW_TNL_F_NOETH;
+            break;
         default:
             /* Allow this to show up as unexpected, if there are unknown
              * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */
@@ -891,6 +895,9 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key)
     if (tun_key->flags & FLOW_TNL_F_CSUM) {
         nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM);
     }
+    if (tun_key->flags & FLOW_TNL_F_NOETH) {
+        nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_NOETH);
+    }
 
     nl_msg_end_nested(a, tun_key_ofs);
 }
@@ -917,7 +924,9 @@ odp_mask_attr_is_exact(const struct nlattr *ma)
         odp_tun_key_from_attr(ma, &tun_mask);
         if (tun_mask.flags == (FLOW_TNL_F_KEY
                                | FLOW_TNL_F_DONT_FRAGMENT
-                               | FLOW_TNL_F_CSUM)) {
+                               | FLOW_TNL_F_CSUM
+                               | FLOW_TNL_F_NOETH  /* TODO: I'm unsure... */
+                              )) {
             /* The flags are exact match, check the remaining fields. */
             tun_mask.flags = 0xffff;
             is_exact = is_all_ones((uint8_t *)&tun_mask,
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 7bc64c7..6ced8c6 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -106,6 +106,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
  *  - OVS_TUNNEL_KEY_ATTR_TTL            1    3      4      8
  *  - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT  0    --     4      4
  *  - OVS_TUNNEL_KEY_ATTR_CSUM           0    --     4      4
+ *  - OVS_TUNNEL_KEY_ATTR_NOETH          0    --     4      4
  *  OVS_KEY_ATTR_IN_PORT                 4    --     4      8
  *  OVS_KEY_ATTR_SKB_MARK                4    --     4      8
  *  OVS_KEY_ATTR_ETHERNET               12    --     4     16
@@ -117,7 +118,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
  *  OVS_KEY_ATTR_ICMPV6                  2     2     4      8
  *  OVS_KEY_ATTR_ND                     28    --     4     32
  *  ----------------------------------------------------------
- *  total                                                 208
+ *  total                                                 212
  *
  * We include some slack space in case the calculation isn't quite right or we
  * add another field and forget to adjust this value.
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index 38b782f..4e1e7d2 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -406,7 +406,8 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
 
     flow->tunnel.flags = (cfg->dont_fragment ? FLOW_TNL_F_DONT_FRAGMENT : 0)
         | (cfg->csum ? FLOW_TNL_F_CSUM : 0)
-        | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0);
+        | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0)
+        | (cfg->noeth ? FLOW_TNL_F_NOETH : 0);
 
     if (pre_flow_str) {
         char *post_flow_str = flow_to_string(flow);
@@ -611,6 +612,10 @@ tnl_port_fmt(const struct tnl_port *tnl_port) OVS_REQ_RDLOCK(rwlock)
         ds_put_cstr(&ds, ", csum=true");
     }
 
+    if (cfg->noeth) {
+        ds_put_cstr(&ds, ", noeth=true");
+    }
+
     ds_put_cstr(&ds, ")\n");
 
     return ds_steal_cstr(&ds);
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to