Hello, Currently, OVS GRE tunnels use Ethertype 0x6558 (Transparent Ethernet Bridging), so the GRE packets produced by OVS are always xxx-over-Ethernet-over-GRE. Symmetrically, OVS expects received GRE packets to have the same ethertype and to carry an Ethernet payload.
I have written the included patch, which does the following: - adds a "noeth" option for a GRE tunnel, so that the Ethernet header is stripped before GRE encapsulation (the option, of course, defaults to the current behavior) - on reception, recreates a fake Ethernet header if the ethertype of the received packet is not 0x6558 -- this behavior is similar to what the LISP tunneling code does Note that, in both cases, the code takes care of preserving the correct ethertypes. As a result, this patch makes it possible to set up GRE tunnels to interconnect OVS with network devices that use the usual x-over-GRE approach, such as IP/MPLS routers. The patch builds and passes the unit tests. It has been tested, with a packet capture, to confirm that: - with the option set on a tunnel, the GRE packets from this tunnel are sent and received as expected - for a tunnel without this option, or with the option disabled, the current behavior is preserved Caveats: - documentation and unit tests are missing, but I'll be happy to contribute these later - I am not an experienced C coder, please be kind :) - some parts of the code are a bit of guesswork on my part: I'm unsure of what needs to be done for GSO (possibly it can be done later?), or whether re-checksumming is really needed after recreating a fake Ethernet header I'd be happy to have comments on the approach and on the patch and to take them into account. 
Thank you, -Thomas datapath/datapath.c | 1 datapath/flow_netlink.c | 7 +++ datapath/linux/compat/include/net/gre.h | 4 + datapath/linux/compat/include/net/ip_tunnels.h | 1 datapath/vport-gre.c | 57 ++++++++++++++++++++++++- include/linux/openvswitch.h | 1 lib/flow.c | 2 lib/flow.h | 1 lib/netdev-vport.c | 9 +++ lib/netdev.h | 1 lib/odp-util.c | 11 ++++ lib/odp-util.h | 3 - ofproto/tunnel.c | 7 ++- 13 files changed, 101 insertions(+), 4 deletions(-) _________________________________________________________________________________________________________________________ Ce message et ses pieces jointes peuvent contenir des informations confidentielles ou privilegiees et ne doivent donc pas etre diffuses, exploites ou copies sans autorisation. Si vous avez recu ce message par erreur, veuillez le signaler a l'expediteur et le detruire ainsi que les pieces jointes. Les messages electroniques etant susceptibles d'alteration, Orange decline toute responsabilite si ce message a ete altere, deforme ou falsifie. Merci. This message and its attachments may contain confidential or privileged information that may be protected by law; they should not be distributed, used or copied without authorisation. If you have received this email in error, please notify the sender and delete this message and its attachments. As emails may be altered, Orange is not liable for messages that have been modified, changed or falsified. Thank you.
diff --git a/datapath/datapath.c b/datapath/datapath.c index f7c3391..cc01eb0 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -361,6 +361,7 @@ static size_t key_attr_size(void) + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_NOETH */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 40751cb..8c09deb 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -344,6 +344,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_NOETH] = 0, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -388,6 +389,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_NOETH: + tun_flags |= TUNNEL_NOETH; + break; default: return -EINVAL; } @@ -445,6 +449,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_NOETH) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_NOETH)) + return -EMSGSIZE; nla_nest_end(skb, nla); return 0; diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h index a6f29c4..19ea24c 100644 --- a/datapath/linux/compat/include/net/gre.h +++ b/datapath/linux/compat/include/net/gre.h @@ -50,6 +50,8 @@ static inline __be16 gre_flags_to_tnl_flags(__be16 flags) tflags |= TUNNEL_REC; if (flags & GRE_VERSION) tflags |= TUNNEL_VERSION; + if (flags & GRE_NOETH) + tflags |= TUNNEL_NOETH; return tflags; } @@ -72,6 +74,8 @@ static inline __be16 
tnl_flags_to_gre_flags(__be16 tflags) flags |= GRE_REC; if (tflags & TUNNEL_VERSION) flags |= GRE_VERSION; + if (tflags & TUNNEL_NOETH) + flags |= GRE_NOETH; return flags; } diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index a786aa9..1b55076 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -20,6 +20,7 @@ #define TUNNEL_VERSION __cpu_to_be16(0x40) #define TUNNEL_NO_KEY __cpu_to_be16(0x80) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) +#define TUNNEL_NOETH __cpu_to_be16(0x0200) struct tnl_ptk_info { __be16 flags; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 8737b63..4992f93 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -75,7 +75,40 @@ static struct sk_buff *__build_header(struct sk_buff *skb, tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag; - tpi.proto = htons(ETH_P_TEB); + if (! (OVS_CB(skb)->tun_key->tun_flags & TUNNEL_NOETH)) { + tpi.proto = htons(ETH_P_TEB); + } else { + /* if TUNNEL_NOETH option was set, we strip the Ethernet and VLAN + headers, and extract the correct protocol type, which will be used + as GRE ethertype. We can't use skb_pull(skb, skb_network_offset(skb)), + which would remove the MPLS header too (not what we want here). + The code does a similar loop as in skb_network_protocol. 
*/ + __be16 type = skb->protocol; + int strip_len = ETH_HLEN; + + /* increase strip_len and remember ethertype for + each intermediate VLAN header */ + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, strip_len + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + strip_len); + type = vh->h_vlan_encapsulated_proto; + strip_len += VLAN_HLEN; + } + + tpi.proto = type; + + __skb_pull(skb, strip_len); + + /* TM: this is based on vport-lisp.c, unsure if it really applies here, and more is possibly needed for GSO ?? */ + skb_reset_mac_header(skb); + vlan_set_tci(skb, 0); + skb_reset_inner_headers(skb); + } + tpi.key = be64_get_low32(tun_key->tun_id); tpi.seq = seq; gre_build_header(skb, &tpi, tunnel_hlen); @@ -112,6 +145,28 @@ static int gre_rcv(struct sk_buff *skb, key = key_to_tunnel_id(tpi->key, tpi->seq); ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags)); + if (tpi->proto != htons(ETH_P_TEB)) { + /* if the remote tunnel endpoint is an OVS configured with NOETH, or + any other GRE implementation not using Transparent Ethernet Bridging, + we need to create a fake Ethernet header for further processing by OVS */ + struct ethhdr *ethh; + int err; + + err = pskb_expand_head(skb, 14, 0, GFP_ATOMIC); + if (unlikely(err)) + return PACKET_REJECT; + + ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); + memset(ethh, 0, ETH_HLEN); + ethh->h_dest[0] = 0x02; + ethh->h_dest[5] = 0x01; + ethh->h_source[0] = 0x02; + ethh->h_source[5] = 0x02; + ethh->h_proto = tpi->proto; + + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); /* needed ?*/ + } + ovs_vport_receive(vport, skb, &tun_key); return PACKET_RCVD; } diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d1ff5ec..b032a07 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -328,6 +328,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. 
*/ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + OVS_TUNNEL_KEY_ATTR_NOETH, /* No argument. For GRE: do not use TEB ethertype and strip the Eth header. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/lib/flow.c b/lib/flow.c index e7fe4d3..699ed46 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -563,6 +563,8 @@ flow_tun_flag_to_string(uint32_t flags) return "csum"; case FLOW_TNL_F_KEY: return "key"; + case FLOW_TNL_F_NOETH: + return "noeth"; default: return NULL; } diff --git a/lib/flow.h b/lib/flow.h index 3109a84..f172a1b 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -56,6 +56,7 @@ BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER); #define FLOW_TNL_F_DONT_FRAGMENT (1 << 0) #define FLOW_TNL_F_CSUM (1 << 1) #define FLOW_TNL_F_KEY (1 << 2) +#define FLOW_TNL_F_NOETH (1 << 3) const char *flow_tun_flag_to_string(uint32_t flags); diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 165c1c6..3198cc4 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -328,6 +328,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args) needs_dst_port = netdev_vport_needs_dst_port(dev_); tnl_cfg.ipsec = strstr(type, "ipsec"); tnl_cfg.dont_fragment = true; + tnl_cfg.noeth = false; SMAP_FOR_EACH (node, args) { if (!strcmp(node->key, "remote_ip")) { @@ -383,6 +384,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args) if (!strcmp(node->value, "false")) { tnl_cfg.dont_fragment = false; } + } else if (!strcmp(node->key, "noeth")) { + if (!strcmp(node->value, "true")) { + tnl_cfg.noeth = true; + } } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) { if (smap_get(args, "certificate")) { ipsec_mech_set = true; @@ -562,6 +567,10 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) if (!tnl_cfg.dont_fragment) { smap_add(args, "df_default", "false"); } + + if (tnl_cfg.noeth) { + smap_add(args, "noeth", "true"); + } return 0; } diff --git a/lib/netdev.h 
b/lib/netdev.h index 410c35b..88d7771 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -126,6 +126,7 @@ struct netdev_tunnel_config { bool csum; bool ipsec; bool dont_fragment; + bool noeth; }; void netdev_run(void); diff --git a/lib/odp-util.c b/lib/odp-util.c index e20564f..666953e 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -801,6 +801,7 @@ tunnel_key_attr_len(int type) case OVS_TUNNEL_KEY_ATTR_TTL: return 1; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; + case OVS_TUNNEL_KEY_ATTR_NOETH: return 0; case __OVS_TUNNEL_KEY_ATTR_MAX: return -1; } @@ -848,6 +849,9 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) case OVS_TUNNEL_KEY_ATTR_CSUM: tun->flags |= FLOW_TNL_F_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_NOETH: + tun->flags |= FLOW_TNL_F_NOETH; + break; default: /* Allow this to show up as unexpected, if there are unknown * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */ @@ -891,6 +895,9 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) if (tun_key->flags & FLOW_TNL_F_CSUM) { nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM); } + if (tun_key->flags & FLOW_TNL_F_NOETH) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_NOETH); + } nl_msg_end_nested(a, tun_key_ofs); } @@ -917,7 +924,9 @@ odp_mask_attr_is_exact(const struct nlattr *ma) odp_tun_key_from_attr(ma, &tun_mask); if (tun_mask.flags == (FLOW_TNL_F_KEY | FLOW_TNL_F_DONT_FRAGMENT - | FLOW_TNL_F_CSUM)) { + | FLOW_TNL_F_CSUM + | FLOW_TNL_F_NOETH /* TODO: I'm unsure... */ + )) { /* The flags are exact match, check the remaining fields. 
*/ tun_mask.flags = 0xffff; is_exact = is_all_ones((uint8_t *)&tun_mask, diff --git a/lib/odp-util.h b/lib/odp-util.h index 7bc64c7..6ced8c6 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -106,6 +106,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * - OVS_TUNNEL_KEY_ATTR_TTL 1 3 4 8 * - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT 0 -- 4 4 * - OVS_TUNNEL_KEY_ATTR_CSUM 0 -- 4 4 + * - OVS_TUNNEL_KEY_ATTR_NOETH 0 -- 4 4 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 @@ -117,7 +118,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ---------------------------------------------------------- - * total 208 + * total 212 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 38b782f..4e1e7d2 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -406,7 +406,8 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow, flow->tunnel.flags = (cfg->dont_fragment ? FLOW_TNL_F_DONT_FRAGMENT : 0) | (cfg->csum ? FLOW_TNL_F_CSUM : 0) - | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0); + | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0) + | (cfg->noeth ? FLOW_TNL_F_NOETH : 0); if (pre_flow_str) { char *post_flow_str = flow_to_string(flow); @@ -611,6 +612,10 @@ tnl_port_fmt(const struct tnl_port *tnl_port) OVS_REQ_RDLOCK(rwlock) ds_put_cstr(&ds, ", csum=true"); } + if (cfg->noeth) { + ds_put_cstr(&ds, ", noeth=true"); + } + ds_put_cstr(&ds, ")\n"); return ds_steal_cstr(&ds);
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev