This is a first pass at providing a tun_key which can be used as the basis for flow-based tunnelling. The tun_key includes and replaces the tun_id in both struct ovs_skb_cb and struct sw_tun_key.
This patch allows all existing tun_id behaviour to still work. Existing users of tun_id are redirected to tun_key->tun_id to retain compatibility. However, when the userspace code is updated to make use of the new tun_key, the old behaviour will be deprecated and removed. NOTE: With these changes, the tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Signed-off-by: Kyle Mestery <kmest...@cisco.com> Cc: Simon Horman <ho...@verge.net.au> Cc: Jesse Gross <je...@nicira.com> --- V6: - Fix more comments addressed from Jesse. V5: - Address another round of comments from Jesse. V4: - Address 2 comments from Jesse: - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one on the stack temporarily. This goes away when we remove the ability to set tun_id outside the scope of tun_key. - Move tun_key to the end of struct sw_flow_key. V3: - Fix issues found during review by Jesse. - Add a NEWS entry around tunnel code no longer assuming symmetric input and output tunnel keys. V2: - Fix blank line addition/removal found by Simon. - Fix hex printing output found by Simon. --- NEWS | 3 ++ datapath/actions.c | 38 +++++++++++++---- datapath/datapath.c | 10 ++++- datapath/datapath.h | 5 ++- datapath/flow.c | 64 ++++++++++++++++++++++++---- datapath/flow.h | 12 ++++-- datapath/tunnel.c | 101 ++++++++++++++++++++++++++++---------------- datapath/tunnel.h | 15 ++++++- datapath/vport-capwap.c | 12 +++--- datapath/vport-gre.c | 15 ++++--- datapath/vport.c | 2 +- include/linux/openvswitch.h | 18 +++++++- lib/dpif-netdev.c | 1 + lib/odp-util.c | 15 ++++++- lib/odp-util.h | 3 +- 15 files changed, 235 insertions(+), 79 deletions(-) diff --git a/NEWS b/NEWS index 29fd9f3..ae831e3 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ post-v1.8.0 ------------------------ + - The tunneling code no longer assumes input and output keys are symmetric. + If they are not, PMTUD needs to be disabled for tunneling to work. Note + this only applies to flow-based keys. - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. diff --git a/datapath/actions.c b/datapath/actions.c index ec9b595..fa8c10d 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -37,7 +37,8 @@ #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb); + const struct nlattr *attr, int len, + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb); static int make_writable(struct sk_buff *skb, int write_len) { @@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff *skb, } return do_execute_actions(dp, skb, nla_data(acts_list), - nla_len(acts_list), true); + nla_len(acts_list), + OVS_CB(skb)->tun_key, + true); } static int execute_set_action(struct sk_buff *skb, - const struct nlattr *nested_attr) + const struct nlattr *nested_attr, + struct ovs_key_ipv4_tunnel *tun_key) { int err = 0; @@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb, break; case OVS_KEY_ATTR_TUN_ID: - OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); + if (OVS_CB(skb)->tun_key == NULL) { + /* If tun_key is NULL for this skb, assign it to + * a value the caller passed in for action processing + * and output. This can disappear once we drop support + * for setting tun_id outside of tun_key. + */ + memset(tun_key, 0, sizeof(struct ovs_key_ipv4_tunnel)); + OVS_CB(skb)->tun_key = tun_key; + } + + OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr); + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); break; case OVS_KEY_ATTR_ETHERNET: @@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb, /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb) + const struct nlattr *attr, int len, + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb) { /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and @@ -407,7 +426,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_SET: - err = execute_set_action(skb, nla_data(a)); + err = execute_set_action(skb, nla_data(a), tun_key); break; case OVS_ACTION_ATTR_SAMPLE: @@ -458,6 +477,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); struct loop_counter *loop; int error; + struct ovs_key_ipv4_tunnel tun_key; + + memset(&tun_key, 0, sizeof(tun_key)); /* Check whether we've looped too much. */ loop = &__get_cpu_var(loop_counters); @@ -469,9 +491,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) goto out_loop; } - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key = NULL; error = do_execute_actions(dp, skb, acts->actions, - acts->actions_len, false); + acts->actions_len, &tun_key, false); /* Check whether sub-actions looped too much. */ if (unlikely(loop->looping)) diff --git a/datapath/datapath.c b/datapath/datapath.c index a6915fb..d8a198e 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -587,12 +587,20 @@ static int validate_set(const struct nlattr *a, switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv4_tunnel *tun_key; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + tun_key = nla_data(ovs_key); + if (!tun_key->ipv4_dst) { + return -EINVAL; + } + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -785,7 +793,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, &flow->key.phy.in_port, - &flow->key.phy.tun_id, + &flow->key.tun.tun_key, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_put; diff --git a/datapath/datapath.h b/datapath/datapath.h index affbf0e..c5df12d 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -96,7 +96,8 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. - * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. * @csum_start: Stores the offset from which to start checksumming independent @@ -107,7 +108,7 @@ struct datapath { */ struct ovs_skb_cb { struct sw_flow *flow; - __be64 tun_id; + struct ovs_key_ipv4_tunnel *tun_key; #ifdef NEED_CSUM_NORMALIZE enum csum_type ip_summed; u16 csum_start; diff --git a/datapath/flow.c b/datapath/flow.c index d07337c..376f4be 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; - key->phy.tun_id = OVS_CB(skb)->tun_id; + if (OVS_CB(skb)->tun_key) + key->tun.tun_key = *OVS_CB(skb)->tun_key; key->phy.in_port = in_port; skb_reset_mac_header(skb); @@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { /* Not upstream. */ [OVS_KEY_ATTR_TUN_ID] = sizeof(__be64), + [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel), }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -1022,9 +1024,30 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->phy.in_port = DP_MAX_PORTS; } - if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { - swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + /* OVS_KEY_ATTR_TUN_ID and OVS_KEY_ATTR_IPV4_TUNEL must both arrive + * together. + */ + if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) && + attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { + struct ovs_key_ipv4_tunnel *tun_key; + __be64 tun_id = 0; + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); + tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + + if (tun_id != tun_key->tun_id) + return -EINVAL; + + swkey->tun.tun_key = *tun_key; attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); + } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { + swkey->tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); + } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { + struct ovs_key_ipv4_tunnel *tun_key; + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); + swkey->tun.tun_key = *tun_key; + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); } /* Data attributes. */ @@ -1162,14 +1185,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct ovs_key_ipv4_tunnel *tun_key, const struct nlattr *attr) { const struct nlattr *nla; int rem; + __be64 tun_id = 0; *in_port = DP_MAX_PORTS; - *tun_id = 0; + memset(tun_key, 0, sizeof(*tun_key)); *priority = 0; nla_for_each_nested(nla, attr, rem) { @@ -1185,7 +1210,20 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, break; case OVS_KEY_ATTR_TUN_ID: - *tun_id = nla_get_be64(nla); + tun_id = nla_get_be64(nla); + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && + tun_key->tun_id != tun_id) + return -EINVAL; + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + if (tun_key->tun_flags & FLOW_TNL_F_KEY) + tun_id = tun_key->tun_id; + memcpy(tun_key, nla_data(nla), + sizeof(*tun_key)); + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && + tun_id != tun_key->tun_id) + return -EINVAL; break; case OVS_KEY_ATTR_IN_PORT: @@ -1210,10 +1248,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; - if (swkey->phy.tun_id != cpu_to_be64(0) && - nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id)) + if ((swkey->tun.tun_key.tun_flags & FLOW_TNL_F_KEY) && + nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id)) goto nla_put_failure; + if (swkey->tun.tun_key.ipv4_dst) { + struct ovs_key_ipv4_tunnel *tun_key; + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, + sizeof(*tun_key)); + if (!nla) + goto nla_put_failure; + tun_key = nla_data(nla); + *tun_key = swkey->tun.tun_key; + } + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/datapath/flow.h b/datapath/flow.h index 02c563a..4430b32 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -42,10 +42,12 @@ struct sw_flow_actions { struct sw_flow_key { struct { - __be64 tun_id; /* Encapsulating tunnel ID. */ u32 priority; /* Packet QoS priority. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; + struct { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ + } tun; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ @@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) @@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 156 + * total 184 */ -#define FLOW_BUFSIZE 156 +#define FLOW_BUFSIZE 184 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct ovs_key_ipv4_tunnel *tun_key, const struct nlattr *); #define MAX_ACTIONS_BUFSIZE (16 * 1024) diff --git a/datapath/tunnel.c b/datapath/tunnel.c index d651c11..739f098 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, return NULL; } -static void ecn_decapsulate(struct sk_buff *skb, u8 tos) +static void ecn_decapsulate(struct sk_buff *skb) { - if (unlikely(INET_ECN_is_ce(tos))) { + if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) { __be16 protocol = skb->protocol; skb_set_network_header(skb, ETH_HLEN); @@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos) * - skb->csum does not include the inner Ethernet header. * - The layer pointers are undefined. */ -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb) { struct ethhdr *eh; @@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) skb_clear_rxhash(skb); secpath_reset(skb); - ecn_decapsulate(skb, tos); + ecn_decapsulate(skb); vlan_set_tci(skb, 0); if (unlikely(compute_ip_summed(skb, false))) { @@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb, bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key) + struct sk_buff *skb, unsigned int mtu) { unsigned int eth_hdr_len = ETH_HLEN; unsigned int total_length = 0, header_length = 0, payload_length; @@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport, ipv6_build_icmp(skb, nskb, mtu, payload_length); #endif - /* - * Assume that flow based keys are symmetric with respect to input - * and output and use the key that we were going to put on the - * outgoing packet for the fake received packet. If the keys are - * not symmetric then PMTUD needs to be disabled since we won't have - * any way of synthesizing packets. - */ - if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) == - (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) - OVS_CB(nskb)->tun_id = flow_key; - if (unlikely(compute_ip_summed(nskb, false))) { kfree_skb(nskb); return false; @@ -723,12 +712,20 @@ static bool check_mtu(struct sk_buff *skb, const struct tnl_mutable_config *mutable, const struct rtable *rt, __be16 *frag_offp) { - bool df_inherit = mutable->flags & TNL_F_DF_INHERIT; + bool df_inherit; bool pmtud = mutable->flags & TNL_F_PMTUD; - __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0; + __be16 frag_off; int mtu = 0; unsigned int packet_length = skb->len - ETH_HLEN; + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) { + df_inherit = false; + frag_off = OVS_CB(skb)->tun_key->tun_flags & FLOW_TNL_F_DONT_FRAGMENT; + } else { + df_inherit = mutable->flags & TNL_F_DF_INHERIT; + frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0; + } + /* Allow for one level of tagging in the packet length. */ if (!vlan_tx_tag_present(skb) && eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) @@ -760,8 +757,7 @@ static bool check_mtu(struct sk_buff *skb, mtu = max(mtu, IP_MIN_MTU); if (packet_length > mtu && - ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) return false; } } @@ -777,8 +773,7 @@ static bool check_mtu(struct sk_buff *skb, mtu = max(mtu, IPV6_MIN_MTU); if (packet_length > mtu && - ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) return false; } } @@ -1000,15 +995,16 @@ unlock: } static struct rtable *__find_route(const struct tnl_mutable_config *mutable, - u8 ipproto, u8 tos) + __be32 saddr, __be32 daddr, u8 ipproto, + u8 tos) { /* Tunnel configuration keeps DSCP part of TOS bits, But Linux * router expect RT_TOS bits only. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) struct flowi fl = { .nl_u = { .ip4_u = { - .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, + .daddr = daddr, + .saddr = saddr, .tos = RT_TOS(tos) } }, .proto = ipproto }; struct rtable *rt; @@ -1018,8 +1014,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable, return rt; #else - struct flowi4 fl = { .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, + struct flowi4 fl = { .daddr = daddr, + .saddr = saddr, .flowi4_tos = RT_TOS(tos), .flowi4_proto = ipproto }; @@ -1029,7 +1025,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable, static struct rtable *find_route(struct vport *vport, const struct tnl_mutable_config *mutable, - u8 tos, struct tnl_cache **cache) + __be32 saddr, __be32 daddr, u8 tos, + struct tnl_cache **cache) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache); @@ -1037,18 +1034,21 @@ static struct rtable *find_route(struct vport *vport, *cache = NULL; tos = RT_TOS(tos); - if (likely(tos == RT_TOS(mutable->tos) && - check_cache_valid(cur_cache, mutable))) { + if (daddr == mutable->key.daddr && saddr == mutable->key.saddr && + tos == RT_TOS(mutable->tos) && + check_cache_valid(cur_cache, mutable)) { *cache = cur_cache; return cur_cache->rt; } else { struct rtable *rt; - rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos); + rt = __find_route(mutable, saddr, daddr, + tnl_vport->tnl_ops->ipproto, tos); if (IS_ERR(rt)) return NULL; - if (likely(tos == RT_TOS(mutable->tos))) + if (daddr == mutable->key.daddr && saddr == mutable->key.saddr && + tos == RT_TOS(mutable->tos)) *cache = build_cache(vport, mutable, rt); return rt; @@ -1178,8 +1178,11 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) struct rtable *rt; struct dst_entry *unattached_dst = NULL; struct tnl_cache *cache; + struct ovs_key_ipv4_tunnel tun_key; int sent_len = 0; __be16 frag_off = 0; + __be32 daddr; + __be32 saddr; u8 ttl; u8 inner_tos; u8 tos; @@ -1207,6 +1210,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) } #endif + /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here, + * and zero it out. + */ + if (OVS_CB(skb)->tun_key == NULL) { + memset(&tun_key, 0, sizeof(tun_key)); + OVS_CB(skb)->tun_key = &tun_key; + } /* ToS */ if (skb->protocol == htons(ETH_P_IP)) inner_tos = ip_hdr(skb)->tos; @@ -1219,11 +1229,23 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) if (mutable->flags & TNL_F_TOS_INHERIT) tos = inner_tos; + else if (OVS_CB(skb)->tun_key->ipv4_dst != 0) + tos = OVS_CB(skb)->tun_key->ipv4_tos; else tos = mutable->tos; + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) + daddr = OVS_CB(skb)->tun_key->ipv4_dst; + else + daddr = mutable->key.daddr; + + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) + saddr = OVS_CB(skb)->tun_key->ipv4_src; + else + saddr = mutable->key.saddr; + /* Route lookup */ - rt = find_route(vport, mutable, tos, &cache); + rt = find_route(vport, mutable, saddr, daddr, tos, &cache); if (unlikely(!rt)) goto error_free; if (unlikely(!cache)) @@ -1260,9 +1282,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) } /* TTL */ - ttl = mutable->ttl; - if (!ttl) - ttl = ip4_dst_hoplimit(&rt_dst(rt)); + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) + ttl = OVS_CB(skb)->tun_key->ipv4_ttl; + else { + ttl = mutable->ttl; + if (!ttl) + ttl = ip4_dst_hoplimit(&rt_dst(rt)); + } if (mutable->flags & TNL_F_TTL_INHERIT) { if (skb->protocol == htons(ETH_P_IP)) @@ -1442,7 +1468,8 @@ static int tnl_set_config(struct net *net, struct nlattr *options, struct net_device *dev; struct rtable *rt; - rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos); + rt = __find_route(mutable, mutable->key.saddr, mutable->key.daddr, + tnl_ops->ipproto, mutable->tos); if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt_dst(rt).dev; diff --git a/datapath/tunnel.h b/datapath/tunnel.h index 1924017..b7858d3 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -269,14 +269,14 @@ int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr); const char *ovs_tnl_get_name(const struct vport *vport); const unsigned char *ovs_tnl_get_addr(const struct vport *vport); int ovs_tnl_send(struct vport *vport, struct sk_buff *skb); -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos); +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb); struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, __be64 key, int tunnel_type, const struct tnl_mutable_config **mutable); bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key); + struct sk_buff *skb, unsigned int mtu); void ovs_tnl_free_linked_skbs(struct sk_buff *skb); int ovs_tnl_init(void); @@ -286,4 +286,15 @@ static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport) return vport_priv(vport); } +static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = 0; +} + #endif /* tunnel.h */ diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index 05a099d..fbf50d9 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -220,7 +220,7 @@ static struct sk_buff *capwap_update_header(const struct vport *vport, struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); - opt->key = OVS_CB(skb)->tun_id; + opt->key = OVS_CB(skb)->tun_key->tun_id; } udph->len = htons(skb->len - skb_transport_offset(skb)); @@ -316,6 +316,7 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) struct vport *vport; const struct tnl_mutable_config *mutable; struct iphdr *iph; + struct ovs_key_ipv4_tunnel tun_key; __be64 key = 0; if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN))) @@ -333,12 +334,11 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) goto error; } - if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; - else - OVS_CB(skb)->tun_id = 0; + tnl_tun_key_init(&tun_key, iph, + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); + OVS_CB(skb)->tun_key = &tun_key; - ovs_tnl_rcv(vport, skb, iph->tos); + ovs_tnl_rcv(vport, skb); goto out; error: diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index ab89c5b..8076b1d 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -103,7 +103,7 @@ static struct sk_buff *gre_update_header(const struct vport *vport, /* Work backwards over the options so the checksum is last. */ if (mutable->flags & TNL_F_OUT_KEY_ACTION) - *options = be64_get_low32(OVS_CB(skb)->tun_id); + *options = be64_get_low32(OVS_CB(skb)->tun_key->tun_id); if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) options--; @@ -285,7 +285,7 @@ static void gre_err(struct sk_buff *skb, u32 info) #endif __skb_pull(skb, tunnel_hdr_len); - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); + ovs_tnl_frag_needed(vport, mutable, skb, mtu); __skb_push(skb, tunnel_hdr_len); out: @@ -327,6 +327,7 @@ static int gre_rcv(struct sk_buff *skb) const struct tnl_mutable_config *mutable; int hdr_len; struct iphdr *iph; + struct ovs_key_ipv4_tunnel tun_key; __be16 flags; __be64 key; @@ -351,15 +352,15 @@ static int gre_rcv(struct sk_buff *skb) goto error; } - if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; - else - OVS_CB(skb)->tun_id = 0; + + tnl_tun_key_init(&tun_key, iph, + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); + OVS_CB(skb)->tun_key = &tun_key; __skb_pull(skb, hdr_len); skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); - ovs_tnl_rcv(vport, skb, iph->tos); + ovs_tnl_rcv(vport, skb); return 0; error: diff --git a/datapath/vport.c b/datapath/vport.c index dc7adfa..e8279fb 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -462,7 +462,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->flow = NULL; if (!(vport->ops->flags & VPORT_F_TUN_ID)) - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key = NULL; ovs_dp_process_received_packet(vport, skb); } diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index f5c9cca..8334cc1 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -278,7 +278,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ + OVS_KEY_ATTR_IPV4_TUNNEL = 62, /* struct ovs_key_ipv4_tunnel */ + OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ __OVS_KEY_ATTR_MAX }; @@ -360,6 +361,21 @@ struct ovs_key_nd { __u8 nd_tll[6]; }; +/* Values for ovs_key_ipv4_tunnel->tun_flags */ +#define FLOW_TNL_F_DONT_FRAGMENT (1 << 0) +#define FLOW_TNL_F_CSUM (1 << 1) +#define FLOW_TNL_F_KEY (1 << 2) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __u32 tun_flags; + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 pad[2]; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c9e3210..797cb06 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_IPV6: + case OVS_KEY_ATTR_IPV4_TUNNEL: /* not implemented */ break; diff --git a/lib/odp-util.c b/lib/odp-util.c index 257d7a7..9ed17ed 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_UNSPEC: return "unspec"; case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "priority"; + case OVS_KEY_ATTR_TUN_ID: return "tun_id"; + case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; case OVS_KEY_ATTR_VLAN: return "vlan"; @@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; - case OVS_KEY_ATTR_TUN_ID: return "tun_id"; case __OVS_KEY_ATTR_MAX: default: @@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_ENCAP: return -2; case OVS_KEY_ATTR_PRIORITY: return 4; case OVS_KEY_ATTR_TUN_ID: return 8; + case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel); case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); @@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) const struct ovs_key_icmpv6 *icmpv6_key; const struct ovs_key_arp *arp_key; const struct ovs_key_nd *nd_key; + const struct ovs_key_ipv4_tunnel *ipv4_tun_key; enum ovs_key_attr attr = nl_attr_type(a); int expected_len; @@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + ipv4_tun_key = nl_attr_get(a); + ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32 + ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")", + ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags, + IP_ARGS(&ipv4_tun_key->ipv4_src), + IP_ARGS(&ipv4_tun_key->ipv4_dst), + ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl); + break; + case OVS_KEY_ATTR_IN_PORT: ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); break; diff --git a/lib/odp-util.h b/lib/odp-util.h index 16f2b15..57073ba 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names, * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) @@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names, * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 156 + * total 184 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. -- 1.7.11.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev