Patch looks good, I have few comments inlined.
On Tue, Oct 9, 2012 at 11:49 AM, Kyle Mestery <kmest...@cisco.com> wrote: > This is a first pass at providing a tun_key which can be > used as the basis for flow-based tunnelling. The > tun_key includes and replaces the tun_id in both struct > ovs_skb_cb and struct sw_tun_key. > > This patch allows all existing tun_id behaviour to still work. Existing > users of tun_id are redirected to tun_key->tun_id to retain compatibility. > However, when the userspace code is updated to make use of the new tun_key, > the old behaviour will be deprecated and removed. > > NOTE: With these changes, the tunneling code no longer assumes input and > output keys are symmetric. If they are not, PMTUD needs to be disabled > for tunneling to work. > > Signed-off-by: Kyle Mestery <kmest...@cisco.com> > Cc: Simon Horman <ho...@verge.net.au> > Cc: Jesse Gross <je...@nicira.com> > --- > V6: > - Fix more comments addressed from Jesse. > V5: > - Address another round of comments from Jesse. > V4: > - Address 2 comments from Jesse: > - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one > on the stack temporarily. This goes away when we remove the ability to set > tun_id outside the scope of tun_key. > - Move tun_key to the end of struct sw_flow_key. > V3: > - Fix issues found during review by Jesse. > - Add a NEWS entry around tunnel code no longer assuming symmetric input and > output tunnel keys. > > V2: > - Fix blank line addition/removal found by Simon. > - Fix hex printing output found by Simon. > --- > NEWS | 3 ++ > datapath/actions.c | 38 +++++++++++++---- > datapath/datapath.c | 10 ++++- > datapath/datapath.h | 5 ++- > datapath/flow.c | 64 ++++++++++++++++++++++++---- > datapath/flow.h | 12 ++++-- > datapath/tunnel.c | 101 > ++++++++++++++++++++++++++++---------------- > datapath/tunnel.h | 15 ++++++- > datapath/vport-capwap.c | 12 +++--- > datapath/vport-gre.c | 15 ++++--- > datapath/vport.c | 2 +- > include/linux/openvswitch.h | 18 +++++++- > lib/dpif-netdev.c | 1 + > lib/odp-util.c | 15 ++++++- > lib/odp-util.h | 3 +- > 15 files changed, 235 insertions(+), 79 deletions(-) > > diff --git a/NEWS b/NEWS > index 29fd9f3..ae831e3 100644 > --- a/NEWS > +++ b/NEWS > @@ -1,5 +1,8 @@ > post-v1.8.0 > ------------------------ > + - The tunneling code no longer assumes input and output keys are > symmetric. > + If they are not, PMTUD needs to be disabled for tunneling to work. Note > + this only applies to flow-based keys. > - FreeBSD is now a supported platform, thanks to code contributions from > Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. > - ovs-bugtool: New --ovs option to report only OVS related information. > diff --git a/datapath/actions.c b/datapath/actions.c > index ec9b595..fa8c10d 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -37,7 +37,8 @@ > #include "vport.h" > > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > - const struct nlattr *attr, int len, bool keep_skb); > + const struct nlattr *attr, int len, > + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb); > > static int make_writable(struct sk_buff *skb, int write_len) > { > @@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff > *skb, > } > > return do_execute_actions(dp, skb, nla_data(acts_list), > - nla_len(acts_list), true); > + nla_len(acts_list), > + OVS_CB(skb)->tun_key, > + true); > } > > static int execute_set_action(struct sk_buff *skb, > - const struct nlattr *nested_attr) > + const struct nlattr *nested_attr, > + struct ovs_key_ipv4_tunnel *tun_key) > { > int err = 0; > > @@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb, > break; > > case OVS_KEY_ATTR_TUN_ID: > - OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); > + if (OVS_CB(skb)->tun_key == NULL) { > + /* If tun_key is NULL for this skb, assign it to > + * a value the caller passed in for action processing > + * and output. This can disappear once we drop support > + * for setting tun_id outside of tun_key. > + */ > + memset(tun_key, 0, sizeof(struct > ovs_key_ipv4_tunnel)); > + OVS_CB(skb)->tun_key = tun_key; > + } > + > + OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr); > + break; > + > + case OVS_KEY_ATTR_IPV4_TUNNEL: > + OVS_CB(skb)->tun_key = nla_data(nested_attr); > break; > > case OVS_KEY_ATTR_ETHERNET: > @@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb, > > /* Execute a list of actions against 'skb'. */ > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > - const struct nlattr *attr, int len, bool keep_skb) > + const struct nlattr *attr, int len, > + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb) > { > /* Every output action needs a separate clone of 'skb', but the common > * case is just a single output action, so that doing a clone and > @@ -407,7 +426,7 @@ static int do_execute_actions(struct datapath *dp, struct > sk_buff *skb, > break; > > case OVS_ACTION_ATTR_SET: > - err = execute_set_action(skb, nla_data(a)); > + err = execute_set_action(skb, nla_data(a), tun_key); > break; > > case OVS_ACTION_ATTR_SAMPLE: > @@ -458,6 +477,9 @@ int ovs_execute_actions(struct datapath *dp, struct > sk_buff *skb) > struct sw_flow_actions *acts = > rcu_dereference(OVS_CB(skb)->flow->sf_acts); > struct loop_counter *loop; > int error; > + struct ovs_key_ipv4_tunnel tun_key; > + > + memset(&tun_key, 0, sizeof(tun_key)); > tun key will zeroed while executing tunnel set action, Do we still need to zero it here? > /* Check whether we've looped too much. */ > loop = &__get_cpu_var(loop_counters); > @@ -469,9 +491,9 @@ int ovs_execute_actions(struct datapath *dp, struct > sk_buff *skb) > goto out_loop; > } > > - OVS_CB(skb)->tun_id = 0; > + OVS_CB(skb)->tun_key = NULL; > error = do_execute_actions(dp, skb, acts->actions, > - acts->actions_len, false); > + acts->actions_len, &tun_key, false); > > /* Check whether sub-actions looped too much. */ > if (unlikely(loop->looping)) > diff --git a/datapath/datapath.c b/datapath/datapath.c > index a6915fb..d8a198e 100644 > --- a/datapath/datapath.c > +++ b/datapath/datapath.c > @@ -587,12 +587,20 @@ static int validate_set(const struct nlattr *a, > > switch (key_type) { > const struct ovs_key_ipv4 *ipv4_key; > + const struct ovs_key_ipv4_tunnel *tun_key; > > case OVS_KEY_ATTR_PRIORITY: > case OVS_KEY_ATTR_TUN_ID: > case OVS_KEY_ATTR_ETHERNET: > break; > > + case OVS_KEY_ATTR_IPV4_TUNNEL: > + tun_key = nla_data(ovs_key); > + if (!tun_key->ipv4_dst) { > + return -EINVAL; > + } > + break; > + > case OVS_KEY_ATTR_IPV4: > if (flow_key->eth.type != htons(ETH_P_IP)) > return -EINVAL; > @@ -785,7 +793,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, > struct genl_info *info) > > err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, > &flow->key.phy.in_port, > - &flow->key.phy.tun_id, > + &flow->key.tun.tun_key, > a[OVS_PACKET_ATTR_KEY]); > if (err) > goto err_flow_put; > diff --git a/datapath/datapath.h b/datapath/datapath.h > index affbf0e..c5df12d 100644 > --- a/datapath/datapath.h > +++ b/datapath/datapath.h > @@ -96,7 +96,8 @@ struct datapath { > /** > * struct ovs_skb_cb - OVS data in skb CB > * @flow: The flow associated with this packet. May be %NULL if no flow. > - * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the > + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the > + * packet is not being tunneled. > * @ip_summed: Consistently stores L4 checksumming status across different > * kernel versions. > * @csum_start: Stores the offset from which to start checksumming > independent > @@ -107,7 +108,7 @@ struct datapath { > */ > struct ovs_skb_cb { > struct sw_flow *flow; > - __be64 tun_id; > + struct ovs_key_ipv4_tunnel *tun_key; > #ifdef NEED_CSUM_NORMALIZE > enum csum_type ip_summed; > u16 csum_start; > diff --git a/datapath/flow.c b/datapath/flow.c > index d07337c..376f4be 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, > struct sw_flow_key *key, > memset(key, 0, sizeof(*key)); > > key->phy.priority = skb->priority; > - key->phy.tun_id = OVS_CB(skb)->tun_id; > + if (OVS_CB(skb)->tun_key) > + key->tun.tun_key = *OVS_CB(skb)->tun_key; > key->phy.in_port = in_port; > > skb_reset_mac_header(skb); > @@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > > /* Not upstream. */ > [OVS_KEY_ATTR_TUN_ID] = sizeof(__be64), > + [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel), > }; > > static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, > @@ -1022,9 +1024,30 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > swkey->phy.in_port = DP_MAX_PORTS; > } > > - if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { > - swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); > + /* OVS_KEY_ATTR_TUN_ID and OVS_KEY_ATTR_IPV4_TUNEL must both arrive > + * together. > + */ > + if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) && > + attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { > + struct ovs_key_ipv4_tunnel *tun_key; > + __be64 tun_id = 0; > + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); > + tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); > + > + if (tun_id != tun_key->tun_id) > + return -EINVAL; > + > + swkey->tun.tun_key = *tun_key; > attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); > + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); > + } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { > + swkey->tun.tun_key.tun_id = > nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); > + attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); > + } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { > + struct ovs_key_ipv4_tunnel *tun_key; > + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); > + swkey->tun.tun_key = *tun_key; > + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); > } While validating set action OVS_KEY_ATTR_IPV4_TUNNEL, we are checking for ipv4_dst, I think we need similar check here. > > /* Data attributes. */ > @@ -1162,14 +1185,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > * get the metadata, that is, the parts of the flow key that cannot be > * extracted from the packet itself. > */ > -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 > *tun_id, > +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, > + struct ovs_key_ipv4_tunnel *tun_key, > const struct nlattr *attr) > { > const struct nlattr *nla; > int rem; > + __be64 tun_id = 0; > > *in_port = DP_MAX_PORTS; > - *tun_id = 0; > + memset(tun_key, 0, sizeof(*tun_key)); > *priority = 0; > > nla_for_each_nested(nla, attr, rem) { > @@ -1185,7 +1210,20 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 > *in_port, __be64 *tun_id, > break; > > case OVS_KEY_ATTR_TUN_ID: > - *tun_id = nla_get_be64(nla); > + tun_id = nla_get_be64(nla); > + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && > + tun_key->tun_id != tun_id) > + return -EINVAL; > + break; > + > + case OVS_KEY_ATTR_IPV4_TUNNEL: > + if (tun_key->tun_flags & FLOW_TNL_F_KEY) > + tun_id = tun_key->tun_id; > + memcpy(tun_key, nla_data(nla), > + sizeof(*tun_key)); > + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && > + tun_id != tun_key->tun_id) > + return -EINVAL; > break; > > case OVS_KEY_ATTR_IN_PORT: > @@ -1210,10 +1248,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key > *swkey, struct sk_buff *skb) > nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) > goto nla_put_failure; > > - if (swkey->phy.tun_id != cpu_to_be64(0) && > - nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id)) > + if ((swkey->tun.tun_key.tun_flags & FLOW_TNL_F_KEY) && > + nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id)) > goto nla_put_failure; > > + if (swkey->tun.tun_key.ipv4_dst) { > + struct ovs_key_ipv4_tunnel *tun_key; > + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, > + sizeof(*tun_key)); > + if (!nla) > + goto nla_put_failure; > + tun_key = nla_data(nla); > + *tun_key = swkey->tun.tun_key; > + } > + Why are we sending both OVS_KEY_ATTR_TUN_ID and OVS_KEY_ATTR_IPV4_TUNNEL attributed here? we could first check for ipv4_dst else for FLOW_TNL_F_KEY in flow key. > if (swkey->phy.in_port != DP_MAX_PORTS && > nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) > goto nla_put_failure; > diff --git a/datapath/flow.h b/datapath/flow.h > index 02c563a..4430b32 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -42,10 +42,12 @@ struct sw_flow_actions { > > struct sw_flow_key { > struct { > - __be64 tun_id; /* Encapsulating tunnel ID. */ > u32 priority; /* Packet QoS priority. */ > u16 in_port; /* Input switch port (or > DP_MAX_PORTS). */ > } phy; > + struct { > + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel > key. */ > + } tun; > struct { > u8 src[ETH_ALEN]; /* Ethernet source address. */ > u8 dst[ETH_ALEN]; /* Ethernet destination address. */ > @@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); > * ------ --- ------ ----- > * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 > * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 > + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 > * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 > * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 > * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) > @@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); > * OVS_KEY_ATTR_ICMPV6 2 2 4 8 > * OVS_KEY_ATTR_ND 28 -- 4 32 > * ------------------------------------------------- > - * total 156 > + * total 184 > */ > -#define FLOW_BUFSIZE 156 > +#define FLOW_BUFSIZE 184 > > int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); > int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, > const struct nlattr *); > -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 > *tun_id, > +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, > + struct ovs_key_ipv4_tunnel *tun_key, > const struct nlattr *); > > #define MAX_ACTIONS_BUFSIZE (16 * 1024) > diff --git a/datapath/tunnel.c b/datapath/tunnel.c > index d651c11..739f098 100644 > --- a/datapath/tunnel.c > +++ b/datapath/tunnel.c > @@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 > saddr, __be32 daddr, > return NULL; ñ> } > > -static void ecn_decapsulate(struct sk_buff *skb, u8 tos) > +static void ecn_decapsulate(struct sk_buff *skb) > { > - if (unlikely(INET_ECN_is_ce(tos))) { > + if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) { > __be16 protocol = skb->protocol; > > skb_set_network_header(skb, ETH_HLEN); > @@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos) > * - skb->csum does not include the inner Ethernet header. > * - The layer pointers are undefined. > */ > -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) > +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb) > { > struct ethhdr *eh; > > @@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff > *skb, u8 tos) > skb_clear_rxhash(skb); > secpath_reset(skb); > > - ecn_decapsulate(skb, tos); > + ecn_decapsulate(skb); > vlan_set_tci(skb, 0); > > if (unlikely(compute_ip_summed(skb, false))) { > @@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct > sk_buff *nskb, > > bool ovs_tnl_frag_needed(struct vport *vport, > const struct tnl_mutable_config *mutable, > - struct sk_buff *skb, unsigned int mtu, __be64 > flow_key) > + struct sk_buff *skb, unsigned int mtu) > { > unsigned int eth_hdr_len = ETH_HLEN; > unsigned int total_length = 0, header_length = 0, payload_length; > @@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport, > ipv6_build_icmp(skb, nskb, mtu, payload_length); > #endif > > - /* > - * Assume that flow based keys are symmetric with respect to input > - * and output and use the key that we were going to put on the > - * outgoing packet for the fake received packet. If the keys are > - * not symmetric then PMTUD needs to be disabled since we won't have > - * any way of synthesizing packets. > - */ > - if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) == > - (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) > - OVS_CB(nskb)->tun_id = flow_key; > - > if (unlikely(compute_ip_summed(nskb, false))) { > kfree_skb(nskb); > return false; > @@ -723,12 +712,20 @@ static bool check_mtu(struct sk_buff *skb, > const struct tnl_mutable_config *mutable, > const struct rtable *rt, __be16 *frag_offp) > { > - bool df_inherit = mutable->flags & TNL_F_DF_INHERIT; > + bool df_inherit; > bool pmtud = mutable->flags & TNL_F_PMTUD; > - __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : > 0; > + __be16 frag_off; > int mtu = 0; > unsigned int packet_length = skb->len - ETH_HLEN; > > + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) { > + df_inherit = false; > + frag_off = OVS_CB(skb)->tun_key->tun_flags & > FLOW_TNL_F_DONT_FRAGMENT; > + } else { > + df_inherit = mutable->flags & TNL_F_DF_INHERIT; > + frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : > 0; > + } > + Why not set pmtud to false incase of flow-based tunneling like you did it for df_inherit? > /* Allow for one level of tagging in the packet length. */ > if (!vlan_tx_tag_present(skb) && > eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) > @@ -760,8 +757,7 @@ static bool check_mtu(struct sk_buff *skb, > mtu = max(mtu, IP_MIN_MTU); > > if (packet_length > mtu && > - ovs_tnl_frag_needed(vport, mutable, skb, mtu, > - OVS_CB(skb)->tun_id)) > + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) > return false; > } > } > @@ -777,8 +773,7 @@ static bool check_mtu(struct sk_buff *skb, > mtu = max(mtu, IPV6_MIN_MTU); > > if (packet_length > mtu && > - ovs_tnl_frag_needed(vport, mutable, skb, mtu, > - OVS_CB(skb)->tun_id)) > + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) > return false; > } > } > @@ -1000,15 +995,16 @@ unlock: > } > > static struct rtable *__find_route(const struct tnl_mutable_config *mutable, > - u8 ipproto, u8 tos) > + __be32 saddr, __be32 daddr, u8 ipproto, > + u8 tos) > { > /* Tunnel configuration keeps DSCP part of TOS bits, But Linux > * router expect RT_TOS bits only. */ > > #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) > struct flowi fl = { .nl_u = { .ip4_u = { > - .daddr = mutable->key.daddr, > - .saddr = mutable->key.saddr, > + .daddr = daddr, > + .saddr = saddr, > .tos = RT_TOS(tos) } }, > .proto = ipproto }; > struct rtable *rt; > @@ -1018,8 +1014,8 @@ static struct rtable *__find_route(const struct > tnl_mutable_config *mutable, > > return rt; > #else > - struct flowi4 fl = { .daddr = mutable->key.daddr, > - .saddr = mutable->key.saddr, > + struct flowi4 fl = { .daddr = daddr, > + .saddr = saddr, > .flowi4_tos = RT_TOS(tos), > .flowi4_proto = ipproto }; > > @@ -1029,7 +1025,8 @@ static struct rtable *__find_route(const struct > tnl_mutable_config *mutable, > > static struct rtable *find_route(struct vport *vport, > const struct tnl_mutable_config *mutable, > - u8 tos, struct tnl_cache **cache) > + __be32 saddr, __be32 daddr, u8 tos, > + struct tnl_cache **cache) > { > struct tnl_vport *tnl_vport = tnl_vport_priv(vport); > struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache); > @@ -1037,18 +1034,21 @@ static struct rtable *find_route(struct vport *vport, > *cache = NULL; > tos = RT_TOS(tos); > > - if (likely(tos == RT_TOS(mutable->tos) && > - check_cache_valid(cur_cache, mutable))) { > + if (daddr == mutable->key.daddr && saddr == mutable->key.saddr && > + tos == RT_TOS(mutable->tos) && > + check_cache_valid(cur_cache, mutable)) { > *cache = cur_cache; > return cur_cache->rt; > } else { > struct rtable *rt; > > - rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos); > + rt = __find_route(mutable, saddr, daddr, > + tnl_vport->tnl_ops->ipproto, tos); > if (IS_ERR(rt)) > return NULL; > > - if (likely(tos == RT_TOS(mutable->tos))) > + if (daddr == mutable->key.daddr && saddr == > mutable->key.saddr && > + tos == RT_TOS(mutable->tos)) > *cache = build_cache(vport, mutable, rt); > I am not sure why checks are added on saddr and daddr here? header caching is not set in case of flow based tunneling to build_cache call is no-op anyways. > return rt; > @@ -1178,8 +1178,11 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff > *skb) > struct rtable *rt; > struct dst_entry *unattached_dst = NULL; > struct tnl_cache *cache; > + struct ovs_key_ipv4_tunnel tun_key; > int sent_len = 0; > __be16 frag_off = 0; > + __be32 daddr; > + __be32 saddr; > u8 ttl; > u8 inner_tos; > u8 tos; > @@ -1207,6 +1210,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff > *skb) > } > #endif > > + /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key > here, > + * and zero it out. > + */ > + if (OVS_CB(skb)->tun_key == NULL) { > + memset(&tun_key, 0, sizeof(tun_key)); > + OVS_CB(skb)->tun_key = &tun_key; > + } > /* ToS */ > if (skb->protocol == htons(ETH_P_IP)) > inner_tos = ip_hdr(skb)->tos; > @@ -1219,11 +1229,23 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff > *skb) > > if (mutable->flags & TNL_F_TOS_INHERIT) > tos = inner_tos; > + else if (OVS_CB(skb)->tun_key->ipv4_dst != 0) > + tos = OVS_CB(skb)->tun_key->ipv4_tos; > else > tos = mutable->tos; > > + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) > + daddr = OVS_CB(skb)->tun_key->ipv4_dst; > + else > + daddr = mutable->key.daddr; > + > + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) > + saddr = OVS_CB(skb)->tun_key->ipv4_src; > + else > + saddr = mutable->key.saddr; > + It will be easier to read if we have one if check on OVS_CB(skb)->tun_key->ipv4_dst which is retrieve all tunnel parameter for flow-based tunning and else block for campat code. > /* Route lookup */ > - rt = find_route(vport, mutable, tos, &cache); > + rt = find_route(vport, mutable, saddr, daddr, tos, &cache); > if (unlikely(!rt)) > goto error_free; > if (unlikely(!cache)) > @@ -1260,9 +1282,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff > *skb) > } > > /* TTL */ > - ttl = mutable->ttl; > - if (!ttl) > - ttl = ip4_dst_hoplimit(&rt_dst(rt)); > + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) > + ttl = OVS_CB(skb)->tun_key->ipv4_ttl; > + else { > + ttl = mutable->ttl; > + if (!ttl) > + ttl = ip4_dst_hoplimit(&rt_dst(rt)); > + } > > if (mutable->flags & TNL_F_TTL_INHERIT) { This check could be in the compat code block, so it is less confusing. > if (skb->protocol == htons(ETH_P_IP)) > @@ -1442,7 +1468,8 @@ static int tnl_set_config(struct net *net, struct > nlattr *options, > struct net_device *dev; > struct rtable *rt; > > - rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos); > + rt = __find_route(mutable, mutable->key.saddr, > mutable->key.daddr, > + tnl_ops->ipproto, mutable->tos); > if (IS_ERR(rt)) > return -EADDRNOTAVAIL; > dev = rt_dst(rt).dev; > diff --git a/datapath/tunnel.h b/datapath/tunnel.h > index 1924017..b7858d3 100644 > --- a/datapath/tunnel.h > +++ b/datapath/tunnel.h > @@ -269,14 +269,14 @@ int ovs_tnl_set_addr(struct vport *vport, const > unsigned char *addr); > const char *ovs_tnl_get_name(const struct vport *vport); > const unsigned char *ovs_tnl_get_addr(const struct vport *vport); > int ovs_tnl_send(struct vport *vport, struct sk_buff *skb); > -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos); > +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb); > > struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, > __be64 key, int tunnel_type, > const struct tnl_mutable_config **mutable); > bool ovs_tnl_frag_needed(struct vport *vport, > const struct tnl_mutable_config *mutable, > - struct sk_buff *skb, unsigned int mtu, __be64 > flow_key); > + struct sk_buff *skb, unsigned int mtu); > void ovs_tnl_free_linked_skbs(struct sk_buff *skb); > > int ovs_tnl_init(void); > @@ -286,4 +286,15 @@ static inline struct tnl_vport *tnl_vport_priv(const > struct vport *vport) > return vport_priv(vport); > } > > +static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, > + const struct iphdr *iph, __be64 tun_id) > +{ > + tun_key->tun_id = tun_id; > + tun_key->ipv4_src = iph->saddr; > + tun_key->ipv4_dst = iph->daddr; > + tun_key->ipv4_tos = iph->tos; > + tun_key->ipv4_ttl = iph->ttl; > + tun_key->tun_flags = 0; > +} > + > #endif /* tunnel.h */ > diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c > index 05a099d..fbf50d9 100644 > --- a/datapath/vport-capwap.c > +++ b/datapath/vport-capwap.c > @@ -220,7 +220,7 @@ static struct sk_buff *capwap_update_header(const struct > vport *vport, > struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); > struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key > *)(wsi + 1); > > - opt->key = OVS_CB(skb)->tun_id; > + opt->key = OVS_CB(skb)->tun_key->tun_id; > } > > udph->len = htons(skb->len - skb_transport_offset(skb)); > @@ -316,6 +316,7 @@ static int capwap_rcv(struct sock *sk, struct sk_buff > *skb) > struct vport *vport; > const struct tnl_mutable_config *mutable; > struct iphdr *iph; > + struct ovs_key_ipv4_tunnel tun_key; > __be64 key = 0; > > if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN))) > @@ -333,12 +334,11 @@ static int capwap_rcv(struct sock *sk, struct sk_buff > *skb) > goto error; > } > > - if (mutable->flags & TNL_F_IN_KEY_MATCH) > - OVS_CB(skb)->tun_id = key; > - else > - OVS_CB(skb)->tun_id = 0; > + tnl_tun_key_init(&tun_key, iph, > + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); > + OVS_CB(skb)->tun_key = &tun_key; > > - ovs_tnl_rcv(vport, skb, iph->tos); > + ovs_tnl_rcv(vport, skb); > goto out; > > error: > diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c > index ab89c5b..8076b1d 100644 > --- a/datapath/vport-gre.c > +++ b/datapath/vport-gre.c > @@ -103,7 +103,7 @@ static struct sk_buff *gre_update_header(const struct > vport *vport, > > /* Work backwards over the options so the checksum is last. */ > if (mutable->flags & TNL_F_OUT_KEY_ACTION) > - *options = be64_get_low32(OVS_CB(skb)->tun_id); > + *options = be64_get_low32(OVS_CB(skb)->tun_key->tun_id); > Here we do need check if we doing flow-based tunneling. in case of flow-based tunneling mutable is shared between all tunnels, so there is no-way to we can do port specific flags check from mutable. about GRE checksum, we need to check for FLOW_TNL_F_CSUM flag for doing checksum on a packet. > if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) > options--; > @@ -285,7 +285,7 @@ static void gre_err(struct sk_buff *skb, u32 info) > #endif > > __skb_pull(skb, tunnel_hdr_len); > - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); > + ovs_tnl_frag_needed(vport, mutable, skb, mtu); > __skb_push(skb, tunnel_hdr_len); > same here, tunnel_hdr_len is stored in mutable which is going to be shared for all tunnels for a type in flow based tunneling, so we need to calculate it on packet send. > out: > @@ -327,6 +327,7 @@ static int gre_rcv(struct sk_buff *skb) > const struct tnl_mutable_config *mutable; > int hdr_len; > struct iphdr *iph; > + struct ovs_key_ipv4_tunnel tun_key; > __be16 flags; > __be64 key; > > @@ -351,15 +352,15 @@ static int gre_rcv(struct sk_buff *skb) > goto error; > } > > - if (mutable->flags & TNL_F_IN_KEY_MATCH) > - OVS_CB(skb)->tun_id = key; > - else > - OVS_CB(skb)->tun_id = 0; > + > + tnl_tun_key_init(&tun_key, iph, > + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); > + OVS_CB(skb)->tun_key = &tun_key; > > __skb_pull(skb, hdr_len); > skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + > ETH_HLEN); > > - ovs_tnl_rcv(vport, skb, iph->tos); > + ovs_tnl_rcv(vport, skb); > return 0; > > error: > diff --git a/datapath/vport.c b/datapath/vport.c > index dc7adfa..e8279fb 100644 > --- a/datapath/vport.c > +++ b/datapath/vport.c > @@ -462,7 +462,7 @@ void ovs_vport_receive(struct vport *vport, struct > sk_buff *skb) > OVS_CB(skb)->flow = NULL; > > if (!(vport->ops->flags & VPORT_F_TUN_ID)) > - OVS_CB(skb)->tun_id = 0; > + OVS_CB(skb)->tun_key = NULL; > > ovs_dp_process_received_packet(vport, skb); > } > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h > index f5c9cca..8334cc1 100644 > --- a/include/linux/openvswitch.h > +++ b/include/linux/openvswitch.h > @@ -278,7 +278,8 @@ enum ovs_key_attr { > OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ > OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ > OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ > - OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ > + OVS_KEY_ATTR_IPV4_TUNNEL = 62, /* struct ovs_key_ipv4_tunnel */ > + OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ > __OVS_KEY_ATTR_MAX > }; > > @@ -360,6 +361,21 @@ struct ovs_key_nd { > __u8 nd_tll[6]; > }; > > +/* Values for ovs_key_ipv4_tunnel->tun_flags */ > +#define FLOW_TNL_F_DONT_FRAGMENT (1 << 0) > +#define FLOW_TNL_F_CSUM (1 << 1) > +#define FLOW_TNL_F_KEY (1 << 2) > + > +struct ovs_key_ipv4_tunnel { > + __be64 tun_id; > + __u32 tun_flags; > + __be32 ipv4_src; > + __be32 ipv4_dst; > + __u8 ipv4_tos; > + __u8 ipv4_ttl; > + __u8 pad[2]; > +}; > + > /** > * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. > * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index c9e3210..797cb06 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct > nlattr *a) > case OVS_KEY_ATTR_TUN_ID: > case OVS_KEY_ATTR_PRIORITY: > case OVS_KEY_ATTR_IPV6: > + case OVS_KEY_ATTR_IPV4_TUNNEL: > /* not implemented */ > break; > > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 257d7a7..9ed17ed 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) > case OVS_KEY_ATTR_UNSPEC: return "unspec"; > case OVS_KEY_ATTR_ENCAP: return "encap"; > case OVS_KEY_ATTR_PRIORITY: return "priority"; > + case OVS_KEY_ATTR_TUN_ID: return "tun_id"; > + case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; > case OVS_KEY_ATTR_IN_PORT: return "in_port"; > case OVS_KEY_ATTR_ETHERNET: return "eth"; > case OVS_KEY_ATTR_VLAN: return "vlan"; > @@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) > case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; > case OVS_KEY_ATTR_ARP: return "arp"; > case OVS_KEY_ATTR_ND: return "nd"; > - case OVS_KEY_ATTR_TUN_ID: return "tun_id"; > > case __OVS_KEY_ATTR_MAX: > default: > @@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type) > case OVS_KEY_ATTR_ENCAP: return -2; > case OVS_KEY_ATTR_PRIORITY: return 4; > case OVS_KEY_ATTR_TUN_ID: return 8; > + case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel); > case OVS_KEY_ATTR_IN_PORT: return 4; > case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); > case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); > @@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) > const struct ovs_key_icmpv6 *icmpv6_key; > const struct ovs_key_arp *arp_key; > const struct ovs_key_nd *nd_key; > + const struct ovs_key_ipv4_tunnel *ipv4_tun_key; > enum ovs_key_attr attr = nl_attr_type(a); > int expected_len; > > @@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds > *ds) > ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); > break; > > + case OVS_KEY_ATTR_IPV4_TUNNEL: > + ipv4_tun_key = nl_attr_get(a); > + ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32 > + > ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")", > + ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags, > + IP_ARGS(&ipv4_tun_key->ipv4_src), > + IP_ARGS(&ipv4_tun_key->ipv4_dst), > + ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl); > + break; > + > case OVS_KEY_ATTR_IN_PORT: > ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); > break; > diff --git a/lib/odp-util.h b/lib/odp-util.h > index 16f2b15..57073ba 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct > simap *port_names, > * ------ --- ------ ----- > * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 > * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 > + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 > * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 > * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 > * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) > @@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct > simap *port_names, > * OVS_KEY_ATTR_ICMPV6 2 2 4 8 > * OVS_KEY_ATTR_ND 28 -- 4 32 > * ------------------------------------------------- > - * total 156 > + * total 184 > * > * We include some slack space in case the calculation isn't quite right or > we > * add another field and forget to adjust this value. > -- > 1.7.11.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev