This is a first pass at providing a tun_key which can be used as the basis for flow-based tunnelling. The tun_key includes and replaces the tun_id in both struct ovs_skb_cb and struct sw_flow_key.
A significant problem with the current code, which causes it to fail to compile, is that struct ovs_key_ipv6_tunnel is itself 48 bytes, and as a result struct ovs_skb_cb is larger than 48 bytes. This means that it will not fit inside skb->cb, which is also 48 bytes. At this point it may be just as well to drop the IPv6 portions of this change; as far as I know, OVS has not supported IPv6 as the outer transport protocol for tunnelled frames. However, it does seem to be a problem that will arise at some point. One idea that I had was to make the tun_key element of struct ovs_skb_cb a pointer and set skb->destructor() to free it as needed. I am, however, concerned about the complexity and performance penalty this may introduce. Moreover, I'd like some review of the merit of stuffing all the tun_key information into skb->cb. The patch does make some effort to retain the existing tun_id behaviour; I imagine this is required for compatibility reasons. Beyond that compatibility with the tun_id behaviour, the patch makes no attempt to use tun_key. The patch is untested.
** Please do not apply ** Cc: Kyle Mestery <[email protected]> Signed-off-by: Simon Horman <[email protected]> --- datapath/actions.c | 19 ++++++++++++++-- datapath/datapath.c | 2 +- datapath/datapath.h | 4 ++-- datapath/flow.c | 50 ++++++++++++++++++++++++++++++++++++------- datapath/flow.h | 31 +++++++++++++++++++++++---- datapath/tunnel.c | 9 ++++---- datapath/tunnel.h | 3 ++- datapath/vport-capwap.c | 7 +++--- datapath/vport-gre.c | 16 ++++++++++---- datapath/vport.c | 6 ++++-- include/linux/openvswitch.h | 25 +++++++++++++++++++++- lib/dpif-netdev.c | 2 ++ lib/odp-util.c | 32 +++++++++++++++++++++++++++ 13 files changed, 174 insertions(+), 32 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 2903801..d8fad8a 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -343,7 +343,20 @@ static int execute_set_action(struct sk_buff *skb, break; case OVS_KEY_ATTR_TUN_ID: - OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); + OVS_CB(skb)->tun_key.tun_af = AF_UNSPEC; + OVS_CB(skb)->tun_key.tun_id = nla_get_be64(nested_attr); + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key.tun_af = AF_INET; + memcpy(&OVS_CB(skb)->tun_key.tun_ipv4, nla_data(nested_attr), + sizeof(OVS_CB(skb)->tun_key.tun_ipv4)); + break; + + case OVS_KEY_ATTR_IPV6_TUNNEL: + OVS_CB(skb)->tun_key.tun_af = AF_INET6; + memcpy(&OVS_CB(skb)->tun_key.tun_ipv6, nla_data(nested_attr), + sizeof(OVS_CB(skb)->tun_key.tun_ipv6)); break; case OVS_KEY_ATTR_ETHERNET: @@ -469,7 +482,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) goto out_loop; } - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key.tun_af = AF_UNSPEC; + OVS_CB(skb)->tun_key.tun_id = 0; + error = do_execute_actions(dp, skb, acts->actions, acts->actions_len, false); diff --git a/datapath/datapath.c b/datapath/datapath.c index 826dc89..6c6cf09 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -776,7 +776,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info 
*info) err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, &flow->key.phy.in_port, - &flow->key.phy.tun_id, + &flow->key.phy.tun_key, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_put; diff --git a/datapath/datapath.h b/datapath/datapath.h index 18c8598..b9d5bf8 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -96,7 +96,7 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. - * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the + * @tun_key: Key for the tunnel that encapsulated this packet. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. * @csum_start: Stores the offset from which to start checksumming independent @@ -107,7 +107,7 @@ struct datapath { */ struct ovs_skb_cb { struct sw_flow *flow; - __be64 tun_id; + struct sw_tun_key tun_key; #ifdef NEED_CSUM_NORMALIZE enum csum_type ip_summed; u16 csum_start; diff --git a/datapath/flow.c b/datapath/flow.c index 9f93550..644cd3e 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -629,7 +629,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; - key->phy.tun_id = OVS_CB(skb)->tun_id; + key->phy.tun_key = OVS_CB(skb)->tun_key; key->phy.in_port = in_port; skb_reset_mac_header(skb); @@ -1023,7 +1023,9 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, } if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { - swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + swkey->phy.tun_key.tun_af = AF_UNSPEC; + swkey->phy.tun_key.tun_id = + nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); } @@ -1162,14 +1164,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. 
*/ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct sw_tun_key *tun_key, const struct nlattr *attr) { const struct nlattr *nla; int rem; *in_port = DP_MAX_PORTS; - *tun_id = 0; + tun_key->tun_af = AF_UNSPEC; + tun_key->tun_id = 0; *priority = 0; nla_for_each_nested(nla, attr, rem) { @@ -1185,7 +1189,20 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, break; case OVS_KEY_ATTR_TUN_ID: - *tun_id = nla_get_be64(nla); + tun_key->tun_af = AF_UNSPEC; + tun_key->tun_id = nla_get_be64(nla); + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + tun_key->tun_af = AF_INET; + memcpy(&tun_key->tun_ipv4, nla_data(nla), + sizeof(tun_key->tun_ipv4)); + break; + + case OVS_KEY_ATTR_IPV6_TUNNEL: + tun_key->tun_af = AF_INET6; + memcpy(&tun_key->tun_ipv6, nla_data(nla), + sizeof(tun_key->tun_ipv6)); break; case OVS_KEY_ATTR_IN_PORT: @@ -1210,9 +1227,26 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; - if (swkey->phy.tun_id != cpu_to_be64(0) && - nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id)) - goto nla_put_failure; + if (swkey->phy.tun_key.tun_af == AF_UNSPEC && + swkey->phy.tun_key.tun_id != cpu_to_be64(0)) { + if (nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, + swkey->phy.tun_key.tun_id)) + goto nla_put_failure; + } else if (swkey->phy.tun_key.tun_af == AF_INET) { + struct ovs_key_ipv4_tunnel *key; + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, sizeof(*key)); + if (!nla) + goto nla_put_failure; + key = nla_data(nla); + *key = swkey->phy.tun_key.tun_ipv4; + } else if (swkey->phy.tun_key.tun_af == AF_INET6) { + struct ovs_key_ipv6_tunnel *key; + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_TUNNEL, sizeof(*key)); + if (!nla) + goto nla_put_failure; + key = nla_data(nla); + *key = swkey->phy.tun_key.tun_ipv6; + } if (swkey->phy.in_port != 
DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) diff --git a/datapath/flow.h b/datapath/flow.h index 5261fa8..c39023a 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -40,11 +40,33 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/** + * struct ovs_tun_key - Key for a tunnel + * @tun_af: The address family of the tunnel. One of AF_*. + * @tun_id: Use for the keys consisting of only the tun_id. + * Use in conjunction with .tun_af = AF_UNSPEC. + * Set to zero for no tunnel. + * @tun_ipv4: Use for fully specified tunnel keys that use IPv4 for + * the outer packet. + * Use in conjunction with .tun_af = AF_INET. + * @tun_ipv6: Use for fully specified tunnel keys that use IPv6 for + * the outer packet. + * Use in conjunction with .tun_af = AF_INET6. + */ +struct sw_tun_key { + __kernel_sa_family_t tun_af; + union { + __be64 tun_id; + struct ovs_key_ipv4_tunnel tun_ipv4; + struct ovs_key_ipv6_tunnel tun_ipv6; + }; +}; + struct sw_flow_key { struct { - __be64 tun_id; /* Encapsulating tunnel ID. */ - u32 priority; /* Packet QoS priority. */ - u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ + struct sw_tun_key tun_key; /* Encapsulating tunnel key. */ + u32 priority; /* Packet QoS priority. */ + u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { u8 src[ETH_ALEN]; /* Ethernet source address. 
*/ @@ -165,7 +187,8 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct sw_tun_key *tun_key, const struct nlattr *); #define MAX_ACTIONS_BUFSIZE (16 * 1024) diff --git a/datapath/tunnel.c b/datapath/tunnel.c index d406dbc..0305fbd 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -613,7 +613,8 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb, bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key) + struct sk_buff *skb, unsigned int mtu, + struct sw_tun_key *tun_key) { unsigned int eth_hdr_len = ETH_HLEN; unsigned int total_length = 0, header_length = 0, payload_length; @@ -706,7 +707,7 @@ bool ovs_tnl_frag_needed(struct vport *vport, */ if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) == (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) - OVS_CB(nskb)->tun_id = flow_key; + OVS_CB(nskb)->tun_key = *tun_key; if (unlikely(compute_ip_summed(nskb, false))) { kfree_skb(nskb); @@ -761,7 +762,7 @@ static bool check_mtu(struct sk_buff *skb, if (packet_length > mtu && ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + &OVS_CB(skb)->tun_key)) return false; } } @@ -778,7 +779,7 @@ static bool check_mtu(struct sk_buff *skb, if (packet_length > mtu && ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + &OVS_CB(skb)->tun_key)) return false; } } diff --git a/datapath/tunnel.h b/datapath/tunnel.h index 33eb63c..d5b122c 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -276,7 +276,8 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, const struct 
tnl_mutable_config **mutable); bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key); + struct sk_buff *skb, unsigned int mtu, + struct sw_tun_key *tun_key); void ovs_tnl_free_linked_skbs(struct sk_buff *skb); int ovs_tnl_init(void); diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index e5b7afb..3d492cf 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -220,7 +220,7 @@ static struct sk_buff *capwap_update_header(const struct vport *vport, struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); - opt->key = OVS_CB(skb)->tun_id; + opt->key = OVS_CB(skb)->tun_key.tun_id; } udph->len = htons(skb->len - skb_transport_offset(skb)); @@ -333,10 +333,11 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) goto error; } + OVS_CB(skb)->tun_key.tun_af = AF_UNSPEC; if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; + OVS_CB(skb)->tun_key.tun_id = key; else - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key.tun_id = 0; ovs_tnl_rcv(vport, skb, iph->tos); goto out; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 3bb55f0..1542ca8 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -103,7 +103,7 @@ static struct sk_buff *gre_update_header(const struct vport *vport, /* Work backwards over the options so the checksum is last. 
*/ if (mutable->flags & TNL_F_OUT_KEY_ACTION) - *options = be64_get_low32(OVS_CB(skb)->tun_id); + *options = be64_get_low32(OVS_CB(skb)->tun_key.tun_id); if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) options--; @@ -285,7 +285,13 @@ static void gre_err(struct sk_buff *skb, u32 info) #endif __skb_pull(skb, tunnel_hdr_len); - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); + { + struct sw_tun_key tun_key = { + .tun_af = AF_UNSPEC, + .tun_id = key + }; + ovs_tnl_frag_needed(vport, mutable, skb, mtu, &tun_key); + } __skb_push(skb, tunnel_hdr_len); out: @@ -351,10 +357,12 @@ static int gre_rcv(struct sk_buff *skb) goto error; } + + OVS_CB(skb)->tun_key.tun_af = AF_UNSPEC; if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; + OVS_CB(skb)->tun_key.tun_id = key; else - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key.tun_id = 0; __skb_pull(skb, hdr_len); skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); diff --git a/datapath/vport.c b/datapath/vport.c index b75a866..d05bc34 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -461,8 +461,10 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) if (!(vport->ops->flags & VPORT_F_FLOW)) OVS_CB(skb)->flow = NULL; - if (!(vport->ops->flags & VPORT_F_TUN_ID)) - OVS_CB(skb)->tun_id = 0; + if (!(vport->ops->flags & VPORT_F_TUN_ID)) { + OVS_CB(skb)->tun_key.tun_af = AF_UNSPEC; + OVS_CB(skb)->tun_key.tun_id = 0; + } ovs_dp_process_received_packet(vport, skb); } diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 0578b5f..da32f7f 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -278,7 +278,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ + OVS_KEY_ATTR_TUN_ID, /* be64 tunnel ID */ + OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ + 
OVS_KEY_ATTR_IPV6_TUNNEL = 63, /* struct ovs_key_ipv6_tunnel */ __OVS_KEY_ATTR_MAX }; @@ -360,6 +362,27 @@ struct ovs_key_nd { __u8 nd_tll[6]; }; +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 tun_proto; /* One of TNL_T_PROTO_* */ + __u8 reserved; +}; + +struct ovs_key_ipv6_tunnel { + __be64 tun_id; + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be32 ipv6_label; /* 20-bits in least-significant bits. */ + __u8 ipv6_tclass; + __u8 ipv6_hlimit; + __u8 tun_proto; /* One of TNL_T_PROTO_* */ + __u8 reserved; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a33fe23..087c23b 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1154,6 +1154,8 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_IPV6: + case OVS_KEY_ATTR_IPV4_TUNNEL: + case OVS_KEY_ATTR_IPV6_TUNNEL: /* not implemented */ break; diff --git a/lib/odp-util.c b/lib/odp-util.c index 8fa3359..cb0815c 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -107,6 +107,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; case OVS_KEY_ATTR_TUN_ID: return "tun_id"; + case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; + case OVS_KEY_ATTR_IPV6_TUNNEL: return "ipv6_tunnel"; case __OVS_KEY_ATTR_MAX: default: @@ -523,6 +525,8 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_ICMPV6: return sizeof(struct ovs_key_icmpv6); case OVS_KEY_ATTR_ARP: return sizeof(struct ovs_key_arp); case OVS_KEY_ATTR_ND: return sizeof(struct ovs_key_nd); + case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel); + case OVS_KEY_ATTR_IPV6_TUNNEL: return sizeof(struct ovs_key_ipv6_tunnel); case OVS_KEY_ATTR_UNSPEC: case 
__OVS_KEY_ATTR_MAX: @@ -577,6 +581,8 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) const struct ovs_key_icmpv6 *icmpv6_key; const struct ovs_key_arp *arp_key; const struct ovs_key_nd *nd_key; + const struct ovs_key_ipv4_tunnel *ipv4_tun_key; + const struct ovs_key_ipv6_tunnel *ipv6_tun_key; enum ovs_key_attr attr = nl_attr_type(a); int expected_len; @@ -607,6 +613,32 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + ipv4_tun_key = nl_attr_get(a); + ds_put_format(ds, "(src="IP_FMT",dst="IP_FMT + ",tos=%#"PRIx8",ttl=%"PRIu8",tun_proto=%"PRIu8")", + IP_ARGS(&ipv4_tun_key->ipv4_src), + IP_ARGS(&ipv4_tun_key->ipv4_dst), + ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl, + ipv4_tun_key->tun_proto); + break; + + case OVS_KEY_ATTR_IPV6_TUNNEL: { + char src_str[INET6_ADDRSTRLEN]; + char dst_str[INET6_ADDRSTRLEN]; + + ipv6_tun_key = nl_attr_get(a); + inet_ntop(AF_INET6, ipv6_tun_key->ipv6_src, src_str, sizeof src_str); + inet_ntop(AF_INET6, ipv6_tun_key->ipv6_dst, dst_str, sizeof dst_str); + + ds_put_format(ds, "(src=%s,dst=%s,label=%#"PRIx32 + ",tclass=%#"PRIx8",hlimit=%"PRIu8",proto=%"PRIu8")", + src_str, dst_str, ntohl(ipv6_tun_key->ipv6_label), + ipv6_tun_key->ipv6_tclass, ipv6_tun_key->ipv6_hlimit, + ipv6_tun_key->tun_proto); + break; + } + case OVS_KEY_ATTR_IN_PORT: ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); break; -- 1.7.9.5 _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
