On Fri, Feb 21, 2014 at 11:41 AM, Jarno Rajahalme <[email protected]> wrote:
> Minimize padding in sw_flow_key and move 'tp' to the main struct.
> These changes simplify code when accessing the transport port numbers
> and the tcp flags, and make the sw_flow_key 8 bytes smaller on 64-bit
> systems (128->120 bytes). These changes also make the keys for IPv4
> packets fit in one cache line.
>
> There is a valid concern about the safety of packing the struct
> ovs_key_ipv4_tunnel, as it would be possible to take the address of
> the tun_id member as a __be64 *, which could result in unaligned access
> on some systems. However:
>
> - sw_flow_key itself is 64-bit aligned, so the tun_id within is always
> 64-bit aligned.
> - We never make arrays of ovs_key_ipv4_tunnel (which would force every
> second tun_key to be misaligned).
> - We never take the address of the tun_id into a __be64 *.
> - Wherever we use struct ovs_key_ipv4_tunnel outside the sw_flow_key,
> it is on the stack (in tunnel input functions), where the compiler has full
> control of the alignment.
>
> Signed-off-by: Jarno Rajahalme <[email protected]>
> ---
> datapath/datapath.c | 6 +++
> datapath/flow.c | 44 ++++++++-----------
> datapath/flow.h | 29 +++++-------
> datapath/flow_netlink.c | 112
> ++++++++++++++---------------------------------
> 4 files changed, 68 insertions(+), 123 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 130300f..8a2c0af 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -1929,6 +1929,12 @@ static int __init dp_init(void)
> pr_info("Open vSwitch switching datapath %s, built "__DATE__"
> "__TIME__"\n",
> VERSION);
>
> + pr_info("Datapath sw_flow_key size: %ld bytes. ip.frag at %ld,
> tp.flags at %ld, ipv4.addr at %ld\n",
> + sizeof(struct sw_flow_key),
> + offsetof(struct sw_flow_key, ip.frag),
> + offsetof(struct sw_flow_key, tp.flags),
> + offsetof(struct sw_flow_key, ipv4.addr));
> +
> err = ovs_flow_init();
> if (err)
> goto error;
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 3cc4cdf..4e37e9b 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -65,17 +65,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
> void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
> {
> struct flow_stats *stats;
> - __be16 tcp_flags = 0;
> + __be16 tcp_flags = flow->key.tp.flags;
> int node = numa_node_id();
>
> stats = rcu_dereference(flow->stats[node]);
>
> - if (likely(flow->key.ip.proto == IPPROTO_TCP)) {
> - if (likely(flow->key.eth.type == htons(ETH_P_IP)))
> - tcp_flags = flow->key.ipv4.tp.flags;
> - else if (likely(flow->key.eth.type == htons(ETH_P_IPV6)))
> - tcp_flags = flow->key.ipv6.tp.flags;
> - }
nice :)
> /* Check if already have node-specific stats. */
> if (likely(stats)) {
> spin_lock(&stats->lock);
> @@ -358,8 +352,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct
> sw_flow_key *key,
> /* The ICMPv6 type and code fields use the 16-bit transport port
> * fields, so we need to store them in 16-bit network byte order.
> */
> - key->ipv6.tp.src = htons(icmp->icmp6_type);
> - key->ipv6.tp.dst = htons(icmp->icmp6_code);
> + key->tp.src = htons(icmp->icmp6_type);
> + key->tp.dst = htons(icmp->icmp6_code);
>
> if (icmp->icmp6_code == 0 &&
> (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
> @@ -520,21 +514,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port,
> struct sw_flow_key *key)
> if (key->ip.proto == IPPROTO_TCP) {
> if (tcphdr_ok(skb)) {
> struct tcphdr *tcp = tcp_hdr(skb);
> - key->ipv4.tp.src = tcp->source;
> - key->ipv4.tp.dst = tcp->dest;
> - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
> + key->tp.src = tcp->source;
> + key->tp.dst = tcp->dest;
> + key->tp.flags = TCP_FLAGS_BE16(tcp);
> }
Do you think we can directly copy 32 bits from the packet here?
> } else if (key->ip.proto == IPPROTO_UDP) {
> if (udphdr_ok(skb)) {
> struct udphdr *udp = udp_hdr(skb);
> - key->ipv4.tp.src = udp->source;
> - key->ipv4.tp.dst = udp->dest;
> + key->tp.src = udp->source;
> + key->tp.dst = udp->dest;
> }
> } else if (key->ip.proto == IPPROTO_SCTP) {
> if (sctphdr_ok(skb)) {
> struct sctphdr *sctp = sctp_hdr(skb);
> - key->ipv4.tp.src = sctp->source;
> - key->ipv4.tp.dst = sctp->dest;
> + key->tp.src = sctp->source;
> + key->tp.dst = sctp->dest;
> }
> } else if (key->ip.proto == IPPROTO_ICMP) {
> if (icmphdr_ok(skb)) {
> @@ -542,8 +536,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port,
> struct sw_flow_key *key)
> /* The ICMP type and code fields use the
> 16-bit
> * transport port fields, so we need to store
> * them in 16-bit network byte order. */
> - key->ipv4.tp.src = htons(icmp->type);
> - key->ipv4.tp.dst = htons(icmp->code);
> + key->tp.src = htons(icmp->type);
> + key->tp.dst = htons(icmp->code);
> }
> }
>
> @@ -589,21 +583,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port,
> struct sw_flow_key *key)
> if (key->ip.proto == NEXTHDR_TCP) {
> if (tcphdr_ok(skb)) {
> struct tcphdr *tcp = tcp_hdr(skb);
> - key->ipv6.tp.src = tcp->source;
> - key->ipv6.tp.dst = tcp->dest;
> - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
> + key->tp.src = tcp->source;
> + key->tp.dst = tcp->dest;
> + key->tp.flags = TCP_FLAGS_BE16(tcp);
> }
> } else if (key->ip.proto == NEXTHDR_UDP) {
> if (udphdr_ok(skb)) {
> struct udphdr *udp = udp_hdr(skb);
> - key->ipv6.tp.src = udp->source;
> - key->ipv6.tp.dst = udp->dest;
> + key->tp.src = udp->source;
> + key->tp.dst = udp->dest;
> }
> } else if (key->ip.proto == NEXTHDR_SCTP) {
> if (sctphdr_ok(skb)) {
> struct sctphdr *sctp = sctp_hdr(skb);
> - key->ipv6.tp.src = sctp->source;
> - key->ipv6.tp.dst = sctp->dest;
> + key->tp.src = sctp->source;
> + key->tp.dst = sctp->dest;
> }
> } else if (key->ip.proto == NEXTHDR_ICMP) {
> if (icmp6hdr_ok(skb)) {
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 270a324..5587577 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -49,7 +49,7 @@ struct ovs_key_ipv4_tunnel {
> __be16 tun_flags;
> u8 ipv4_tos;
> u8 ipv4_ttl;
> -};
> +} __packed __aligned(4); /* Minimize padding. */
>
> static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
> const struct iphdr *iph, __be64
> tun_id,
> @@ -73,7 +73,7 @@ struct sw_flow_key {
> u32 priority; /* Packet QoS priority. */
> u32 skb_mark; /* SKB mark. */
> u16 in_port; /* Input switch port (or
> DP_MAX_PORTS). */
> - } phy;
> + } __packed phy; /* Safe when right after 'tun_key'. */
> struct {
> u8 src[ETH_ALEN]; /* Ethernet source address. */
> u8 dst[ETH_ALEN]; /* Ethernet destination address. */
> @@ -86,23 +86,21 @@ struct sw_flow_key {
> u8 ttl; /* IP TTL/hop limit. */
> u8 frag; /* One of OVS_FRAG_TYPE_*. */
> } ip;
> + struct {
> + __be16 src; /* TCP/UDP/SCTP source port. */
> + __be16 dst; /* TCP/UDP/SCTP destination port. */
> + __be16 flags; /* TCP flags. */
> + } tp;
> union {
> struct {
> struct {
> __be32 src; /* IP source address. */
> __be32 dst; /* IP destination address. */
> } addr;
> - union {
> - struct {
> - __be16 src; /*
> TCP/UDP/SCTP source port. */
> - __be16 dst; /*
> TCP/UDP/SCTP destination port. */
> - __be16 flags; /* TCP flags.
> */
> - } tp;
> - struct {
> - u8 sha[ETH_ALEN]; /* ARP source
> hardware address. */
> - u8 tha[ETH_ALEN]; /* ARP target
> hardware address. */
> - } arp;
> - };
> + struct {
> + u8 sha[ETH_ALEN]; /* ARP source
> hardware address. */
> + u8 tha[ETH_ALEN]; /* ARP target
> hardware address. */
> + } arp;
> } ipv4;
> struct {
> struct {
> @@ -111,11 +109,6 @@ struct sw_flow_key {
> } addr;
> __be32 label; /* IPv6 flow label. */
> struct {
> - __be16 src; /* TCP/UDP/SCTP
> source port. */
> - __be16 dst; /* TCP/UDP/SCTP
> destination port. */
> - __be16 flags; /* TCP flags. */
> - } tp;
> - struct {
> struct in6_addr target; /* ND target address.
> */
> u8 sll[ETH_ALEN]; /* ND source link
> layer address. */
> u8 tll[ETH_ALEN]; /* ND target link
> layer address. */
This change increases the key range for megaflows that match on IP
addresses but on nothing beyond them. But I think such flows are less likely.
> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
> index 9d13b7a..c4f16b4 100644
> --- a/datapath/flow_netlink.c
> +++ b/datapath/flow_netlink.c
> @@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match
> *match,
> if (match->mask && (match->mask->key.ip.proto
> == 0xff))
> mask_allowed |= 1ULL <<
> OVS_KEY_ATTR_ICMPV6;
>
> - if (match->key->ipv6.tp.src ==
> + if (match->key->tp.src ==
>
> htons(NDISC_NEIGHBOUR_SOLICITATION) ||
> - match->key->ipv6.tp.src ==
> htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
> + match->key->tp.src ==
> htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
> key_expected |= 1ULL <<
> OVS_KEY_ATTR_ND;
> - if (match->mask &&
> (match->mask->key.ipv6.tp.src == htons(0xffff)))
> + if (match->mask &&
> (match->mask->key.tp.src == htons(0xffff)))
> mask_allowed |= 1ULL <<
> OVS_KEY_ATTR_ND;
> }
> }
> @@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match
> *match, u64 attrs,
> const struct ovs_key_tcp *tcp_key;
>
> tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
> - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
> - SW_FLOW_KEY_PUT(match, ipv4.tp.src,
> - tcp_key->tcp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
> - tcp_key->tcp_dst, is_mask);
> - } else {
> - SW_FLOW_KEY_PUT(match, ipv6.tp.src,
> - tcp_key->tcp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
> - tcp_key->tcp_dst, is_mask);
> - }
> + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
> + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
> attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
> }
>
> if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
> if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
> - SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
> + SW_FLOW_KEY_PUT(match, tp.flags,
>
> nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
> is_mask);
> } else {
> - SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
> + SW_FLOW_KEY_PUT(match, tp.flags,
>
> nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
> is_mask);
> }
> @@ -661,17 +652,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match
> *match, u64 attrs,
> const struct ovs_key_udp *udp_key;
>
> udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
> - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
> - SW_FLOW_KEY_PUT(match, ipv4.tp.src,
> - udp_key->udp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
> - udp_key->udp_dst, is_mask);
> - } else {
> - SW_FLOW_KEY_PUT(match, ipv6.tp.src,
> - udp_key->udp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
> - udp_key->udp_dst, is_mask);
> - }
> + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
> + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
> attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
> }
>
> @@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match
> *match, u64 attrs,
> const struct ovs_key_sctp *sctp_key;
>
> sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
> - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
> - SW_FLOW_KEY_PUT(match, ipv4.tp.src,
> - sctp_key->sctp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
> - sctp_key->sctp_dst, is_mask);
> - } else {
> - SW_FLOW_KEY_PUT(match, ipv6.tp.src,
> - sctp_key->sctp_src, is_mask);
> - SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
> - sctp_key->sctp_dst, is_mask);
> - }
> + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
> + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
> attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
> }
>
> @@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match
> *match, u64 attrs,
> const struct ovs_key_icmp *icmp_key;
>
> icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
> - SW_FLOW_KEY_PUT(match, ipv4.tp.src,
> + SW_FLOW_KEY_PUT(match, tp.src,
> htons(icmp_key->icmp_type), is_mask);
> - SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
> + SW_FLOW_KEY_PUT(match, tp.dst,
> htons(icmp_key->icmp_code), is_mask);
> attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
> }
> @@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match
> *match, u64 attrs,
> const struct ovs_key_icmpv6 *icmpv6_key;
>
> icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
> - SW_FLOW_KEY_PUT(match, ipv6.tp.src,
> + SW_FLOW_KEY_PUT(match, tp.src,
> htons(icmpv6_key->icmpv6_type), is_mask);
> - SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
> + SW_FLOW_KEY_PUT(match, tp.dst,
> htons(icmpv6_key->icmpv6_code), is_mask);
> attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
> }
> @@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (!nla)
> goto nla_put_failure;
> tcp_key = nla_data(nla);
> - if (swkey->eth.type == htons(ETH_P_IP)) {
> - tcp_key->tcp_src = output->ipv4.tp.src;
> - tcp_key->tcp_dst = output->ipv4.tp.dst;
> - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
> - output->ipv4.tp.flags))
> - goto nla_put_failure;
> - } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
> - tcp_key->tcp_src = output->ipv6.tp.src;
> - tcp_key->tcp_dst = output->ipv6.tp.dst;
> - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
> - output->ipv6.tp.flags))
> - goto nla_put_failure;
> - }
> + tcp_key->tcp_src = output->tp.src;
> + tcp_key->tcp_dst = output->tp.dst;
> + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
> + output->tp.flags))
> + goto nla_put_failure;
> } else if (swkey->ip.proto == IPPROTO_UDP) {
> struct ovs_key_udp *udp_key;
>
> @@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (!nla)
> goto nla_put_failure;
> udp_key = nla_data(nla);
> - if (swkey->eth.type == htons(ETH_P_IP)) {
> - udp_key->udp_src = output->ipv4.tp.src;
> - udp_key->udp_dst = output->ipv4.tp.dst;
> - } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
> - udp_key->udp_src = output->ipv6.tp.src;
> - udp_key->udp_dst = output->ipv6.tp.dst;
> - }
> + udp_key->udp_src = output->tp.src;
> + udp_key->udp_dst = output->tp.dst;
> } else if (swkey->ip.proto == IPPROTO_SCTP) {
> struct ovs_key_sctp *sctp_key;
>
> @@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (!nla)
> goto nla_put_failure;
> sctp_key = nla_data(nla);
> - if (swkey->eth.type == htons(ETH_P_IP)) {
> - sctp_key->sctp_src = output->ipv4.tp.src;
> - sctp_key->sctp_dst = output->ipv4.tp.dst;
> - } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
> - sctp_key->sctp_src = output->ipv6.tp.src;
> - sctp_key->sctp_dst = output->ipv6.tp.dst;
> - }
> + sctp_key->sctp_src = output->tp.src;
> + sctp_key->sctp_dst = output->tp.dst;
> } else if (swkey->eth.type == htons(ETH_P_IP) &&
> swkey->ip.proto == IPPROTO_ICMP) {
> struct ovs_key_icmp *icmp_key;
> @@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (!nla)
> goto nla_put_failure;
> icmp_key = nla_data(nla);
> - icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
> - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
> + icmp_key->icmp_type = ntohs(output->tp.src);
> + icmp_key->icmp_code = ntohs(output->tp.dst);
> } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
> swkey->ip.proto == IPPROTO_ICMPV6) {
> struct ovs_key_icmpv6 *icmpv6_key;
> @@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (!nla)
> goto nla_put_failure;
> icmpv6_key = nla_data(nla);
> - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
> - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
> + icmpv6_key->icmpv6_type = ntohs(output->tp.src);
> + icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
>
> if (icmpv6_key->icmpv6_type ==
> NDISC_NEIGHBOUR_SOLICITATION ||
> icmpv6_key->icmpv6_type ==
> NDISC_NEIGHBOUR_ADVERTISEMENT) {
> @@ -1271,13 +1226,10 @@ static int validate_and_copy_sample(const struct
> nlattr *attr,
>
> static int validate_tp_port(const struct sw_flow_key *flow_key)
> {
> - if (flow_key->eth.type == htons(ETH_P_IP)) {
> - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
> - return 0;
> - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
> - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
> - return 0;
> - }
> + if ((flow_key->eth.type == htons(ETH_P_IP) ||
> + flow_key->eth.type == htons(ETH_P_IPV6)) &&
> + (flow_key->tp.src || flow_key->tp.dst))
> + return 0;
>
> return -EINVAL;
> }
looks good.
Acked-by: Pravin B Shelar <[email protected]>
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> [email protected]
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev