On Fri, Feb 21, 2014 at 11:41 AM, Jarno Rajahalme <jrajaha...@nicira.com> wrote: > Minimize padding in sw_flow_key and move 'tp' to the main struct. > These changes simplify code when accessing the transport port numbers > and the tcp flags, and make the sw_flow_key 8 bytes smaller on 64-bit > systems (128->120 bytes). These changes also make the keys for IPv4 > packets fit in one cache line. > > There is a valid concern for safety of packing the struct > ovs_key_ipv4_tunnel, as it would be possible to take the address of > the tun_id member as a __be64 * which could result in unaligned access > in some systems. However: > > - sw_flow_key itself is 64-bit aligned, so the tun_id within is always > 64-bit aligned. > - We never make arrays of ovs_key_ipv4_tunnel (which would force every > second tun_key to be misaligned). > - We never take the address of the tun_id into a __be64 *. > - Wherever we use struct ovs_key_ipv4_tunnel outside the sw_flow_key, > it is on the stack (on tunnel input functions), where the compiler has full > control of the alignment. > > Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> > --- > datapath/datapath.c | 6 +++ > datapath/flow.c | 44 ++++++++----------- > datapath/flow.h | 29 +++++------- > datapath/flow_netlink.c | 112 > ++++++++++++++--------------------------------- > 4 files changed, 68 insertions(+), 123 deletions(-) > > diff --git a/datapath/datapath.c b/datapath/datapath.c > index 130300f..8a2c0af 100644 > --- a/datapath/datapath.c > +++ b/datapath/datapath.c > @@ -1929,6 +1929,12 @@ static int __init dp_init(void) > pr_info("Open vSwitch switching datapath %s, built "__DATE__" > "__TIME__"\n", > VERSION); > > + pr_info("Datapath sw_flow_key size: %ld bytes. 
ip.frag at %ld, > tp.flags at %ld, ipv4.addr at %ld\n", > + sizeof(struct sw_flow_key), > + offsetof(struct sw_flow_key, ip.frag), > + offsetof(struct sw_flow_key, tp.flags), > + offsetof(struct sw_flow_key, ipv4.addr)); > + > err = ovs_flow_init(); > if (err) > goto error; > diff --git a/datapath/flow.c b/datapath/flow.c > index 3cc4cdf..4e37e9b 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -65,17 +65,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) > void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) > { > struct flow_stats *stats; > - __be16 tcp_flags = 0; > + __be16 tcp_flags = flow->key.tp.flags; > int node = numa_node_id(); > > stats = rcu_dereference(flow->stats[node]); > > - if (likely(flow->key.ip.proto == IPPROTO_TCP)) { > - if (likely(flow->key.eth.type == htons(ETH_P_IP))) > - tcp_flags = flow->key.ipv4.tp.flags; > - else if (likely(flow->key.eth.type == htons(ETH_P_IPV6))) > - tcp_flags = flow->key.ipv6.tp.flags; > - } nice :)
> /* Check if already have node-specific stats. */ > if (likely(stats)) { > spin_lock(&stats->lock); > @@ -358,8 +352,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct > sw_flow_key *key, > /* The ICMPv6 type and code fields use the 16-bit transport port > * fields, so we need to store them in 16-bit network byte order. > */ > - key->ipv6.tp.src = htons(icmp->icmp6_type); > - key->ipv6.tp.dst = htons(icmp->icmp6_code); > + key->tp.src = htons(icmp->icmp6_type); > + key->tp.dst = htons(icmp->icmp6_code); > > if (icmp->icmp6_code == 0 && > (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || > @@ -520,21 +514,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, > struct sw_flow_key *key) > if (key->ip.proto == IPPROTO_TCP) { > if (tcphdr_ok(skb)) { > struct tcphdr *tcp = tcp_hdr(skb); > - key->ipv4.tp.src = tcp->source; > - key->ipv4.tp.dst = tcp->dest; > - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); > + key->tp.src = tcp->source; > + key->tp.dst = tcp->dest; > + key->tp.flags = TCP_FLAGS_BE16(tcp); > } Do you think we can directly copy 32 bits from the packet here? > } else if (key->ip.proto == IPPROTO_UDP) { > if (udphdr_ok(skb)) { > struct udphdr *udp = udp_hdr(skb); > - key->ipv4.tp.src = udp->source; > - key->ipv4.tp.dst = udp->dest; > + key->tp.src = udp->source; > + key->tp.dst = udp->dest; > } > } else if (key->ip.proto == IPPROTO_SCTP) { > if (sctphdr_ok(skb)) { > struct sctphdr *sctp = sctp_hdr(skb); > - key->ipv4.tp.src = sctp->source; > - key->ipv4.tp.dst = sctp->dest; > + key->tp.src = sctp->source; > + key->tp.dst = sctp->dest; > } > } else if (key->ip.proto == IPPROTO_ICMP) { > if (icmphdr_ok(skb)) { > @@ -542,8 +536,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, > struct sw_flow_key *key) > /* The ICMP type and code fields use the > 16-bit > * transport port fields, so we need to store > * them in 16-bit network byte order. 
*/ > - key->ipv4.tp.src = htons(icmp->type); > - key->ipv4.tp.dst = htons(icmp->code); > + key->tp.src = htons(icmp->type); > + key->tp.dst = htons(icmp->code); > } > } > > @@ -589,21 +583,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, > struct sw_flow_key *key) > if (key->ip.proto == NEXTHDR_TCP) { > if (tcphdr_ok(skb)) { > struct tcphdr *tcp = tcp_hdr(skb); > - key->ipv6.tp.src = tcp->source; > - key->ipv6.tp.dst = tcp->dest; > - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); > + key->tp.src = tcp->source; > + key->tp.dst = tcp->dest; > + key->tp.flags = TCP_FLAGS_BE16(tcp); > } > } else if (key->ip.proto == NEXTHDR_UDP) { > if (udphdr_ok(skb)) { > struct udphdr *udp = udp_hdr(skb); > - key->ipv6.tp.src = udp->source; > - key->ipv6.tp.dst = udp->dest; > + key->tp.src = udp->source; > + key->tp.dst = udp->dest; > } > } else if (key->ip.proto == NEXTHDR_SCTP) { > if (sctphdr_ok(skb)) { > struct sctphdr *sctp = sctp_hdr(skb); > - key->ipv6.tp.src = sctp->source; > - key->ipv6.tp.dst = sctp->dest; > + key->tp.src = sctp->source; > + key->tp.dst = sctp->dest; > } > } else if (key->ip.proto == NEXTHDR_ICMP) { > if (icmp6hdr_ok(skb)) { > diff --git a/datapath/flow.h b/datapath/flow.h > index 270a324..5587577 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -49,7 +49,7 @@ struct ovs_key_ipv4_tunnel { > __be16 tun_flags; > u8 ipv4_tos; > u8 ipv4_ttl; > -}; > +} __packed __aligned(4); /* Minimize padding. */ > > static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, > const struct iphdr *iph, __be64 > tun_id, > @@ -73,7 +73,7 @@ struct sw_flow_key { > u32 priority; /* Packet QoS priority. */ > u32 skb_mark; /* SKB mark. */ > u16 in_port; /* Input switch port (or > DP_MAX_PORTS). */ > - } phy; > + } __packed phy; /* Safe when right after 'tun_key'. */ > struct { > u8 src[ETH_ALEN]; /* Ethernet source address. */ > u8 dst[ETH_ALEN]; /* Ethernet destination address. 
*/ > @@ -86,23 +86,21 @@ struct sw_flow_key { > u8 ttl; /* IP TTL/hop limit. */ > u8 frag; /* One of OVS_FRAG_TYPE_*. */ > } ip; > + struct { > + __be16 src; /* TCP/UDP/SCTP source port. */ > + __be16 dst; /* TCP/UDP/SCTP destination port. */ > + __be16 flags; /* TCP flags. */ > + } tp; > union { > struct { > struct { > __be32 src; /* IP source address. */ > __be32 dst; /* IP destination address. */ > } addr; > - union { > - struct { > - __be16 src; /* > TCP/UDP/SCTP source port. */ > - __be16 dst; /* > TCP/UDP/SCTP destination port. */ > - __be16 flags; /* TCP flags. > */ > - } tp; > - struct { > - u8 sha[ETH_ALEN]; /* ARP source > hardware address. */ > - u8 tha[ETH_ALEN]; /* ARP target > hardware address. */ > - } arp; > - }; > + struct { > + u8 sha[ETH_ALEN]; /* ARP source > hardware address. */ > + u8 tha[ETH_ALEN]; /* ARP target > hardware address. */ > + } arp; > } ipv4; > struct { > struct { > @@ -111,11 +109,6 @@ struct sw_flow_key { > } addr; > __be32 label; /* IPv6 flow label. */ > struct { > - __be16 src; /* TCP/UDP/SCTP > source port. */ > - __be16 dst; /* TCP/UDP/SCTP > destination port. */ > - __be16 flags; /* TCP flags. */ > - } tp; > - struct { > struct in6_addr target; /* ND target address. > */ > u8 sll[ETH_ALEN]; /* ND source link > layer address. */ > u8 tll[ETH_ALEN]; /* ND target link > layer address. */ This change increases the range of the key for megaflows matching on the IP addresses but on nothing beyond them. But I think such flows are less likely. 
> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c > index 9d13b7a..c4f16b4 100644 > --- a/datapath/flow_netlink.c > +++ b/datapath/flow_netlink.c > @@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match > *match, > if (match->mask && (match->mask->key.ip.proto > == 0xff)) > mask_allowed |= 1ULL << > OVS_KEY_ATTR_ICMPV6; > > - if (match->key->ipv6.tp.src == > + if (match->key->tp.src == > > htons(NDISC_NEIGHBOUR_SOLICITATION) || > - match->key->ipv6.tp.src == > htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { > + match->key->tp.src == > htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { > key_expected |= 1ULL << > OVS_KEY_ATTR_ND; > - if (match->mask && > (match->mask->key.ipv6.tp.src == htons(0xffff))) > + if (match->mask && > (match->mask->key.tp.src == htons(0xffff))) > mask_allowed |= 1ULL << > OVS_KEY_ATTR_ND; > } > } > @@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match > *match, u64 attrs, > const struct ovs_key_tcp *tcp_key; > > tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); > - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { > - SW_FLOW_KEY_PUT(match, ipv4.tp.src, > - tcp_key->tcp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, > - tcp_key->tcp_dst, is_mask); > - } else { > - SW_FLOW_KEY_PUT(match, ipv6.tp.src, > - tcp_key->tcp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, > - tcp_key->tcp_dst, is_mask); > - } > + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); > + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); > attrs &= ~(1ULL << OVS_KEY_ATTR_TCP); > } > > if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) { > if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { > - SW_FLOW_KEY_PUT(match, ipv4.tp.flags, > + SW_FLOW_KEY_PUT(match, tp.flags, > > nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), > is_mask); > } else { > - SW_FLOW_KEY_PUT(match, ipv6.tp.flags, > + SW_FLOW_KEY_PUT(match, tp.flags, > > nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), > is_mask); > } > @@ -661,17 +652,8 @@ static int 
ovs_key_from_nlattrs(struct sw_flow_match > *match, u64 attrs, > const struct ovs_key_udp *udp_key; > > udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); > - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { > - SW_FLOW_KEY_PUT(match, ipv4.tp.src, > - udp_key->udp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, > - udp_key->udp_dst, is_mask); > - } else { > - SW_FLOW_KEY_PUT(match, ipv6.tp.src, > - udp_key->udp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, > - udp_key->udp_dst, is_mask); > - } > + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); > + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); > attrs &= ~(1ULL << OVS_KEY_ATTR_UDP); > } > > @@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match > *match, u64 attrs, > const struct ovs_key_sctp *sctp_key; > > sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); > - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { > - SW_FLOW_KEY_PUT(match, ipv4.tp.src, > - sctp_key->sctp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, > - sctp_key->sctp_dst, is_mask); > - } else { > - SW_FLOW_KEY_PUT(match, ipv6.tp.src, > - sctp_key->sctp_src, is_mask); > - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, > - sctp_key->sctp_dst, is_mask); > - } > + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); > + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); > attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP); > } > > @@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match > *match, u64 attrs, > const struct ovs_key_icmp *icmp_key; > > icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); > - SW_FLOW_KEY_PUT(match, ipv4.tp.src, > + SW_FLOW_KEY_PUT(match, tp.src, > htons(icmp_key->icmp_type), is_mask); > - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, > + SW_FLOW_KEY_PUT(match, tp.dst, > htons(icmp_key->icmp_code), is_mask); > attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP); > } > @@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match > *match, u64 attrs, > const struct ovs_key_icmpv6 *icmpv6_key; 
> > icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); > - SW_FLOW_KEY_PUT(match, ipv6.tp.src, > + SW_FLOW_KEY_PUT(match, tp.src, > htons(icmpv6_key->icmpv6_type), is_mask); > - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, > + SW_FLOW_KEY_PUT(match, tp.dst, > htons(icmpv6_key->icmpv6_code), is_mask); > attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6); > } > @@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (!nla) > goto nla_put_failure; > tcp_key = nla_data(nla); > - if (swkey->eth.type == htons(ETH_P_IP)) { > - tcp_key->tcp_src = output->ipv4.tp.src; > - tcp_key->tcp_dst = output->ipv4.tp.dst; > - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, > - output->ipv4.tp.flags)) > - goto nla_put_failure; > - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { > - tcp_key->tcp_src = output->ipv6.tp.src; > - tcp_key->tcp_dst = output->ipv6.tp.dst; > - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, > - output->ipv6.tp.flags)) > - goto nla_put_failure; > - } > + tcp_key->tcp_src = output->tp.src; > + tcp_key->tcp_dst = output->tp.dst; > + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, > + output->tp.flags)) > + goto nla_put_failure; > } else if (swkey->ip.proto == IPPROTO_UDP) { > struct ovs_key_udp *udp_key; > > @@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (!nla) > goto nla_put_failure; > udp_key = nla_data(nla); > - if (swkey->eth.type == htons(ETH_P_IP)) { > - udp_key->udp_src = output->ipv4.tp.src; > - udp_key->udp_dst = output->ipv4.tp.dst; > - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { > - udp_key->udp_src = output->ipv6.tp.src; > - udp_key->udp_dst = output->ipv6.tp.dst; > - } > + udp_key->udp_src = output->tp.src; > + udp_key->udp_dst = output->tp.dst; > } else if (swkey->ip.proto == IPPROTO_SCTP) { > struct ovs_key_sctp *sctp_key; > > @@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (!nla) > goto nla_put_failure; > sctp_key = nla_data(nla); > - if (swkey->eth.type == htons(ETH_P_IP)) 
{ > - sctp_key->sctp_src = output->ipv4.tp.src; > - sctp_key->sctp_dst = output->ipv4.tp.dst; > - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { > - sctp_key->sctp_src = output->ipv6.tp.src; > - sctp_key->sctp_dst = output->ipv6.tp.dst; > - } > + sctp_key->sctp_src = output->tp.src; > + sctp_key->sctp_dst = output->tp.dst; > } else if (swkey->eth.type == htons(ETH_P_IP) && > swkey->ip.proto == IPPROTO_ICMP) { > struct ovs_key_icmp *icmp_key; > @@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (!nla) > goto nla_put_failure; > icmp_key = nla_data(nla); > - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); > - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); > + icmp_key->icmp_type = ntohs(output->tp.src); > + icmp_key->icmp_code = ntohs(output->tp.dst); > } else if (swkey->eth.type == htons(ETH_P_IPV6) && > swkey->ip.proto == IPPROTO_ICMPV6) { > struct ovs_key_icmpv6 *icmpv6_key; > @@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (!nla) > goto nla_put_failure; > icmpv6_key = nla_data(nla); > - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); > - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); > + icmpv6_key->icmpv6_type = ntohs(output->tp.src); > + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); > > if (icmpv6_key->icmpv6_type == > NDISC_NEIGHBOUR_SOLICITATION || > icmpv6_key->icmpv6_type == > NDISC_NEIGHBOUR_ADVERTISEMENT) { > @@ -1271,13 +1226,10 @@ static int validate_and_copy_sample(const struct > nlattr *attr, > > static int validate_tp_port(const struct sw_flow_key *flow_key) > { > - if (flow_key->eth.type == htons(ETH_P_IP)) { > - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) > - return 0; > - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { > - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) > - return 0; > - } > + if ((flow_key->eth.type == htons(ETH_P_IP) || > + flow_key->eth.type == htons(ETH_P_IPV6)) && > + (flow_key->tp.src || flow_key->tp.dst)) > + return 0; > 
> return -EINVAL; > } looks good. Acked-by: Pravin B Shelar <pshe...@nicira.com> > -- > 1.7.10.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev