Masked set actions allow more megaflow wildcarding. All settable key types other than the tunnel key can now be set with a mask.
It is not clear whether masked set is useful for skb_priority. However, we already use the LSB of pkt_mark for IPSec in tunnels, so it might be useful to be able to set individual bits on pkt_mark. Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> --- datapath/actions.c | 218 ++++++++++++++++++++++++++++++++----------- datapath/flow_netlink.c | 51 ++++++++-- include/linux/openvswitch.h | 12 ++- 3 files changed, 218 insertions(+), 63 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 0b66e7c..435b7c6 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -125,8 +125,21 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla return 0; } +/* 'src' is already properly masked. */ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + dst[0] = src[0] | (dst[0] & ~mask[0]); + dst[1] = src[1] | (dst[1] & ~mask[1]); + dst[2] = src[2] | (dst[2] & ~mask[2]); +} + static int set_eth_addr(struct sk_buff *skb, - const struct ovs_key_ethernet *eth_key) + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; err = make_writable(skb, ETH_HLEN); @@ -135,8 +148,15 @@ static int set_eth_addr(struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + if (mask) { + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); + } else { + ether_addr_copy(eth_hdr(skb)->h_source, key->eth_src); + ether_addr_copy(eth_hdr(skb)->h_dest, key->eth_dst); + } ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); @@ -144,7 +164,7 @@ static int set_eth_addr(struct sk_buff *skb, } static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, - 
__be32 *addr, __be32 new_addr) + __be32 *addr, __be32 new_addr) { int transport_len = skb->len - skb_transport_offset(skb); @@ -204,14 +224,19 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc, u8 mask) { + /* Keep the unmasked bits. */ + tc |= (nh->priority << 4 | (nh->flow_lbl[0] & 0xF0) >> 4) & ~mask; nh->priority = tc >> 4; nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { + /* Keep the unmasked bits. */ + fl |= ((u32)(nh->flow_lbl[0] & 0x0F) << 16 + | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]) & ~mask; nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; nh->flow_lbl[2] = fl & 0x000000FF; @@ -223,10 +248,13 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; int err; + __be32 saddr, daddr; + u8 tos, ttl; err = make_writable(skb, skb_network_offset(skb) + sizeof(struct iphdr)); @@ -235,27 +263,54 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); + saddr = key->ipv4_src; + daddr = key->ipv4_dst; + tos = key->ipv4_tos; + ttl = key->ipv4_ttl; - if (ipv4_key->ipv4_dst != nh->daddr) - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); + if (mask) { + saddr |= nh->saddr & ~mask->ipv4_src; + daddr |= nh->daddr & ~mask->ipv4_dst; + tos |= nh->tos & ~mask->ipv4_tos; + ttl |= nh->ttl & ~mask->ipv4_ttl; + } + + if (saddr != 
nh->saddr) + set_ip_addr(skb, nh, &nh->saddr, saddr); + + if (daddr != nh->daddr) + set_ip_addr(skb, nh, &nh->daddr, daddr); - if (ipv4_key->ipv4_tos != nh->tos) - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); + if (tos != nh->tos) + ipv4_change_dsfield(nh, 0, tos); - if (ipv4_key->ipv4_ttl != nh->ttl) - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); + if (ttl != nh->ttl) + set_ip_ttl(skb, nh, ttl); return 0; } -static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) +static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = addr[0] | (old[0] & ~mask[0]); + masked[1] = addr[1] | (old[1] & ~mask[1]); + masked[2] = addr[2] | (old[2] & ~mask[2]); + masked[3] = addr[3] | (old[3] & ~mask[3]); +} + +static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; __be32 *saddr; __be32 *daddr; + __be32 smasked[4], dmasked[4]; + const __be32 *key_saddr, *key_daddr; + u8 tc_mask = 0xff; + u32 fl_mask = UINT_MAX; + u8 hl_mask = 0xff; err = make_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -265,12 +320,23 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) nh = ipv6_hdr(skb); saddr = (__be32 *)&nh->saddr; daddr = (__be32 *)&nh->daddr; + key_saddr = key->ipv6_src; + key_daddr = key->ipv6_dst; + + if (mask) { + mask_ipv6_addr(saddr, key_saddr, mask->ipv6_src, smasked); + key_saddr = smasked; + mask_ipv6_addr(daddr, key_daddr, mask->ipv6_dst, dmasked); + key_daddr = dmasked; + tc_mask = mask->ipv6_tclass; + fl_mask = ntohl(mask->ipv6_label); + hl_mask = mask->ipv6_hlimit; + } - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, true); + if (memcmp(key_saddr, saddr, sizeof(key->ipv6_src))) + set_ipv6_addr(skb, key->ipv6_proto, saddr, key_saddr, true); - if 
(memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + if (memcmp(key_daddr, daddr, sizeof(key->ipv6_dst))) { unsigned int offset = 0; int flags = OVS_IP6T_FH_F_SKIP_RH; bool recalc_csum = true; @@ -280,13 +346,13 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) NEXTHDR_ROUTING, NULL, &flags) != NEXTHDR_ROUTING; - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); + set_ipv6_addr(skb, key->ipv6_proto, daddr, key_daddr, + recalc_csum); } - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - nh->hop_limit = ipv6_key->ipv6_hlimit; + set_ipv6_tc(nh, key->ipv6_tclass, tc_mask); + set_ipv6_fl(nh, ntohl(key->ipv6_label), fl_mask); + nh->hop_limit = key->ipv6_hlimit | (nh->hop_limit & ~hl_mask); return 0; } @@ -315,10 +381,12 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) } } -static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; int err; + __be16 src, dst; err = make_writable(skb, skb_transport_offset(skb) + sizeof(struct udphdr)); @@ -326,19 +394,29 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - if (udp_port_key->udp_dst != uh->dest) - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); + src = key->udp_src; + dst = key->udp_dst; + if (mask) { + src |= uh->source & ~mask->udp_src; + dst |= uh->dest & ~mask->udp_dst; + } + + if (src != uh->source) + set_udp_port(skb, &uh->source, src); + + if (dst != uh->dest) + set_udp_port(skb, &uh->dest, dst); return 0; } -static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *key, 
+ const struct ovs_key_tcp *mask) { struct tcphdr *th; int err; + __be16 src, dst; err = make_writable(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)); @@ -346,36 +424,53 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - if (tcp_port_key->tcp_dst != th->dest) - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); + src = key->tcp_src; + dst = key->tcp_dst; + if (mask) { + src |= th->source & ~mask->tcp_src; + dst |= th->dest & ~mask->tcp_dst; + } + + if (src != th->source) + set_tp_port(skb, &th->source, src, &th->check); + + if (dst != th->dest) + set_tp_port(skb, &th->dest, dst, &th->check); return 0; } static int set_sctp(struct sk_buff *skb, - const struct ovs_key_sctp *sctp_port_key) + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { struct sctphdr *sh; int err; unsigned int sctphoff = skb_transport_offset(skb); + __be16 src, dst; err = make_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { + + src = key->sctp_src; + dst = key->sctp_dst; + if (mask) { + src |= sh->source & ~mask->sctp_src; + dst |= sh->dest & ~mask->sctp_dst; + } + + if (src != sh->source || dst != sh->dest) { __le32 old_correct_csum, new_csum, old_csum; old_csum = sh->checksum; old_correct_csum = sctp_compute_cksum(skb, sctphoff); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + sh->source = src; + sh->dest = dst; new_csum = sctp_compute_cksum(skb, sctphoff); @@ -460,46 +555,65 @@ static int sample(struct datapath *dp, struct sk_buff *skb, nla_len(acts_list), true); } -static int execute_set_action(struct sk_buff *skb, - const struct nlattr *nested_attr) +#define get_mask(a, type) ((const type *)nla_data(a) + 1) + 
+static int execute_set_action(struct sk_buff *skb, const struct nlattr *a) { int err = 0; + const void *mask; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); + skb->priority = (nla_len(a) == 2 * sizeof(u32)) + ? nla_get_u32(a) | (skb->priority & ~*get_mask(a, u32)) + : nla_get_u32(a); break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); + skb->mark = (nla_len(a) == 2 * sizeof(u32)) + ? nla_get_u32(a) | (skb->mark & ~*get_mask(a, u32)) + : nla_get_u32(a); break; case OVS_KEY_ATTR_IPV4_TUNNEL: - OVS_CB(skb)->tun_key = nla_data(nested_attr); + /* Masked data not supported for tunnel. */ + OVS_CB(skb)->tun_key = nla_data(a); break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ethernet)) + ? get_mask(a, struct ovs_key_ethernet) : NULL; + err = set_eth_addr(skb, nla_data(a), mask); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ipv4)) + ? get_mask(a, struct ovs_key_ipv4) : NULL; + err = set_ipv4(skb, nla_data(a), mask); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ipv6)) + ? get_mask(a, struct ovs_key_ipv6) : NULL; + err = set_ipv6(skb, nla_data(a), mask); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_tcp)) + ? get_mask(a, struct ovs_key_tcp) : NULL; + err = set_tcp(skb, nla_data(a), mask); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_udp)) + ? get_mask(a, struct ovs_key_udp) : NULL; + err = set_udp(skb, nla_data(a), mask); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, nla_data(nested_attr)); + mask = (nla_len(a) == 2 * sizeof(struct ovs_key_sctp)) + ? 
get_mask(a, struct ovs_key_sctp) : NULL; + err = set_sctp(skb, nla_data(a), mask); break; } diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 5c32cd0..477cab6 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -1274,6 +1274,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. */ +static bool validate_masked(u8 *data, int bytes) +{ + int len = bytes / 2; + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, @@ -1281,13 +1294,17 @@ static int validate_set(const struct nlattr *a, { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + bool have_mask; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + have_mask = (ovs_key_lens[key_type] * 2 == nla_len(ovs_key)); + if (key_type > OVS_KEY_ATTR_MAX || (ovs_key_lens[key_type] != nla_len(ovs_key) && + (!have_mask || !validate_masked(nla_data(ovs_key), nla_len(ovs_key))) && ovs_key_lens[key_type] != -1)) return -EINVAL; @@ -1316,12 +1333,21 @@ static int validate_set(const struct nlattr *a, return -EINVAL; ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; + if (have_mask) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (mask->ipv4_proto) /* proto is not writeable. */ + return -EINVAL; + if (mask->ipv4_frag) /* frag is not writeable. 
*/ + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: @@ -1332,12 +1358,21 @@ static int validate_set(const struct nlattr *a, return -EINVAL; ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; + if (have_mask) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + if (mask->ipv6_proto) /* proto is not writeable. */ + return -EINVAL; + if (mask->ipv6_frag) /* frag is not writeable. */ + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d7ad058..2212978 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -572,9 +572,15 @@ struct ovs_action_recirc { * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. - * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The - * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its - * value. + * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. A + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its + * value. The value can be immediately followed by a mask that specifies what + * bits of the header field are modified. The presence of the mask is + * indicated by the length field of the attribute header having value exactly + * double of the attribute value without a mask. That is, the value and the + * mask are included in the same netlink attribute. 
No mask causes all the + * bits to be set. Masking is not supported for the %OVS_KEY_ATTR_TUNNEL + * attribute. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the * top of the packets MPLS label stack. Set the ethertype of the * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to -- 1.7.10.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev