Jesse, I’ll send a new patch series soon, but here are the fixes to the comments below for your viewing pleasure:
Jarno diff --git a/datapath/actions.c b/datapath/actions.c index 0cb7c9e..243b672 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -510,7 +510,7 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *key, static bool is_ipv6_mask_nonzero(const __be32 addr[4]) { - return addr[0] | addr[1] | addr[2] | addr[3]; + return !!(addr[0] | addr[1] | addr[2] | addr[3]); } static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key, @@ -565,7 +565,7 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key, if (mask->ipv6_tclass) { ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh)); - } + } if (mask->ipv6_label) { set_ipv6_fl(nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); @@ -599,6 +599,7 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *key, return err; uh = udp_hdr(skb); + /* Either of the masks is non-zero, so do not bother checking them. */ src = MASKED(uh->source, key->udp_src, mask->udp_src); dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); @@ -824,8 +825,7 @@ static int execute_set_action(struct sk_buff *skb, const struct nlattr *a) } /* Mask is at the midpoint of the data. */ -#define get_mask(a, type) \ - ((const type *)((const char *)nla_data(a) + nla_len(a))) +#define get_mask(a, type) ((const type *)nla_data(a) + 1) static int execute_masked_set_action(struct sk_buff *skb, const struct nlattr *a) diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index faabeb2..722640a 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -325,6 +325,20 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } +static bool is_all_ones(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (~fp[i]) + return false; + + return true; +} + static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -1646,13 +1660,17 @@ static int validate_set(const struct nlattr *a, err = validate_and_copy_set_tun(a, sfa); if (err) return err; - return 0; + break; case OVS_KEY_ATTR_IPV4: if (eth_type != htons(ETH_P_IP)) return -EINVAL; + if (!flow_key->ip.proto) + return -EINVAL; + ipv4_key = nla_data(ovs_key); + if (masked) { const struct ovs_key_ipv4 *mask = ipv4_key + 1; @@ -1660,9 +1678,6 @@ static int validate_set(const struct nlattr *a, if (mask->ipv4_proto || mask->ipv4_frag) return -EINVAL; } else { - if (!flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_proto != flow_key->ip.proto) return -EINVAL; @@ -1675,6 +1690,9 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; + if (!flow_key->ip.proto) + return -EINVAL; + ipv6_key = nla_data(ovs_key); if (masked) { const struct ovs_key_ipv6 *mask = ipv6_key + 1; @@ -1687,9 +1705,6 @@ static int validate_set(const struct nlattr *a, if (ntohl(mask->ipv6_label) & 0xFFF00000) return -EINVAL; } else { - if (!flow_key->ip.proto) - return -EINVAL; - if (ipv6_key->ipv6_proto != flow_key->ip.proto) return -EINVAL; @@ -1734,35 +1749,36 @@ static int validate_set(const struct nlattr *a, return -EINVAL; } - /* Convert non-masked set actions to masked set actions. - * Tunnel set action returns before getting here. */ - if (!masked) { + /* Convert non-masked non-tunnel set actions to masked set actions. */ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { int start, len = key_len * 2; struct nlattr *at; *skip_copy = true; - start = add_nested_action_start(sfa, - OVS_ACTION_ATTR_SET_MASKED); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET_MASKED); if (start < 0) return start; - at = reserve_sfa_size(sfa, nla_attr_size(len)); + at = __add_action(sfa, key_type, NULL, len); if (IS_ERR(at)) return PTR_ERR(at); - at->nla_type = key_type; - at->nla_len = nla_attr_size(len); - memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ /* Even though all-ones mask includes non-writeable fields, * which we do not allow above, we will not actually set them * when we execute the masked set action. */ memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ - memset((unsigned char *)at + at->nla_len, 0, nla_padlen(len)); add_nested_action_end(*sfa, start); + } else if (masked) { + /* Reject fully masked masked set actions so that the above + * conversion can be unabiguously reverted when sending + * actions back to userspace. */ + if (is_all_ones(nla_data(ovs_key) + key_len, key_len)) + return -EINVAL; } + return 0; } @@ -1956,6 +1972,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. */ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa) @@ -2043,6 +2060,27 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + if (is_all_ones(nla_data(ovs_key) + key_len, key_len)) { + /* Revert the conversion we did from a non-masked set action to + * masked set action. */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, + ovs_key)) + return -EMSGSIZE; + } else { + if (nla_put(skb, OVS_ACTION_ATTR_SET_MASKED, nla_len(a), + ovs_key)) + return -EMSGSIZE; + } + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -2058,6 +2096,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_MASKED: + err = masked_set_action_to_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) On Aug 11, 2014, at 5:39 PM, Jesse Gross <je...@nicira.com> wrote: > On Mon, Aug 11, 2014 at 9:15 AM, Jarno Rajahalme <jrajaha...@nicira.com> > wrote: >> Masked set action allows more megaflow wildcarding. Masked set action >> is now supported for all writeable key types, except for the tunnel >> key. >> >> The set tunnel action is an exception as any input tunnel info is >> cleared before action processing starts, so there is no tunnel info to >> mask. >> >> The kernel module converts all (non-tunnel) set actions to masked set >> actions. This makes action processing more uniform, and results in >> less branching and duplicating the action processing code. >> >> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> > > This triggers some sparse warnings that would be nice to avoid: > > CHECK /home/jesse/openvswitch/datapath/linux/actions.c > /home/jesse/openvswitch/datapath/linux/actions.c:498:44: warning: > incorrect type in return expression (different base types) > /home/jesse/openvswitch/datapath/linux/actions.c:498:44: expected bool > /home/jesse/openvswitch/datapath/linux/actions.c:498:44: got > restricted __be32 > >> diff --git a/datapath/actions.c b/datapath/actions.c >> index 25c5d77..d57d779 100644 >> --- a/datapath/actions.c >> +++ b/datapath/actions.c >> @@ -432,35 +462,47 @@ static int set_ipv4(struct sk_buff *skb, const struct >> ovs_key_ipv4 *ipv4_key) >> >> nh = ip_hdr(skb); >> >> - if (ipv4_key->ipv4_src != nh->saddr) { >> - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); >> - flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src); >> - } >> + /* Setting an IP addresses is typically only a side effect of >> + * matching on them in the current userspace implementation, so it >> + * makes sense to check if the value actually changed. */ >> + if (mask->ipv4_src) { >> + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); >> >> - if (ipv4_key->ipv4_dst != nh->daddr) { > > Here we are checking first that the mask is non-zero and then later > that the values are different. In other places we do only the second > step. What is the rationale? > >> +/* Mask is at the midpoint of the data. */ >> +#define get_mask(a, type) \ >> + ((const type *)((const char *)nla_data(a) + nla_len(a))) > > This doesn't seem right to me. From a netlink perspective, the > attribute length covers the whole thing, both the value and mask. So > won't this always find the end of the attribute, not the middle? > >> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c >> index e525c9d..937f4d6 100644 >> --- a/datapath/flow_netlink.c >> +++ b/datapath/flow_netlink.c >> @@ -1543,42 +1563,62 @@ static int validate_set(const struct nlattr *a, >> break; >> >> case OVS_KEY_ATTR_TUNNEL: >> - *set_tun = true; >> + if (masked) >> + return -EINVAL; /* Masked tunnel set not supported. >> */ >> + *skip_copy = true; >> err = validate_and_copy_set_tun(a, sfa); >> if (err) >> return err; >> - break; >> + return 0; > > Returning 0 here makes me a little nervous - if we ever add more > validation at the end of the function, it might be silently skipped. > >> case OVS_KEY_ATTR_IPV4: > [...] >> - if (ipv4_key->ipv4_frag != flow_key->ip.frag) >> - return -EINVAL; >> + /* Non-writeable fields. */ >> + if (mask->ipv4_proto || mask->ipv4_frag) >> + return -EINVAL; >> + } else { >> + if (!flow_key->ip.proto) >> + return -EINVAL; > > I believe that this check on ip.proto is being used to verify that the > IP header is actually present, so this would mean that we can write > off the end of the packet in the masked case. > >> case OVS_KEY_ATTR_IPV6: > [...] >> - if (ipv6_key->ipv6_frag != flow_key->ip.frag) >> - return -EINVAL; >> + /* Non-writeable fields. */ >> + if (mask->ipv6_proto || mask->ipv6_frag) >> + return -EINVAL; >> + >> + /* Invalid bits in the flow label mask? */ >> + if (ntohl(mask->ipv6_label) & 0xFFF00000) >> + return -EINVAL; >> + } else { >> + if (!flow_key->ip.proto) >> + return -EINVAL; > > Same issue with header validation here. > >> @@ -1605,12 +1649,43 @@ static int validate_set(const struct nlattr *a, >> + /* Convert non-masked set actions to masked set actions. >> + * Tunnel set action returns before getting here. */ >> + if (!masked) { >> + int start, len = key_len * 2; >> + struct nlattr *at; >> + >> + *skip_copy = true; >> + >> + start = add_nested_action_start(sfa, >> + OVS_ACTION_ATTR_SET_MASKED); >> + if (start < 0) >> + return start; >> + >> + at = reserve_sfa_size(sfa, nla_attr_size(len)); >> + if (IS_ERR(at)) >> + return PTR_ERR(at); >> + >> + at->nla_type = key_type; >> + at->nla_len = nla_attr_size(len); > > I think this can be simplified by using add_action() or __add_action(). > > What about the reverse direction? I think this will return masked > actions to old userspace, which presumable will choke on them. > >> diff --git a/datapath/linux/compat/include/linux/openvswitch.h >> b/datapath/linux/compat/include/linux/openvswitch.h >> index 9ea1f37..a8b318a 100644 >> --- a/datapath/linux/compat/include/linux/openvswitch.h >> +++ b/datapath/linux/compat/include/linux/openvswitch.h >> @@ -590,6 +590,12 @@ struct ovs_action_hash { >> * indicate the new packet contents. This could potentially still be >> * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there >> * is no MPLS label stack, as determined by ethertype, no action is taken. >> + * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing >> header. A >> + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value, >> + * and a mask. For every bit set in the mask, the corresponding bit value >> + * is copied from the value to the packet header field, rest of the bits are >> + * left unchanged. The non-masked value bits must be passed in as zeroes. >> + * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. > > It is probably a good idea to add the attribute and the comment in the > same patch. Also, I don't know how Pravin feels but in general it is a > little easier if the kernel code is in a single patch. Otherwise, this > will have to be transformed before upstreaming. _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev