Jesse,
I’ll send a new patch series soon, but here are the fixes to the comments below
for your viewing pleasure:
Jarno
diff --git a/datapath/actions.c b/datapath/actions.c
index 0cb7c9e..243b672 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -510,7 +510,7 @@ static int set_ipv4(struct sk_buff *skb, const struct
ovs_key_ipv4 *key,
static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
- return addr[0] | addr[1] | addr[2] | addr[3];
+ return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}
static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key,
@@ -565,7 +565,7 @@ static int set_ipv6(struct sk_buff *skb, const struct
ovs_key_ipv6 *key,
if (mask->ipv6_tclass) {
ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh));
- }
+ }
if (mask->ipv6_label) {
set_ipv6_fl(nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
@@ -599,6 +599,7 @@ static int set_udp(struct sk_buff *skb, const struct
ovs_key_udp *key,
return err;
uh = udp_hdr(skb);
+ /* Either of the masks is non-zero, so do not bother checking them. */
src = MASKED(uh->source, key->udp_src, mask->udp_src);
dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
@@ -824,8 +825,7 @@ static int execute_set_action(struct sk_buff *skb, const
struct nlattr *a)
}
/* Mask is at the midpoint of the data. */
-#define get_mask(a, type) \
- ((const type *)((const char *)nla_data(a) + nla_len(a)))
+#define get_mask(a, type) ((const type *)nla_data(a) + 1)
static int execute_masked_set_action(struct sk_buff *skb,
const struct nlattr *a)
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index faabeb2..722640a 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -325,6 +325,20 @@ static bool is_all_zero(const u8 *fp, size_t size)
return true;
}
+static bool is_all_ones(const u8 *fp, size_t size)
+{
+ int i;
+
+ if (!fp)
+ return false;
+
+ for (i = 0; i < size; i++)
+ if (~fp[i])
+ return false;
+
+ return true;
+}
+
static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[],
u64 *attrsp, bool nz)
@@ -1646,13 +1660,17 @@ static int validate_set(const struct nlattr *a,
err = validate_and_copy_set_tun(a, sfa);
if (err)
return err;
- return 0;
+ break;
case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
+ if (!flow_key->ip.proto)
+ return -EINVAL;
+
ipv4_key = nla_data(ovs_key);
+
if (masked) {
const struct ovs_key_ipv4 *mask = ipv4_key + 1;
@@ -1660,9 +1678,6 @@ static int validate_set(const struct nlattr *a,
if (mask->ipv4_proto || mask->ipv4_frag)
return -EINVAL;
} else {
- if (!flow_key->ip.proto)
- return -EINVAL;
-
if (ipv4_key->ipv4_proto != flow_key->ip.proto)
return -EINVAL;
@@ -1675,6 +1690,9 @@ static int validate_set(const struct nlattr *a,
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
+ if (!flow_key->ip.proto)
+ return -EINVAL;
+
ipv6_key = nla_data(ovs_key);
if (masked) {
const struct ovs_key_ipv6 *mask = ipv6_key + 1;
@@ -1687,9 +1705,6 @@ static int validate_set(const struct nlattr *a,
if (ntohl(mask->ipv6_label) & 0xFFF00000)
return -EINVAL;
} else {
- if (!flow_key->ip.proto)
- return -EINVAL;
-
if (ipv6_key->ipv6_proto != flow_key->ip.proto)
return -EINVAL;
@@ -1734,35 +1749,36 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
- /* Convert non-masked set actions to masked set actions.
- * Tunnel set action returns before getting here. */
- if (!masked) {
+ /* Convert non-masked non-tunnel set actions to masked set actions. */
+ if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
int start, len = key_len * 2;
struct nlattr *at;
*skip_copy = true;
- start = add_nested_action_start(sfa,
- OVS_ACTION_ATTR_SET_MASKED);
+ start = add_nested_action_start(sfa,
OVS_ACTION_ATTR_SET_MASKED);
if (start < 0)
return start;
- at = reserve_sfa_size(sfa, nla_attr_size(len));
+ at = __add_action(sfa, key_type, NULL, len);
if (IS_ERR(at))
return PTR_ERR(at);
- at->nla_type = key_type;
- at->nla_len = nla_attr_size(len);
-
memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
/* Even though all-ones mask includes non-writeable fields,
* which we do not allow above, we will not actually set them
* when we execute the masked set action. */
memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
- memset((unsigned char *)at + at->nla_len, 0, nla_padlen(len));
add_nested_action_end(*sfa, start);
+ } else if (masked) {
+ /* Reject fully masked masked set actions so that the above
+ * conversion can be unabiguously reverted when sending
+ * actions back to userspace. */
+ if (is_all_ones(nla_data(ovs_key) + key_len, key_len))
+ return -EINVAL;
}
+
return 0;
}
@@ -1956,6 +1972,7 @@ static int __ovs_nla_copy_actions(const struct nlattr
*attr,
return 0;
}
+/* 'key' must be the masked key. */
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa)
@@ -2043,6 +2060,27 @@ static int set_action_to_attr(const struct nlattr *a,
struct sk_buff *skb)
return 0;
}
+static int masked_set_action_to_attr(const struct nlattr *a,
+ struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ size_t key_len = nla_len(ovs_key) / 2;
+
+ if (is_all_ones(nla_data(ovs_key) + key_len, key_len)) {
+ /* Revert the conversion we did from a non-masked set action to
+ * masked set action. */
+ if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len,
+ ovs_key))
+ return -EMSGSIZE;
+ } else {
+ if (nla_put(skb, OVS_ACTION_ATTR_SET_MASKED, nla_len(a),
+ ovs_key))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff
*skb)
{
const struct nlattr *a;
@@ -2058,6 +2096,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int
len, struct sk_buff *skb)
return err;
break;
+ case OVS_ACTION_ATTR_SET_MASKED:
+ err = masked_set_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample_action_to_attr(a, skb);
if (err)
On Aug 11, 2014, at 5:39 PM, Jesse Gross <[email protected]> wrote:
> On Mon, Aug 11, 2014 at 9:15 AM, Jarno Rajahalme <[email protected]>
> wrote:
>> Masked set action allows more megaflow wildcarding. Masked set action
>> is now supported for all writeable key types, except for the tunnel
>> key.
>>
>> The set tunnel action is an exception as any input tunnel info is
>> cleared before action processing starts, so there is no tunnel info to
>> mask.
>>
>> The kernel module converts all (non-tunnel) set actions to masked set
>> actions. This makes action processing more uniform, and results in
>> less branching and duplicating the action processing code.
>>
>> Signed-off-by: Jarno Rajahalme <[email protected]>
>
> This triggers some sparse warnings that would be nice to avoid:
>
> CHECK /home/jesse/openvswitch/datapath/linux/actions.c
> /home/jesse/openvswitch/datapath/linux/actions.c:498:44: warning:
> incorrect type in return expression (different base types)
> /home/jesse/openvswitch/datapath/linux/actions.c:498:44: expected bool
> /home/jesse/openvswitch/datapath/linux/actions.c:498:44: got
> restricted __be32
>
>> diff --git a/datapath/actions.c b/datapath/actions.c
>> index 25c5d77..d57d779 100644
>> --- a/datapath/actions.c
>> +++ b/datapath/actions.c
>> @@ -432,35 +462,47 @@ static int set_ipv4(struct sk_buff *skb, const struct
>> ovs_key_ipv4 *ipv4_key)
>>
>> nh = ip_hdr(skb);
>>
>> - if (ipv4_key->ipv4_src != nh->saddr) {
>> - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
>> - flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src);
>> - }
>> + /* Setting an IP addresses is typically only a side effect of
>> + * matching on them in the current userspace implementation, so it
>> + * makes sense to check if the value actually changed. */
>> + if (mask->ipv4_src) {
>> + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
>>
>> - if (ipv4_key->ipv4_dst != nh->daddr) {
>
> Here we are checking first that the mask is non-zero and then later
> that the values are different. In other places we do only the second
> step. What is the rationale?
>
>> +/* Mask is at the midpoint of the data. */
>> +#define get_mask(a, type) \
>> + ((const type *)((const char *)nla_data(a) + nla_len(a)))
>
> This doesn't seem right to me. From a netlink perspective, the
> attribute length covers the whole thing, both the value and mask. So
> won't this always find the end of the attribute, not the middle?
>
>> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
>> index e525c9d..937f4d6 100644
>> --- a/datapath/flow_netlink.c
>> +++ b/datapath/flow_netlink.c
>> @@ -1543,42 +1563,62 @@ static int validate_set(const struct nlattr *a,
>> break;
>>
>> case OVS_KEY_ATTR_TUNNEL:
>> - *set_tun = true;
>> + if (masked)
>> + return -EINVAL; /* Masked tunnel set not supported.
>> */
>> + *skip_copy = true;
>> err = validate_and_copy_set_tun(a, sfa);
>> if (err)
>> return err;
>> - break;
>> + return 0;
>
> Returning 0 here makes me a little nervous - if we ever add more
> validation at the end of the function, it might be silently skipped.
>
>> case OVS_KEY_ATTR_IPV4:
> [...]
>> - if (ipv4_key->ipv4_frag != flow_key->ip.frag)
>> - return -EINVAL;
>> + /* Non-writeable fields. */
>> + if (mask->ipv4_proto || mask->ipv4_frag)
>> + return -EINVAL;
>> + } else {
>> + if (!flow_key->ip.proto)
>> + return -EINVAL;
>
> I believe that this check on ip.proto is being used to verify that the
> IP header is actually present, so this would mean that we can write
> off the end of the packet in the masked case.
>
>> case OVS_KEY_ATTR_IPV6:
> [...]
>> - if (ipv6_key->ipv6_frag != flow_key->ip.frag)
>> - return -EINVAL;
>> + /* Non-writeable fields. */
>> + if (mask->ipv6_proto || mask->ipv6_frag)
>> + return -EINVAL;
>> +
>> + /* Invalid bits in the flow label mask? */
>> + if (ntohl(mask->ipv6_label) & 0xFFF00000)
>> + return -EINVAL;
>> + } else {
>> + if (!flow_key->ip.proto)
>> + return -EINVAL;
>
> Same issue with header validation here.
>
>> @@ -1605,12 +1649,43 @@ static int validate_set(const struct nlattr *a,
>> + /* Convert non-masked set actions to masked set actions.
>> + * Tunnel set action returns before getting here. */
>> + if (!masked) {
>> + int start, len = key_len * 2;
>> + struct nlattr *at;
>> +
>> + *skip_copy = true;
>> +
>> + start = add_nested_action_start(sfa,
>> + OVS_ACTION_ATTR_SET_MASKED);
>> + if (start < 0)
>> + return start;
>> +
>> + at = reserve_sfa_size(sfa, nla_attr_size(len));
>> + if (IS_ERR(at))
>> + return PTR_ERR(at);
>> +
>> + at->nla_type = key_type;
>> + at->nla_len = nla_attr_size(len);
>
> I think this can be simplified by using add_action() or __add_action().
>
> What about the reverse direction? I think this will return masked
> actions to old userspace, which presumable will choke on them.
>
>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h
>> b/datapath/linux/compat/include/linux/openvswitch.h
>> index 9ea1f37..a8b318a 100644
>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>> @@ -590,6 +590,12 @@ struct ovs_action_hash {
>> * indicate the new packet contents. This could potentially still be
>> * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
>> * is no MPLS label stack, as determined by ethertype, no action is taken.
>> + * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing
>> header. A
>> + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
>> + * and a mask. For every bit set in the mask, the corresponding bit value
>> + * is copied from the value to the packet header field, rest of the bits are
>> + * left unchanged. The non-masked value bits must be passed in as zeroes.
>> + * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
>
> It is probably a good idea to add the attribute and the comment in the
> same patch. Also, I don't know how Pravin feels but in general it is a
> little easier if the kernel code is in a single patch. Otherwise, this
> will have to be transformed before upstreaming.
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev