Masked set actions allow more megaflow wildcarding.  Every key type that
can be set, except the tunnel key, can now be set with a mask.

It is not clear whether masked set is useful for skb_priority.
However, we already use the LSB of pkt_mark for IPSec in tunnels, so
it might be useful to be able to set individual bits on pkt_mark.

Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
---
 datapath/actions.c          |  218 ++++++++++++++++++++++++++++++++-----------
 datapath/flow_netlink.c     |   51 ++++++++--
 include/linux/openvswitch.h |   12 ++-
 3 files changed, 218 insertions(+), 63 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 0b66e7c..435b7c6 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -125,8 +125,21 @@ static int push_vlan(struct sk_buff *skb, const struct 
ovs_action_push_vlan *vla
        return 0;
 }
 
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+       u16 *dst = (u16 *)dst_;
+       const u16 *src = (const u16 *)src_;
+       const u16 *mask = (const u16 *)mask_;
+
+       dst[0] = src[0] | (dst[0] & ~mask[0]);
+       dst[1] = src[1] | (dst[1] & ~mask[1]);
+       dst[2] = src[2] | (dst[2] & ~mask[2]);
+}
+
 static int set_eth_addr(struct sk_buff *skb,
-                       const struct ovs_key_ethernet *eth_key)
+                       const struct ovs_key_ethernet *key,
+                       const struct ovs_key_ethernet *mask)
 {
        int err;
        err = make_writable(skb, ETH_HLEN);
@@ -135,8 +148,15 @@ static int set_eth_addr(struct sk_buff *skb,
 
        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-       ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
-       ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+       if (mask) {
+               ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+                                      mask->eth_src);
+               ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+                                      mask->eth_dst);
+       } else {
+               ether_addr_copy(eth_hdr(skb)->h_source, key->eth_src);
+               ether_addr_copy(eth_hdr(skb)->h_dest, key->eth_dst);
+       }
 
        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
@@ -144,7 +164,7 @@ static int set_eth_addr(struct sk_buff *skb,
 }
 
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
-                               __be32 *addr, __be32 new_addr)
+                       __be32 *addr, __be32 new_addr)
 {
        int transport_len = skb->len - skb_transport_offset(skb);
 
@@ -204,14 +224,19 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
        memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc, u8 mask)
 {
+       /* Keep the unmasked bits. */
+       tc |= (nh->priority << 4 | (nh->flow_lbl[0] & 0xF0) >> 4) & ~mask;
        nh->priority = tc >> 4;
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 {
+       /* Keep the unmasked bits. */
+       fl |= ((u32)(nh->flow_lbl[0] & 0x0F) << 16
+              | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]) & ~mask;
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
        nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
        nh->flow_lbl[2] = fl & 0x000000FF;
@@ -223,10 +248,13 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr 
*nh, u8 new_ttl)
        nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *key,
+                   const struct ovs_key_ipv4 *mask)
 {
        struct iphdr *nh;
        int err;
+       __be32 saddr, daddr;
+       u8 tos, ttl;
 
        err = make_writable(skb, skb_network_offset(skb) +
                                 sizeof(struct iphdr));
@@ -235,27 +263,54 @@ static int set_ipv4(struct sk_buff *skb, const struct 
ovs_key_ipv4 *ipv4_key)
 
        nh = ip_hdr(skb);
 
-       if (ipv4_key->ipv4_src != nh->saddr)
-               set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+       saddr = key->ipv4_src;
+       daddr = key->ipv4_dst;
+       tos = key->ipv4_tos;
+       ttl = key->ipv4_ttl;
 
-       if (ipv4_key->ipv4_dst != nh->daddr)
-               set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+       if (mask) {
+               saddr |= nh->saddr & ~mask->ipv4_src;
+               daddr |= nh->daddr & ~mask->ipv4_dst;
+               tos |= nh->tos & ~mask->ipv4_tos;
+               ttl |= nh->ttl & ~mask->ipv4_ttl;
+       }
+
+       if (saddr != nh->saddr)
+               set_ip_addr(skb, nh, &nh->saddr, saddr);
+
+       if (daddr != nh->daddr)
+               set_ip_addr(skb, nh, &nh->daddr, daddr);
 
-       if (ipv4_key->ipv4_tos != nh->tos)
-               ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+       if (tos != nh->tos)
+               ipv4_change_dsfield(nh, 0, tos);
 
-       if (ipv4_key->ipv4_ttl != nh->ttl)
-               set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+       if (ttl != nh->ttl)
+               set_ip_ttl(skb, nh, ttl);
 
        return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+                          const __be32 mask[4], __be32 masked[4])
+{
+       masked[0] = addr[0] | (old[0] & ~mask[0]);
+       masked[1] = addr[1] | (old[1] & ~mask[1]);
+       masked[2] = addr[2] | (old[2] & ~mask[2]);
+       masked[3] = addr[3] | (old[3] & ~mask[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key,
+                   const struct ovs_key_ipv6 *mask)
 {
        struct ipv6hdr *nh;
        int err;
        __be32 *saddr;
        __be32 *daddr;
+       __be32 smasked[4], dmasked[4];
+       const __be32 *key_saddr, *key_daddr;
+       u8 tc_mask = 0xff;
+       u32 fl_mask = UINT_MAX;
+       u8 hl_mask = 0xff;
 
        err = make_writable(skb, skb_network_offset(skb) +
                            sizeof(struct ipv6hdr));
@@ -265,12 +320,23 @@ static int set_ipv6(struct sk_buff *skb, const struct 
ovs_key_ipv6 *ipv6_key)
        nh = ipv6_hdr(skb);
        saddr = (__be32 *)&nh->saddr;
        daddr = (__be32 *)&nh->daddr;
+       key_saddr = key->ipv6_src;
+       key_daddr = key->ipv6_dst;
+
+       if (mask) {
+               mask_ipv6_addr(saddr, key_saddr, mask->ipv6_src, smasked);
+               key_saddr = smasked;
+               mask_ipv6_addr(daddr, key_daddr, mask->ipv6_dst, dmasked);
+               key_daddr = dmasked;
+               tc_mask = mask->ipv6_tclass;
+               fl_mask = ntohl(mask->ipv6_label);
+               hl_mask = mask->ipv6_hlimit;
+       }
 
-       if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
-               set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
-                             ipv6_key->ipv6_src, true);
+       if (memcmp(key_saddr, saddr, sizeof(key->ipv6_src)))
+               set_ipv6_addr(skb, key->ipv6_proto, saddr, key_saddr, true);
 
-       if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+       if (memcmp(key_daddr, daddr, sizeof(key->ipv6_dst))) {
                unsigned int offset = 0;
                int flags = OVS_IP6T_FH_F_SKIP_RH;
                bool recalc_csum = true;
@@ -280,13 +346,13 @@ static int set_ipv6(struct sk_buff *skb, const struct 
ovs_key_ipv6 *ipv6_key)
                                                    NEXTHDR_ROUTING, NULL,
                                                    &flags) != NEXTHDR_ROUTING;
 
-               set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
-                             ipv6_key->ipv6_dst, recalc_csum);
+               set_ipv6_addr(skb, key->ipv6_proto, daddr, key_daddr,
+                             recalc_csum);
        }
 
-       set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
-       set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-       nh->hop_limit = ipv6_key->ipv6_hlimit;
+       set_ipv6_tc(nh, key->ipv6_tclass, tc_mask);
+       set_ipv6_fl(nh, ntohl(key->ipv6_label), fl_mask);
+       nh->hop_limit = key->ipv6_hlimit | (nh->hop_limit & ~hl_mask);
 
        return 0;
 }
@@ -315,10 +381,12 @@ static void set_udp_port(struct sk_buff *skb, __be16 
*port, __be16 new_port)
        }
 }
 
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *key,
+                  const struct ovs_key_udp *mask)
 {
        struct udphdr *uh;
        int err;
+       __be16 src, dst;
 
        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct udphdr));
@@ -326,19 +394,29 @@ static int set_udp(struct sk_buff *skb, const struct 
ovs_key_udp *udp_port_key)
                return err;
 
        uh = udp_hdr(skb);
-       if (udp_port_key->udp_src != uh->source)
-               set_udp_port(skb, &uh->source, udp_port_key->udp_src);
 
-       if (udp_port_key->udp_dst != uh->dest)
-               set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+       src = key->udp_src;
+       dst = key->udp_dst;
+       if (mask) {
+               src |= uh->source & ~mask->udp_src;
+               dst |= uh->dest & ~mask->udp_dst;
+       }
+
+       if (src != uh->source)
+               set_udp_port(skb, &uh->source, src);
+
+       if (dst != uh->dest)
+               set_udp_port(skb, &uh->dest, dst);
 
        return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *key,
+                  const struct ovs_key_tcp *mask)
 {
        struct tcphdr *th;
        int err;
+       __be16 src, dst;
 
        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct tcphdr));
@@ -346,36 +424,53 @@ static int set_tcp(struct sk_buff *skb, const struct 
ovs_key_tcp *tcp_port_key)
                return err;
 
        th = tcp_hdr(skb);
-       if (tcp_port_key->tcp_src != th->source)
-               set_tp_port(skb, &th->source, tcp_port_key->tcp_src, 
&th->check);
 
-       if (tcp_port_key->tcp_dst != th->dest)
-               set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+       src = key->tcp_src;
+       dst = key->tcp_dst;
+       if (mask) {
+               src |= th->source & ~mask->tcp_src;
+               dst |= th->dest & ~mask->tcp_dst;
+       }
+
+       if (src != th->source)
+               set_tp_port(skb, &th->source, src, &th->check);
+
+       if (dst != th->dest)
+               set_tp_port(skb, &th->dest, dst, &th->check);
 
        return 0;
 }
 
 static int set_sctp(struct sk_buff *skb,
-                    const struct ovs_key_sctp *sctp_port_key)
+                   const struct ovs_key_sctp *key,
+                   const struct ovs_key_sctp *mask)
 {
        struct sctphdr *sh;
        int err;
        unsigned int sctphoff = skb_transport_offset(skb);
+       __be16 src, dst;
 
        err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
        if (unlikely(err))
                return err;
 
        sh = sctp_hdr(skb);
-       if (sctp_port_key->sctp_src != sh->source ||
-           sctp_port_key->sctp_dst != sh->dest) {
+
+       src = key->sctp_src;
+       dst = key->sctp_dst;
+       if (mask) {
+               src |= sh->source & ~mask->sctp_src;
+               dst |= sh->dest & ~mask->sctp_dst;
+       }
+
+       if (src != sh->source || dst != sh->dest) {
                __le32 old_correct_csum, new_csum, old_csum;
 
                old_csum = sh->checksum;
                old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 
-               sh->source = sctp_port_key->sctp_src;
-               sh->dest = sctp_port_key->sctp_dst;
+               sh->source = src;
+               sh->dest = dst;
 
                new_csum = sctp_compute_cksum(skb, sctphoff);
 
@@ -460,46 +555,65 @@ static int sample(struct datapath *dp, struct sk_buff 
*skb,
                                  nla_len(acts_list), true);
 }
 
-static int execute_set_action(struct sk_buff *skb,
-                                const struct nlattr *nested_attr)
+#define get_mask(a, type) ((const type *)nla_data(a) + 1)
+
+static int execute_set_action(struct sk_buff *skb, const struct nlattr *a)
 {
        int err = 0;
+       const void *mask;
 
-       switch (nla_type(nested_attr)) {
+       switch (nla_type(a)) {
        case OVS_KEY_ATTR_PRIORITY:
-               skb->priority = nla_get_u32(nested_attr);
+               skb->priority = (nla_len(a) == 2 * sizeof(u32))
+                       ? nla_get_u32(a) | (skb->priority & ~*get_mask(a, u32))
+                       : nla_get_u32(a);
                break;
 
        case OVS_KEY_ATTR_SKB_MARK:
-               skb->mark = nla_get_u32(nested_attr);
+               skb->mark = (nla_len(a) == 2 * sizeof(u32))
+                       ? nla_get_u32(a) | (skb->mark & ~*get_mask(a, u32))
+                       : nla_get_u32(a);
                break;
 
        case OVS_KEY_ATTR_IPV4_TUNNEL:
-               OVS_CB(skb)->tun_key = nla_data(nested_attr);
+               /* Masked data not supported for tunnel. */
+               OVS_CB(skb)->tun_key = nla_data(a);
                break;
 
        case OVS_KEY_ATTR_ETHERNET:
-               err = set_eth_addr(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ethernet))
+                       ? get_mask(a, struct ovs_key_ethernet) : NULL;
+               err = set_eth_addr(skb, nla_data(a), mask);
                break;
 
        case OVS_KEY_ATTR_IPV4:
-               err = set_ipv4(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ipv4))
+                       ? get_mask(a, struct ovs_key_ipv4) : NULL;
+               err = set_ipv4(skb, nla_data(a), mask);
                break;
 
        case OVS_KEY_ATTR_IPV6:
-               err = set_ipv6(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_ipv6))
+                       ? get_mask(a, struct ovs_key_ipv6) : NULL;
+               err = set_ipv6(skb, nla_data(a), mask);
                break;
 
        case OVS_KEY_ATTR_TCP:
-               err = set_tcp(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_tcp))
+                       ? get_mask(a, struct ovs_key_tcp) : NULL;
+               err = set_tcp(skb, nla_data(a), mask);
                break;
 
        case OVS_KEY_ATTR_UDP:
-               err = set_udp(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_udp))
+                       ? get_mask(a, struct ovs_key_udp) : NULL;
+               err = set_udp(skb, nla_data(a), mask);
                break;
 
        case OVS_KEY_ATTR_SCTP:
-               err = set_sctp(skb, nla_data(nested_attr));
+               mask = (nla_len(a) == 2 * sizeof(struct ovs_key_sctp))
+                       ? get_mask(a, struct ovs_key_sctp) : NULL;
+               err = set_sctp(skb, nla_data(a), mask);
                break;
        }
 
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 5c32cd0..477cab6 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1274,6 +1274,19 @@ static int validate_and_copy_set_tun(const struct nlattr 
*attr,
        return err;
 }
 
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding. */
+static bool validate_masked(u8 *data, int bytes)
+{
+       int len = bytes / 2;
+       u8 *mask = data + len;
+
+       while (len--)
+               if (*data++ & ~*mask++)
+                       return false;
+       return true;
+}
+
 static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key,
                        struct sw_flow_actions **sfa,
@@ -1281,13 +1294,17 @@ static int validate_set(const struct nlattr *a,
 {
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);
+       bool have_mask;
 
        /* There can be only one key in a action */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;
 
+       have_mask = (ovs_key_lens[key_type] * 2 == nla_len(ovs_key));
+
        if (key_type > OVS_KEY_ATTR_MAX ||
            (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+            (!have_mask || !validate_masked(nla_data(ovs_key), 
nla_len(ovs_key))) &&
             ovs_key_lens[key_type] != -1))
                return -EINVAL;
 
@@ -1316,12 +1333,21 @@ static int validate_set(const struct nlattr *a,
                        return -EINVAL;
 
                ipv4_key = nla_data(ovs_key);
-               if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-                       return -EINVAL;
+               if (have_mask) {
+                       const struct ovs_key_ipv4 *mask = ipv4_key + 1;
 
-               if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-                       return -EINVAL;
+                       if (mask->ipv4_proto) /* proto is not writeable. */
+                               return -EINVAL;
 
+                       if (mask->ipv4_frag) /* frag is not writeable. */
+                               return -EINVAL;
+               } else {
+                       if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+                               return -EINVAL;
+
+                       if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+                               return -EINVAL;
+               }
                break;
 
        case OVS_KEY_ATTR_IPV6:
@@ -1332,12 +1358,21 @@ static int validate_set(const struct nlattr *a,
                        return -EINVAL;
 
                ipv6_key = nla_data(ovs_key);
-               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-                       return -EINVAL;
+               if (have_mask) {
+                       const struct ovs_key_ipv6 *mask = ipv6_key + 1;
 
-               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-                       return -EINVAL;
+                       if (mask->ipv6_proto) /* proto is not writeable. */
+                               return -EINVAL;
 
+                       if (mask->ipv6_frag) /* frag is not writeable. */
+                               return -EINVAL;
+               } else {
+                       if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+                               return -EINVAL;
+
+                       if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+                               return -EINVAL;
+               }
                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;
 
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index d7ad058..2212978 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -572,9 +572,15 @@ struct ovs_action_recirc {
  * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
  * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
  * the nested %OVS_SAMPLE_ATTR_* attributes.
- * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
- * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
- * value.
+ * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  A
+ * nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
+ * value.  The value can be immediately followed by a mask that specifies what
+ * bits of the header field are modified.  The presence of the mask is
+ * indicated by the attribute's payload length being exactly double the size
+ * of the attribute value without a mask.  That is, the value and the
+ * mask are included in the same netlink attribute.  No mask causes all the
+ * bits to be set.  Masking is not supported for the %OVS_KEY_ATTR_IPV4_TUNNEL
+ * attribute.
  * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
  * top of the packets MPLS label stack.  Set the ethertype of the
  * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
-- 
1.7.10.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to