WIP
---
 datapath/actions.c                                |  95 +++++++++++++++
 datapath/flow.h                                   |   6 +
 datapath/flow_netlink.c                           | 139 ++++++++++++++++++++++
 datapath/linux/compat/include/linux/openvswitch.h |  54 +++++++++
 4 files changed, 294 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index 05b465c..b3196b7 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -33,6 +33,7 @@
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat_core.h>
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
@@ -782,6 +783,96 @@ static int conntrack(struct datapath *dp, struct sk_buff *skb,
        return 0;
 }
 
+/* Establish the NAT binding of kind info->type for @ct, unless the
+ * connection already has one.
+ *
+ * With NF_NAT_RANGE_MAP_IPS set in the action, the range configured in the
+ * action is used.  Otherwise the connection's current address is used as a
+ * one-address range, leaving only the per-protocol parts to be remapped.
+ *
+ * Returns an NF_* verdict, as nf_nat_setup_info() does: NF_ACCEPT when the
+ * binding already exists or was set up, anything else on failure.
+ */
+static int ovs_nat_handle_ct_new(struct nf_conn *ct, struct ovs_nat_info *info)
+{
+       int err;
+
+       /* Seen it before?  This can happen for loopback, retrans,
+        * or local packets.  Nothing to do, which is not a failure: the
+        * caller compares the result against NF_ACCEPT.
+        */
+       if (nf_nat_initialized(ct, info->type))
+               return NF_ACCEPT;
+
+       if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+               /* Action is set up to establish a new mapping */
+               err = nf_nat_setup_info(ct, &info->range, info->type);
+       } else {
+               /* Force range to this IP; let proto decide mapping for
+                * per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
+                * Use reply in case it's already been mangled (eg local
+                * packet).
+                */
+               union nf_inet_addr ip =
+                       (info->type == NF_NAT_MANIP_SRC ?
+                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+
+               struct nf_nat_range range = {
+                       .flags          = NF_NAT_RANGE_MAP_IPS,
+                       .min_addr       = ip,
+                       .max_addr       = ip,
+               };
+
+               err = nf_nat_setup_info(ct, &range, info->type);
+       }
+
+       return err;
+}
+
+/* Apply the NAT action described by @info to @skb.
+ *
+ * The packet must already carry a conntrack entry; packets without one (or
+ * for which no NAT extension can be attached) are left untouched.  For
+ * new/related connections the NAT binding is set up first; the packet is
+ * then rewritten by nf_nat_packet().
+ *
+ * Returns 0 on success or when the packet is passed through unmodified,
+ * and -EINVAL when the binding could not be set up or the NAT code
+ * rejected the packet.
+ */
+static int ovs_nat(struct sk_buff *skb, struct ovs_nat_info *info)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn_nat *nat;
+       struct nf_conn *ct;
+       int hooknum, nh_off, err;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || nf_ct_is_untracked(ct)) {
+               WARN(1, "NAT: Untracked packet");
+               /* FIXME: Bump counter? */
+               return 0;
+       }
+
+       nat = nf_ct_nat_ext_add(ct);
+       if (nat == NULL)
+               return 0;
+
+       /* Point skb->data at the network header while the NAT code runs;
+        * this is undone at the 'push' label.
+        */
+       nh_off = skb_network_offset(skb);
+       skb_pull(skb, nh_off);
+       /* FIXME: COW */
+
+       switch (ctinfo) {
+       case IP_CT_RELATED:
+       case IP_CT_RELATED_REPLY:
+               /* FIXME: Handle ICMP, see nf_nat_ipv4_fn() */
+
+               /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+       case IP_CT_NEW:
+               if (ovs_nat_handle_ct_new(ct, info) != NF_ACCEPT) {
+                       err = -EINVAL;
+                       goto push;
+               }
+               break;
+
+       default:
+               /* Established connection: the binding was set up when the
+                * connection was new, so only the packet rewrite remains.
+                */
+               WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+                       ctinfo != IP_CT_ESTABLISHED_REPLY);
+               break;
+       }
+
+       if (info->type == NF_NAT_MANIP_SRC)
+               hooknum = NF_INET_LOCAL_IN;
+       else
+               hooknum = NF_INET_LOCAL_OUT;
+
+       /* nf_nat_packet() returns an NF_* verdict, not an errno; the
+        * caller treats any non-zero value as an error.
+        */
+       if (nf_nat_packet(ct, ctinfo, hooknum, skb) == NF_ACCEPT)
+               err = 0;
+       else
+               err = -EINVAL;
+push:
+       skb_push(skb, nh_off);
+
+       return err;
+}
+
 static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
                              const struct nlattr *nested_attr)
 {
@@ -956,6 +1047,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                case OVS_ACTION_ATTR_CONNTRACK:
                        err = conntrack(dp, skb, key, nla_data(a));
                        break;
+
+               case OVS_ACTION_ATTR_NAT:
+                       err = ovs_nat(skb, nla_data(a));
+                       break;
                }
 
                if (unlikely(err)) {
diff --git a/datapath/flow.h b/datapath/flow.h
index ce74958..5f2c0bb 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -35,6 +35,7 @@
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
 
 struct sk_buff;
 
@@ -69,6 +70,11 @@ struct ovs_tunnel_info {
                                        FIELD_SIZEOF(struct sw_flow_key, 
tun_opts) - \
                                           opt_len)
 
+/* Kernel-internal form of an OVS_ACTION_ATTR_NAT action.
+ *
+ * @type:  NAT kind, filled from OVS_NAT_ATTR_TYPE and compared directly
+ *         against NF_NAT_MANIP_SRC by the datapath.
+ * @range: address/port range and NF_NAT_RANGE_* flags; handed to
+ *         nf_nat_setup_info() when NF_NAT_RANGE_MAP_IPS is set.
+ */
+struct ovs_nat_info {
+       __u32 type;
+       struct nf_nat_range range;
+};
+
 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
                                            __be32 saddr, __be32 daddr,
                                            u8 tos, u8 ttl,
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 75dc87f..ebf7bd7 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -43,6 +43,7 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/rculist.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/geneve.h>
 #include <net/ip.h>
 #include <net/ip_tunnels.h>
@@ -1806,6 +1807,106 @@ static int validate_userspace(const struct nlattr *attr)
        return 0;
 }
 
+/* Validate a nested OVS_ACTION_ATTR_NAT attribute and append its
+ * kernel-internal form (struct ovs_nat_info) to the action list.
+ *
+ * @attr: nested attribute holding OVS_NAT_ATTR_* members.
+ * @key:  flow key the actions belong to (currently unused).
+ * @sfa:  action list to append to.
+ *
+ * An address or port minimum without the matching maximum describes a
+ * single-value range.
+ *
+ * Returns 0 on success, -EINVAL for unknown or wrongly sized attributes,
+ * -ERANGE for an out-of-range NAT type or address length, or the result of
+ * add_action().
+ */
+static int validate_and_copy_nat(const struct nlattr *attr,
+                               const struct sw_flow_key *key,
+                               struct sw_flow_actions **sfa)
+{
+       struct ovs_nat_info nat_info;
+       bool have_ip_max = false;
+       bool have_proto_max = false;
+       struct nlattr *a;
+       int rem;
+
+       /* The NAT type and flags are handed to netfilter unconverted. */
+       BUILD_BUG_ON((int)OVS_NAT_TYPE_SRC != (int)NF_NAT_MANIP_SRC);
+       BUILD_BUG_ON((int)OVS_NAT_TYPE_DST != (int)NF_NAT_MANIP_DST);
+       BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_RAND != NF_NAT_RANGE_PROTO_RANDOM);
+       BUILD_BUG_ON(OVS_NAT_FLAG_PERSISTENT != NF_NAT_RANGE_PERSISTENT);
+       BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_FULL_RAND !=
+                    NF_NAT_RANGE_PROTO_RANDOM_FULLY);
+
+       memset(&nat_info, 0, sizeof(nat_info));
+
+       nla_for_each_nested(a, attr, rem) {
+               /* Expected payload length per attribute; -1 marks a
+                * variable length that is checked in the switch below.
+                */
+               static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1] = {
+                       [OVS_NAT_ATTR_TYPE] = sizeof(u32),
+                       [OVS_NAT_ATTR_IP_MIN] = -1,
+                       [OVS_NAT_ATTR_IP_MAX] = -1,
+                       [OVS_NAT_ATTR_PROTO_MIN] = sizeof(u16),
+                       [OVS_NAT_ATTR_PROTO_MAX] = sizeof(u16),
+                       [OVS_NAT_ATTR_FLAGS] = sizeof(u32),
+               };
+               int type = nla_type(a);
+
+               if (type > OVS_NAT_ATTR_MAX) {
+                       OVS_NLERR("Unknown nat attribute (type=%d, max=%d).\n",
+                       type, OVS_NAT_ATTR_MAX);
+                       return -EINVAL;
+               }
+
+               if (ovs_nat_attr_lens[type] != nla_len(a) &&
+                   ovs_nat_attr_lens[type] != -1) {
+                       OVS_NLERR("NAT attribute type has unexpected "
+                                 " length (type=%d, length=%d, expected=%d).\n",
+                                 type, nla_len(a), ovs_nat_attr_lens[type]);
+                       return -EINVAL;
+               }
+
+               switch (type) {
+               case OVS_NAT_ATTR_TYPE:
+                       nat_info.type = nla_get_u32(a);
+                       if (nat_info.type > OVS_NAT_TYPE_MAX) {
+                               OVS_NLERR("NAT type %d out of range 0..%d\n",
+                                   nat_info.type, OVS_NAT_TYPE_MAX);
+                               return -ERANGE;
+                       }
+                       break;
+
+               case OVS_NAT_ATTR_IP_MIN:
+                       if (nla_len(a) != sizeof(struct in_addr) &&
+                           nla_len(a) != sizeof(struct in6_addr)) {
+                               return -ERANGE;
+                       }
+
+                       nla_memcpy(&nat_info.range.min_addr, a,
+                                  sizeof(nat_info.range.min_addr));
+                       nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+                       break;
+
+               case OVS_NAT_ATTR_IP_MAX:
+                       if (nla_len(a) != sizeof(struct in_addr) &&
+                           nla_len(a) != sizeof(struct in6_addr)) {
+                               return -ERANGE;
+                       }
+
+                       /* Upper bound goes to max_addr, not min_addr. */
+                       nla_memcpy(&nat_info.range.max_addr, a,
+                                  sizeof(nat_info.range.max_addr));
+                       nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+                       have_ip_max = true;
+                       break;
+
+               /* NOTE(review): netfilter declares the *_proto.all fields
+                * as big-endian; confirm the byte order userspace sends.
+                */
+               case OVS_NAT_ATTR_PROTO_MIN:
+                       nat_info.range.min_proto.all = nla_get_u16(a);
+                       nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+                       break;
+
+               case OVS_NAT_ATTR_PROTO_MAX:
+                       nat_info.range.max_proto.all = nla_get_u16(a);
+                       nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+                       have_proto_max = true;
+                       break;
+
+               case OVS_NAT_ATTR_FLAGS:
+                       /* Accept only the flag bits OVS exposes. */
+                       nat_info.range.flags |= nla_get_u32(a) & OVS_NAT_FLAGS;
+                       break;
+
+               default:
+                       OVS_NLERR("Unknown nat attribute (%d).\n", type);
+                       return -EINVAL;
+               }
+       }
+
+       if (rem > 0) {
+               OVS_NLERR("NAT attribute has %d unknown bytes.\n", rem);
+               return -EINVAL;
+       }
+
+       /* A minimum without a maximum denotes a single-value range. */
+       if ((nat_info.range.flags & NF_NAT_RANGE_MAP_IPS) && !have_ip_max)
+               nat_info.range.max_addr = nat_info.range.min_addr;
+       if ((nat_info.range.flags & NF_NAT_RANGE_PROTO_SPECIFIED) &&
+           !have_proto_max)
+               nat_info.range.max_proto = nat_info.range.min_proto;
+
+       return add_action(sfa, OVS_ACTION_ATTR_NAT, &nat_info,
+                         sizeof(nat_info));
+}
+
 static int copy_action(const struct nlattr *from,
                       struct sw_flow_actions **sfa)
 {
@@ -1845,6 +1946,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
                        [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
                        [OVS_ACTION_ATTR_CONNTRACK] = (u32)-1,
+                       [OVS_ACTION_ATTR_NAT] = (u32)-1,
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);
@@ -1957,6 +2059,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        skip_copy = true;
                        break;
 
+               case OVS_ACTION_ATTR_NAT:
+                       err = validate_and_copy_nat(a, key, sfa);
+                       if (err)
+                               return err;
+                       skip_copy = true;
+                       break;
+
                default:
                        return -EINVAL;
                }
@@ -2079,6 +2188,30 @@ static int conntrack_action_to_attr(const struct nlattr *attr, struct sk_buff *s
        return 0;
 }
 
+/* Serialize a kernel-internal NAT action (struct ovs_nat_info carried in
+ * @attr) into @skb as a nested OVS_ACTION_ATTR_NAT attribute.
+ *
+ * Address and port ranges are reported only when they were configured, so
+ * that the dumped action means the same thing when parsed again.
+ *
+ * Returns 0 on success or -EMSGSIZE if an attribute could not be added.
+ */
+static int nat_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+       const struct ovs_nat_info *info = nla_data(attr);
+       struct nlattr *start;
+
+       start = nla_nest_start(skb, OVS_ACTION_ATTR_NAT);
+       if (!start)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(skb, OVS_NAT_ATTR_TYPE, info->type))
+               return -EMSGSIZE;
+
+       if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+               /* NOTE(review): ovs_nat_info does not record the address
+                * family, so only the IPv4 member is reported here.
+                */
+               if (nla_put_u32(skb, OVS_NAT_ATTR_IP_MIN,
+                               info->range.min_addr.ip) ||
+                   nla_put_u32(skb, OVS_NAT_ATTR_IP_MAX,
+                               info->range.max_addr.ip))
+                       return -EMSGSIZE;
+       }
+
+       if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+               if (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
+                               info->range.min_proto.all) ||
+                   nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
+                               info->range.max_proto.all))
+                       return -EMSGSIZE;
+       }
+
+       /* Report only the OVS-visible flag bits that are actually set. */
+       if (nla_put_u32(skb, OVS_NAT_ATTR_FLAGS,
+                       info->range.flags & OVS_NAT_FLAGS))
+               return -EMSGSIZE;
+
+       nla_nest_end(skb, start);
+
+       return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff 
*skb)
 {
        const struct nlattr *a;
@@ -2106,6 +2239,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
                                return err;
                        break;
 
+               case OVS_ACTION_ATTR_NAT:
+                       err = nat_action_to_attr(a, skb);
+                       if (err)
+                               return err;
+                       break;
+
                default:
                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
                                return -EMSGSIZE;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index f3654de..b65efc6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -592,6 +592,57 @@ enum ovs_conntrack_attr {
 #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
 
 /**
+ * enum ovs_nat_type - Supported NAT modes
+ * @OVS_NAT_TYPE_SRC: Source NAT.
+ * @OVS_NAT_TYPE_DST: Destination NAT.
+ *
+ * NOTE: The datapath stores the value received in %OVS_NAT_ATTR_TYPE as-is
+ * and compares it directly against NF_NAT_MANIP_SRC, so these values must
+ * stay compatible with enum nf_nat_manip_type.
+ */
+enum ovs_nat_type {
+       OVS_NAT_TYPE_SRC,
+       OVS_NAT_TYPE_DST,
+       __OVS_NAT_TYPE_MAX,
+};
+
+/* Highest valid enum ovs_nat_type value. */
+#define OVS_NAT_TYPE_MAX (__OVS_NAT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_nat_flag - Supported NAT flags
+ * @OVS_NAT_FLAG_PROTO_RAND: Pseudo random hash based L4 port mapping (MD5)
+ * @OVS_NAT_FLAG_PERSISTENT: Persistent IP mapping across reboots
+ * @OVS_NAT_FLAG_PROTO_FULL_RAND: Fully randomized L4 port mapping
+ *
+ * NOTE: The flags values must be compatible with NF_NAT_RANGE_* in
+ * <linux/netfilter/nf_nat.h>.
+ */
+enum ovs_nat_flag {
+       OVS_NAT_FLAG_PROTO_RAND         = 0x4,
+       OVS_NAT_FLAG_PERSISTENT         = 0x8,
+       OVS_NAT_FLAG_PROTO_FULL_RAND    = 0x10,
+};
+
+/* Mask of every flag bit defined in enum ovs_nat_flag. */
+#define OVS_NAT_FLAGS (OVS_NAT_FLAG_PROTO_RAND | OVS_NAT_FLAG_PERSISTENT | \
+                      OVS_NAT_FLAG_PROTO_FULL_RAND)
+
+/**
+ * enum ovs_nat_attr - Attributes for %OVS_ACTION_ATTR_NAT action.
+ * @OVS_NAT_ATTR_UNSPEC: Not a valid attribute; rejected by the datapath.
+ * @OVS_NAT_ATTR_TYPE: u32 NAT type (enum ovs_nat_type)
+ * @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
+ * @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
+ * @OVS_NAT_ATTR_FLAGS: u32 NAT flags (OVS_NAT_FLAG_*)
+ */
+enum ovs_nat_attr {
+       OVS_NAT_ATTR_UNSPEC,
+       OVS_NAT_ATTR_TYPE,
+       OVS_NAT_ATTR_IP_MIN,
+       OVS_NAT_ATTR_IP_MAX,
+       OVS_NAT_ATTR_PROTO_MIN,
+       OVS_NAT_ATTR_PROTO_MAX,
+       OVS_NAT_ATTR_FLAGS,
+       __OVS_NAT_ATTR_MAX,
+};
+
+/* Highest valid enum ovs_nat_attr value. */
+#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
+
+/**
  * enum ovs_action_attr - Action types.
  *
  * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -623,6 +674,8 @@ enum ovs_conntrack_attr {
  * %ETH_P_MPLS if the resulting MPLS label stack is not empty.  If there
  * is no MPLS label stack, as determined by ethertype, no action is taken.
  * @OVS_ACTION_ATTR_CONNTRACK: Track the connection.
+ * @OVS_ACTION_ATTR_NAT: Perform L3 network address translation (NAT) on
+ * the packet using the Netfilter subsystem.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -646,6 +699,7 @@ enum ovs_action_attr {
                                       * The data must be zero for the unmasked
                                       * bits. */
        OVS_ACTION_ATTR_CONNTRACK,    /* One nested OVS_CT_ATTR_* */
+       OVS_ACTION_ATTR_NAT,          /* Nested OVS_NAT_ATTR_* */
        __OVS_ACTION_ATTR_MAX
 };
 
-- 
1.9.3

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to