WIP

Add an OVS_ACTION_ATTR_NAT datapath action that performs L3 network
address translation through the Netfilter NAT core on packets that
already carry a conntrack entry.
---
 datapath/actions.c                                | 115 ++++++++++++++++
 datapath/flow.h                                   |   6 +
 datapath/flow_netlink.c                           | 151 ++++++++++++++++++++
 datapath/linux/compat/include/linux/openvswitch.h |  54 ++++++++
 4 files changed, 326 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index 05b465c..b3196b7 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -33,6 +33,7 @@
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat_core.h>
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
@@ -782,6 +783,116 @@ static int conntrack(struct datapath *dp, struct sk_buff *skb,
 	return 0;
 }
 
+/* Sets up the NAT mapping of type info->type for connection 'ct' unless one
+ * has been set up already.  Returns a netfilter verdict: NF_ACCEPT when the
+ * mapping exists or was created, otherwise the verdict returned by
+ * nf_nat_setup_info().
+ */
+static int ovs_nat_handle_ct_new(struct nf_conn *ct, struct ovs_nat_info *info)
+{
+	int err;
+
+	/* Seen it before?  This can happen for loopback, retrans,
+	 * or local packets.  Report NF_ACCEPT, not 0: the caller treats
+	 * anything other than NF_ACCEPT as a failure.
+	 */
+	if (nf_nat_initialized(ct, info->type))
+		return NF_ACCEPT;
+
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+		/* Action is set up to establish a new mapping */
+		err = nf_nat_setup_info(ct, &info->range, info->type);
+	} else {
+		/* Force range to this IP; let proto decide mapping for
+		 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+		 * Use reply in case it's already been mangled (eg local
+		 * packet).
+		 */
+		union nf_inet_addr ip =
+			(info->type == NF_NAT_MANIP_SRC ?
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+
+		struct nf_nat_range range = {
+			.flags = NF_NAT_RANGE_MAP_IPS,
+			.min_addr = ip,
+			.max_addr = ip,
+		};
+
+		err = nf_nat_setup_info(ct, &range, info->type);
+	}
+
+	return err;
+}
+
+/* Applies the NAT described by 'info' to 'skb' using the conntrack entry
+ * attached to the packet.  Returns 0 on success and when NAT is skipped
+ * (untracked packet, or no NAT extension could be added), and -EINVAL when
+ * the mapping cannot be set up or the packet cannot be translated.
+ */
+static int ovs_nat(struct sk_buff *skb, struct ovs_nat_info *info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	struct nf_conn *ct;
+	int hooknum, nh_off, err;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || nf_ct_is_untracked(ct)) {
+		WARN(1, "NAT: Untracked packet");
+		/* FIXME: Bump counter? */
+		return 0;
+	}
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return 0;
+
+	nh_off = skb_network_offset(skb);
+	skb_pull(skb, nh_off);
+	/* FIXME: COW */
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		/* FIXME: Handle ICMP, see nf_nat_ipv4_fn() */
+
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		if (ovs_nat_handle_ct_new(ct, info) != NF_ACCEPT) {
+			err = -EINVAL;
+			goto push;
+		}
+		break;
+
+	default:
+		if (WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+			    ctinfo != IP_CT_ESTABLISHED_REPLY)) {
+			err = -EINVAL;
+			goto push;
+		}
+		/* Established: no setup needed, just translate below. */
+		break;
+	}
+
+	if (info->type == NF_NAT_MANIP_SRC)
+		hooknum = NF_INET_LOCAL_IN;
+	else
+		hooknum = NF_INET_LOCAL_OUT;
+
+	/* nf_nat_packet() returns a netfilter verdict, but the caller treats
+	 * any non-zero value as failure, so translate it to an errno.
+	 */
+	if (nf_nat_packet(ct, ctinfo, hooknum, skb) == NF_ACCEPT)
+		err = 0;
+	else
+		err = -EINVAL;
+push:
+	skb_push(skb, nh_off);
+
+	return err;
+}
+
 static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
 			      const struct nlattr *nested_attr)
 {
@@ -956,6 +1067,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_CONNTRACK:
 			err = conntrack(dp, skb, key, nla_data(a));
 			break;
+
+		case OVS_ACTION_ATTR_NAT:
+			err = ovs_nat(skb, nla_data(a));
+			break;
 		}
 
 		if (unlikely(err)) {
diff --git a/datapath/flow.h b/datapath/flow.h
index ce74958..5f2c0bb 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -35,6 +35,7 @@
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
 
 struct sk_buff;
 
@@ -69,6 +70,11 @@ struct ovs_tunnel_info {
 			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
 			       opt_len)
 
+struct ovs_nat_info {
+	__u32 type;
+	struct nf_nat_range range;
+};
+
 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					    __be32 saddr, __be32 daddr,
 					    u8 tos, u8 ttl,
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 75dc87f..ebf7bd7 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -43,6 +43,7 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/rculist.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/geneve.h>
 #include <net/ip.h>
 #include <net/ip_tunnels.h>
@@ -1806,6 +1807,113 @@ static int validate_userspace(const struct nlattr *attr)
 	return 0;
 }
 
+/* Parses the nested OVS_NAT_ATTR_* attributes in 'attr' into a
+ * struct ovs_nat_info and appends it to '*sfa' as an OVS_ACTION_ATTR_NAT
+ * action.  Returns 0 on success, -EINVAL for unknown or mis-sized
+ * attributes, -ERANGE for out-of-range values, or the error returned by
+ * add_action().
+ */
+static int validate_and_copy_nat(const struct nlattr *attr,
+				 const struct sw_flow_key *key,
+				 struct sw_flow_actions **sfa)
+{
+	struct ovs_nat_info nat_info;
+	struct nlattr *a;
+	int rem;
+
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_RAND != NF_NAT_RANGE_PROTO_RANDOM);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PERSISTENT != NF_NAT_RANGE_PERSISTENT);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_FULL_RAND != NF_NAT_RANGE_PROTO_RANDOM_FULLY);
+
+	memset(&nat_info, 0, sizeof(nat_info));
+
+	nla_for_each_nested(a, attr, rem) {
+		static const u32 ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1] = {
+			[OVS_NAT_ATTR_TYPE] = sizeof(u32),
+			[OVS_NAT_ATTR_IP_MIN] = -1,
+			[OVS_NAT_ATTR_IP_MAX] = -1,
+			[OVS_NAT_ATTR_PROTO_MIN] = sizeof(u16),
+			[OVS_NAT_ATTR_PROTO_MAX] = sizeof(u16),
+			[OVS_NAT_ATTR_FLAGS] = sizeof(u32),
+		};
+		int type = nla_type(a);
+
+		if (type > OVS_NAT_ATTR_MAX) {
+			OVS_NLERR("Unknown nat attribute (type=%d, max=%d).\n",
+				  type, OVS_NAT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (ovs_nat_attr_lens[type] != nla_len(a) &&
+		    ovs_nat_attr_lens[type] != -1) {
+			OVS_NLERR("NAT attribute type has unexpected "
+				  " length (type=%d, length=%d, expected=%d).\n",
+				  type, nla_len(a), ovs_nat_attr_lens[type]);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NAT_ATTR_TYPE:
+			nat_info.type = nla_get_u32(a);
+			if (nat_info.type > OVS_NAT_TYPE_MAX) {
+				OVS_NLERR("NAT type %d out of range 0..%d\n",
+					  nat_info.type, OVS_NAT_TYPE_MAX);
+				return -ERANGE;
+			}
+			break;
+
+		case OVS_NAT_ATTR_IP_MIN:
+			if (nla_len(a) != sizeof(struct in_addr) &&
+			    nla_len(a) != sizeof(struct in6_addr)) {
+				return -ERANGE;
+			}
+
+			nla_memcpy(&nat_info.range.min_addr, a,
+				   sizeof(nat_info.range.min_addr));
+			nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_IP_MAX:
+			if (nla_len(a) != sizeof(struct in_addr) &&
+			    nla_len(a) != sizeof(struct in6_addr)) {
+				return -ERANGE;
+			}
+
+			nla_memcpy(&nat_info.range.max_addr, a,
+				   sizeof(nat_info.range.max_addr));
+			nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MIN:
+			nat_info.range.min_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MAX:
+			nat_info.range.max_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_FLAGS:
+			/* Only accept the flags userspace may control. */
+			nat_info.range.flags |= nla_get_u32(a) & OVS_NAT_FLAGS;
+			break;
+
+		default:
+			OVS_NLERR("Unknown nat attribute (%d).\n", type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR("NAT attribute has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
+
+	return add_action(sfa, OVS_ACTION_ATTR_NAT, &nat_info,
+			  sizeof(nat_info));
+}
+
 static int copy_action(const struct nlattr *from,
 		       struct sw_flow_actions **sfa)
 {
@@ -1845,6 +1953,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
 			[OVS_ACTION_ATTR_CONNTRACK] = (u32)-1,
+			[OVS_ACTION_ATTR_NAT] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -1957,6 +2066,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			skip_copy = true;
 			break;
 
+		case OVS_ACTION_ATTR_NAT:
+			err = validate_and_copy_nat(a, key, sfa);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -2079,6 +2195,35 @@ static int conntrack_action_to_attr(const struct nlattr *attr, struct sk_buff *s
 	return 0;
 }
 
+/* Dumps the struct ovs_nat_info carried by 'attr' into 'skb' as a nested
+ * OVS_ACTION_ATTR_NAT attribute.  Only the OVS_NAT_FLAG_* bits of the range
+ * flags are reported.  Returns 0 on success or -EMSGSIZE when an attribute
+ * does not fit into 'skb'.
+ */
+static int nat_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+	struct ovs_nat_info *info;
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_NAT);
+	if (!start)
+		return -EMSGSIZE;
+
+	info = nla_data(attr);
+
+	if (nla_put_u32(skb, OVS_NAT_ATTR_TYPE, info->type) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip) ||
+	    nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, info->range.min_proto.all) ||
+	    nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, info->range.max_proto.all) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_FLAGS, info->range.flags & OVS_NAT_FLAGS))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, start);
+
+	return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 {
 	const struct nlattr *a;
@@ -2106,6 +2251,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_NAT:
+			err = nat_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index f3654de..b65efc6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -592,6 +592,57 @@ enum ovs_conntrack_attr {
 #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
 
 /**
+ * enum ovs_nat_type - Supported NAT modes
+ */
+enum ovs_nat_type {
+	OVS_NAT_TYPE_SRC,
+	OVS_NAT_TYPE_DST,
+	__OVS_NAT_TYPE_MAX,
+};
+
+#define OVS_NAT_TYPE_MAX (__OVS_NAT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_nat_flag - Supported NAT flags
+ * @OVS_NAT_FLAG_PROTO_RAND: Pseudo random hash based L4 port mapping (MD5)
+ * @OVS_NAT_FLAG_PERSISTENT: Persistent IP mapping across reboots
+ * @OVS_NAT_FLAG_PROTO_FULL_RAND: Fully randomized L4 port mapping
+ *
+ * NOTE: The flags values must be compatible with NF_NAT_RANGE_* in
+ * <linux/netfilter/nf_nat.h>.
+ */
+enum ovs_nat_flag {
+	OVS_NAT_FLAG_PROTO_RAND = 0x4,
+	OVS_NAT_FLAG_PERSISTENT = 0x8,
+	OVS_NAT_FLAG_PROTO_FULL_RAND = 0x10,
+};
+
+#define OVS_NAT_FLAGS (OVS_NAT_FLAG_PROTO_RAND | OVS_NAT_FLAG_PERSISTENT | \
+		       OVS_NAT_FLAG_PROTO_FULL_RAND)
+
+/**
+ * enum ovs_nat_attr - Attributes for %OVS_ACTION_ATTR_NAT action.
+ * @OVS_NAT_ATTR_TYPE: u32 NAT type (enum ovs_nat_type)
+ * @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
+ * @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
+ * @OVS_NAT_ATTR_FLAGS: u32 NAT flags (OVS_NAT_FLAG_*)
+ */
+enum ovs_nat_attr {
+	OVS_NAT_ATTR_UNSPEC,
+	OVS_NAT_ATTR_TYPE,
+	OVS_NAT_ATTR_IP_MIN,
+	OVS_NAT_ATTR_IP_MAX,
+	OVS_NAT_ATTR_PROTO_MIN,
+	OVS_NAT_ATTR_PROTO_MAX,
+	OVS_NAT_ATTR_FLAGS,
+	__OVS_NAT_ATTR_MAX,
+};
+
+#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
+
+/**
  * enum ovs_action_attr - Action types.
  *
  * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -623,6 +674,8 @@ enum ovs_conntrack_attr {
  * %ETH_P_MPLS if the resulting MPLS label stack is not empty.  If there
  * is no MPLS label stack, as determined by ethertype, no action is taken.
  * @OVS_ACTION_ATTR_CONNTRACK: Track the connection.
+ * @OVS_ACTION_ATTR_NAT: Perform L3 network address translation (NAT) on
+ * the packet using the Netfilter subsystem.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -646,6 +699,7 @@ enum ovs_action_attr {
 				       * The data must be zero for the unmasked
 				       * bits. */
 	OVS_ACTION_ATTR_CONNTRACK,    /* One nested OVS_CT_ATTR_* */
+	OVS_ACTION_ATTR_NAT,          /* Nested OVS_NAT_ATTR_* */
 	__OVS_ACTION_ATTR_MAX
 };
 
-- 
1.9.3

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev