This module uses the netlink interface provided by the Linux kernel connection tracker to provide some visibility into the conntrack tables.
The module provides functions to: * Convert a netlink representation of a connection into a struct 'ct_dpif_entry'. * Dump all the connections. * Flush all the connections. * Listen for updates by registering a netlink notifier. It will be used by dpif-netlink to implement the interface required by the ct-dpif module. Based on original work by Jarno Rajahalme Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> --- lib/automake.mk | 2 + lib/netlink-conntrack.c | 829 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/netlink-conntrack.h | 51 +++ 3 files changed, 882 insertions(+) create mode 100644 lib/netlink-conntrack.c create mode 100644 lib/netlink-conntrack.h diff --git a/lib/automake.mk b/lib/automake.mk index 1986a31..6a20e55 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -345,6 +345,8 @@ lib_libopenvswitch_la_SOURCES += \ lib/if-notifier.h \ lib/netdev-linux.c \ lib/netdev-linux.h \ + lib/netlink-conntrack.c \ + lib/netlink-conntrack.h \ lib/netlink-notifier.c \ lib/netlink-notifier.h \ lib/netlink-protocol.h \ diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c new file mode 100644 index 0000000..b9b593c --- /dev/null +++ b/lib/netlink-conntrack.c @@ -0,0 +1,829 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> + +#include "netlink-conntrack.h" + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/netfilter/nf_conntrack_ftp.h> +#include <linux/netfilter/nf_conntrack_sctp.h> + +#include "byte-order.h" +#include "compiler.h" +#include "dynamic-string.h" +#include "netlink.h" +#include "netlink-socket.h" +#include "ofpbuf.h" +#include "openvswitch/vlog.h" +#include "poll-loop.h" +#include "timeval.h" +#include "unixctl.h" +#include "util.h" + +VLOG_DEFINE_THIS_MODULE(netlink_conntrack); +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +/* This module works only if conntrack modules and features are enabled in the + * Linux kernel. This can be done from a root shell like this: + * + * $ modprobe ip_conntrack + * $ sysctl -w net.netfilter.nf_conntrack_acct=1 + * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1 + * + * Also, if testing conntrack label feature without conntrack-aware OVS kernel + * module, there must be a connlabel rule in iptables for space to be reserved + * for the labels (see kernel source connlabel_mt_check()). Such a rule can be + * inserted from a root shell like this: + * + * $ iptables -A INPUT -m conntrack -m connlabel \ + * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT + */ + +/* Some attributes were introduced in later kernels: with these + * definitions we should be able to compile up to 2.6.32. 
*/ + +#define CTA_ZONE (CTA_SECMARK + 1) +#define CTA_SECCTX (CTA_SECMARK + 2) +#define CTA_TIMESTAMP (CTA_SECMARK + 3) +#define CTA_MARK_MASK (CTA_SECMARK + 4) +#define CTA_LABELS (CTA_SECMARK + 5) +#define CTA_LABELS_MASK (CTA_SECMARK + 6) + +#define CTA_TIMESTAMP_START 1 +#define CTA_TIMESTAMP_STOP 2 + +#define IPS_TEMPLATE_BIT 11 +#define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT) + +#define IPS_UNTRACKED_BIT 12 +#define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT) + +static const struct nl_policy nfnlgrp_conntrack_policy[] = { + [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false }, + [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false }, + [CTA_ZONE] = { .type = NL_A_BE16, .optional = true }, + [CTA_STATUS] = { .type = NL_A_BE32, .optional = false }, + [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true }, + [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true }, + [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true }, + [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true }, + [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true }, + [CTA_HELP] = { .type = NL_A_NESTED, .optional = true }, + [CTA_MARK] = { .type = NL_A_BE32, .optional = true }, + [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true }, + [CTA_ID] = { .type = NL_A_BE32, .optional = false }, + [CTA_USE] = { .type = NL_A_BE32, .optional = true }, + [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true }, + [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true }, + [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true }, + [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true }, + /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and + * CTA_LABELS_MASK are not received from kernel. */ +}; + +/* Declarations for conntrack netlink dumping. 
*/ +static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload, + int family, uint8_t subsystem, uint8_t cmd, + uint32_t flags); + +static bool nl_ct_parse_header_policy(struct ofpbuf *buf, + enum nl_ct_event_type *event_type, + uint8_t *nfgen_family, + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]); + +static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)], + uint8_t nfgen_family); + +struct nl_ct_dump_state { + struct nl_dump dump; + struct ofpbuf buf; + bool filter_zone; + uint16_t zone; +}; + +/* Conntrack netlink dumping. */ + +/* Initialize a conntrack netlink dump. */ +int +nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone) +{ + struct nl_ct_dump_state *state; + + *statep = state = xzalloc(sizeof *state); + ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); + + if (zone) { + state->filter_zone = true; + state->zone = *zone; + } + + nl_msg_put_nfgenmsg(&state->buf, 0, 0, NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_GET, NLM_F_REQUEST); + nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf); + ofpbuf_clear(&state->buf); + + return 0; +} + +/* Receive the next 'entry' from the conntrack netlink dump with 'state'. + * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may + * be uninitialized memory on entry, and must be uninitialized with + * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is + * passed to this function again, the entry must also be uninitialized before + * the next call. 
*/ +int +nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry) +{ + struct ofpbuf buf; + + memset(entry, 0, sizeof *entry); + for (;;) { + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; + enum nl_ct_event_type type; + uint8_t nfgen_family; + + if (!nl_dump_next(&state->dump, &buf, &state->buf)) { + return EOF; + } + + if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) { + continue; + }; + + if (state->filter_zone) { + uint16_t entry_zone = attrs[CTA_ZONE] + ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE])) + : 0; + if (entry_zone != state->zone) { + continue; + } + } + + if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { + break; + } + + ct_dpif_entry_uninit(entry); + memset(entry, 0, sizeof *entry); + /* Ignore the failed entry and get the next one. */ + } + + ofpbuf_uninit(&buf); + return 0; +} + +/* End a conntrack netlink dump. */ +int +nl_ct_dump_done(struct nl_ct_dump_state *state) +{ + int error = nl_dump_done(&state->dump); + + ofpbuf_uninit(&state->buf); + free(state); + return error; +} + +/* Format conntrack event 'entry' of 'type' to 'ds'. */ +void +nl_ct_format_event_entry(const struct ct_dpif_entry *entry, + enum nl_ct_event_type type, struct ds *ds, + bool verbose, bool print_stats) +{ + ds_put_format(ds, "%s ", + type == NL_CT_EVENT_NEW ? "NEW" + : type == NL_CT_EVENT_UPDATE ? "UPDATE" + : type == NL_CT_EVENT_DELETE ? 
"DELETE" + : "UNKNOWN"); + ct_dpif_format_entry(entry, ds, verbose, print_stats); +} + +int +nl_ct_flush(void) +{ + struct ofpbuf buf; + int err; + + ofpbuf_init(&buf, NL_DUMP_BUFSIZE); + + nl_msg_put_nfgenmsg(&buf, 0, 0, NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); + + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); + ofpbuf_uninit(&buf); + + /* Expectations are flushed automatically, because they do not + * have a master connection anymore */ + + return err; +} + +int +nl_ct_flush_zone(uint16_t flush_zone) +{ + /* Apparently, there's no netlink interface to flush a specific zone. + * This code dumps every connection, checks the zone and eventually + * delete the entry. + * + * This is race-prone, but it is better than using shell scripts. */ + + struct nl_dump dump; + struct ofpbuf buf, reply, delete; + + ofpbuf_init(&buf, NL_DUMP_BUFSIZE); + ofpbuf_init(&delete, NL_DUMP_BUFSIZE); + + nl_msg_put_nfgenmsg(&buf, 0, 0, NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_GET, NLM_F_REQUEST); + nl_dump_start(&dump, NETLINK_NETFILTER, &buf); + ofpbuf_clear(&buf); + + for (;;) { + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; + enum nl_ct_event_type event_type; + uint8_t nfgen_family; + uint16_t zone = 0; + + if (!nl_dump_next(&dump, &reply, &buf)) { + break; + } + + if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family, + attrs)) { + continue; + }; + + if (attrs[CTA_ZONE]) { + zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); + } + + if (zone != flush_zone) { + /* The entry is not in the zone we're flushing. 
*/ + continue; + } + nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); + + nl_msg_put_be16(&delete, CTA_ZONE, htons(zone)); + nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1, + attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN); + nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1, + attrs[CTA_ID]->nla_len - NLA_HDRLEN); + nl_transact(NETLINK_NETFILTER, &delete, NULL); + ofpbuf_clear(&delete); + } + + nl_dump_done(&dump); + + ofpbuf_uninit(&delete); + ofpbuf_uninit(&buf); + + /* Expectations are flushed automatically, because they do not + * have a master connection anymore */ + return 0; +} + +/* Conntrack netlink parsing. */ + +static bool +nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters) +{ + static const struct nl_policy policy[] = { + [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false }, + [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + if (parsed) { + counters->packets + = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS])); + counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES])); + } else { + VLOG_ERR_RL(&rl, "Could not parse nested counters. 
" + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false }, + [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + if (parsed) { + timestamp->start + = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START])); + if (attrs[CTA_TIMESTAMP_STOP]) { + timestamp->stop + = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP])); + } + } else { + VLOG_ERR_RL(&rl, "Could not parse nested timestamp. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple) +{ + static const struct nl_policy policy[] = { + [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true }, + [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true }, + [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, + [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + if (parsed) { + if (tuple->l3_type == AF_INET) { + if (attrs[CTA_IP_V4_SRC]) { + tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]); + } + if (attrs[CTA_IP_V4_DST]) { + tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]); + } + } else if (tuple->l3_type == AF_INET6) { + if (attrs[CTA_IP_V6_SRC]) { + memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]), + sizeof tuple->src.in6); + } + if (attrs[CTA_IP_V6_DST]) { + memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]), + sizeof tuple->dst.in6); + } + } else { + VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type); + return false; + 
} + } else { + VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple) +{ + static const struct nl_policy policy[] = { + [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false }, + [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true }, + [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true }, + [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true }, + [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true }, + [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true }, + [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true }, + [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true }, + [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + if (parsed) { + tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]); + + if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) { + if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE] + || !attrs[CTA_PROTO_ICMP_CODE]) { + VLOG_ERR_RL(&rl, "Tuple ICMP data missing."); + return false; + } + tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]); + tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]); + tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]); + } else if (tuple->l3_type == AF_INET6 && + tuple->ip_proto == IPPROTO_ICMPV6) { + if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE] + || !attrs[CTA_PROTO_ICMPV6_CODE]) { + VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing."); + return false; + } + tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]); + tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]); + tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]); + } else if 
(attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) { + tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]); + tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]); + } else { + /* Unsupported IPPROTO and no ports, leave them zeroed. + * We have parsed the ip_proto, so this is not a total failure. */ + VLOG_INFO_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto); + } + } else { + VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple, + uint16_t l3_type) +{ + static const struct nl_policy policy[] = { + [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false }, + [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + memset(tuple, 0, sizeof *tuple); + + if (parsed) { + tuple->l3_type = l3_type; + + if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple) + || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) { + struct ds ds; + + ds_init(&ds); + ct_dpif_format_tuple(&ds, tuple, true); + + VLOG_ERR_RL(&rl, "Failed tuple: %s", ds_cstr(&ds)); + ds_destroy(&ds); + + memset(tuple, 0, sizeof *tuple); + return false; + } + } else { + VLOG_ERR_RL(&rl, "Could not parse nested tuple options. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +/* Translate netlink TCP state to CT_DPIF_TCP state. 
*/ +static uint8_t +nl_ct_tcp_state_to_dpif(uint8_t state) +{ + switch (state) { + case TCP_CONNTRACK_NONE: + return CT_DPIF_TCPS_CLOSED; + case TCP_CONNTRACK_SYN_SENT: + return CT_DPIF_TCPS_SYN_SENT; + case TCP_CONNTRACK_SYN_SENT2: + return CT_DPIF_TCPS_SYN_SENT; + case TCP_CONNTRACK_SYN_RECV: + return CT_DPIF_TCPS_SYN_RECV; + case TCP_CONNTRACK_ESTABLISHED: + return CT_DPIF_TCPS_ESTABLISHED; + case TCP_CONNTRACK_FIN_WAIT: + return CT_DPIF_TCPS_FIN_WAIT_1; + case TCP_CONNTRACK_CLOSE_WAIT: + return CT_DPIF_TCPS_CLOSE_WAIT; + case TCP_CONNTRACK_LAST_ACK: + return CT_DPIF_TCPS_LAST_ACK; + case TCP_CONNTRACK_TIME_WAIT: + return CT_DPIF_TCPS_TIME_WAIT; + case TCP_CONNTRACK_CLOSE: + return CT_DPIF_TCPS_CLOSING; + default: + return CT_DPIF_TCPS_CLOSED; + } +} + +static uint8_t +ip_ct_tcp_flags_to_dpif(uint8_t flags) +{ + uint8_t ret = 0; +#define CT_DPIF_TCP_FLAG(FLAG) \ + ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0; + CT_DPIF_TCP_FLAGS +#undef CT_DPIF_STATUS_FLAG + return ret; +} + +static bool +nl_ct_parse_protoinfo_tcp(struct nlattr *nla, + struct ct_dpif_protoinfo *protoinfo) +{ + static const struct nl_policy policy[] = { + [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false }, + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8, + .optional = false }, + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8, + .optional = false }, + [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16, + .optional = false }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16, + .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + if (parsed) { + const struct nf_ct_tcp_flags *flags_orig, *flags_reply; + uint8_t state; + protoinfo->proto = IPPROTO_TCP; + state = nl_ct_tcp_state_to_dpif( + nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE])); + /* The connection tracker keeps only one tcp state for the + * connection, but our structures 
store a separate state for + * each endpoint. Here we duplicate the state. */ + protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state; + protoinfo->tcp.wscale_orig = nl_attr_get_u8( + attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); + protoinfo->tcp.wscale_reply = nl_attr_get_u8( + attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]); + flags_orig = + nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL], + sizeof *flags_orig); + protoinfo->tcp.flags_orig = + ip_ct_tcp_flags_to_dpif(flags_orig->flags); + flags_reply = + nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY], + sizeof *flags_reply); + protoinfo->tcp.flags_reply = + ip_ct_tcp_flags_to_dpif(flags_reply->flags); + } else { + VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo) +{ + /* These are mutually exclusive. */ + static const struct nl_policy policy[] = { + [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true }, + [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + memset(protoinfo, 0, sizeof *protoinfo); + + if (parsed) { + if (attrs[CTA_PROTOINFO_TCP]) { + parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP], + protoinfo); + } else if (attrs[CTA_PROTOINFO_SCTP]) { + VLOG_WARN_RL(&rl, "SCTP protoinfo not yet supported!"); + } else { + VLOG_WARN_RL(&rl, "Empty protoinfo!"); + } + } else { + VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. 
" + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +static bool +nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper) +{ + static const struct nl_policy policy[] = { + [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + bool parsed; + + parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); + + memset(helper, 0, sizeof *helper); + + if (parsed) { + helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME])); + } else { + VLOG_ERR_RL(&rl, "Could not parse nested helper options. " + "Possibly incompatible Linux kernel version."); + } + + return parsed; +} + +/* Translate netlink entry status flags to CT_DPIF_TCP status flags. */ +static uint32_t +ips_status_to_dpif_flags(uint32_t status) +{ + uint32_t ret = 0; +#define CT_DPIF_STATUS_FLAG(FLAG) \ + ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0; + CT_DPIF_STATUS_FLAGS +#undef CT_DPIF_STATUS_FLAG + return ret; +} + +static bool +nl_ct_parse_header_policy(struct ofpbuf *buf, + enum nl_ct_event_type *event_type, + uint8_t *nfgen_family, + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfm; + uint8_t type; + + nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); + nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm); + if (!nfm) { + VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg)."); + return false; + } + if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) { + VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).", + NFNL_SUBSYS_ID(nlh->nlmsg_type)); + return false; + } + if (nfm->version != NFNETLINK_V0) { + VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).", + NFNL_MSG_TYPE(nfm->version)); + return false; + } + + if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm, + nfnlgrp_conntrack_policy, attrs, + ARRAY_SIZE(nfnlgrp_conntrack_policy))) { + VLOG_ERR_RL(&rl, "Received bad nfnl message (policy)."); + 
return false; + } + + type = NFNL_MSG_TYPE(nlh->nlmsg_type); + *nfgen_family = nfm->nfgen_family; + + switch (type) { + case IPCTNL_MSG_CT_NEW: + *event_type = nlh->nlmsg_flags & NLM_F_CREATE + ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE; + break; + case IPCTNL_MSG_CT_DELETE: + *event_type = NL_CT_EVENT_DELETE; + break; + default: + VLOG_ERR_RL(&rl, "Can't parse conntrack event type."); + return false; + } + + return true; +} + +static bool +nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)], + uint8_t nfgen_family) +{ + if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig, + nfgen_family)) { + return false; + } + if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply, + nfgen_family)) { + return false; + } + if (attrs[CTA_COUNTERS_ORIG] && + !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG], + &entry->counters_orig)) { + return false; + } + if (attrs[CTA_COUNTERS_REPLY] && + !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY], + &entry->counters_reply)) { + return false; + } + if (attrs[CTA_TIMESTAMP] && + !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) { + return false; + } + if (attrs[CTA_ID]) { + entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID])); + } + if (attrs[CTA_ZONE]) { + entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); + } + if (attrs[CTA_STATUS]) { + entry->status = ips_status_to_dpif_flags( + ntohl(nl_attr_get_be32(attrs[CTA_STATUS]))); + } + if (attrs[CTA_TIMEOUT]) { + entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT])); + } + if (attrs[CTA_MARK]) { + entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK])); + } + if (attrs[CTA_LABELS]) { + memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]), + MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS]))); + } + if (attrs[CTA_PROTOINFO] && + !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) { + return false; + } + if (attrs[CTA_HELP] && + 
!nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) { + return false; + } + if (attrs[CTA_TUPLE_MASTER] && + !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master, + nfgen_family)) { + return false; + } + return true; +} + +bool +nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry, + enum nl_ct_event_type *event_type) +{ + struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; + uint8_t nfgen_family; + + memset(entry, 0, sizeof *entry); + if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) { + return false; + }; + + if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { + ct_dpif_entry_uninit(entry); + memset(entry, 0, sizeof *entry); + return false; + } + + return true; +} + +/* NetFilter utility functions. */ + +/* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be + * initially empty. 'expected_payload' should be an estimate of the number of + * payload bytes to be supplied; if the size of the payload is unknown a value + * of 0 is acceptable. + * + * Non-zero 'family' is the address family of items to get (e.g. AF_INET). + * + * 'flags' is a bit-mask that indicates what kind of request is being made. It + * is often NLM_F_REQUEST indicating that a request is being made, commonly + * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag requests + * a dump of the table. + * + * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK. + * + * 'cmd' is an enumerated value specific to the 'subsystem'. + * + * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will + * fill it in just before sending the message. + * + * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are + * not NetFilter Netlink messages. 
*/ +static void +nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload, + int family, uint8_t subsystem, uint8_t cmd, + uint32_t flags) +{ + struct nfgenmsg *nfm; + + nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload, + subsystem << 8 | cmd, flags); + ovs_assert(msg->size == NLMSG_HDRLEN); + nfm = nl_msg_put_uninit(msg, sizeof *nfm); + nfm->nfgen_family = family; + nfm->version = NFNETLINK_V0; + nfm->res_id = 0; +} diff --git a/lib/netlink-conntrack.h b/lib/netlink-conntrack.h new file mode 100644 index 0000000..f1a4150 --- /dev/null +++ b/lib/netlink-conntrack.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef NETLINK_CONNTRACK_H +#define NETLINK_CONNTRACK_H + +#include "byte-order.h" +#include "compiler.h" +#include "ct-dpif.h" +#include "dynamic-string.h" +#include "hmap.h" +#include "ofpbuf.h" +#include "timeval.h" +#include "unixctl.h" +#include "util.h" + +enum nl_ct_event_type { + NL_CT_EVENT_NEW = 1 << 0, + NL_CT_EVENT_UPDATE = 1 << 1, + NL_CT_EVENT_DELETE = 1 << 2, +}; + +struct nl_ct_dump_state; + +int nl_ct_dump_start(struct nl_ct_dump_state **, const uint16_t *zone); +int nl_ct_dump_next(struct nl_ct_dump_state *, struct ct_dpif_entry *); +int nl_ct_dump_done(struct nl_ct_dump_state *); + +int nl_ct_flush(void); +int nl_ct_flush_zone(uint16_t zone); + +bool nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *, + enum nl_ct_event_type *); +void nl_ct_format_event_entry(const struct ct_dpif_entry *, + enum nl_ct_event_type, struct ds *, + bool verbose, bool print_stats); + +#endif /* NETLINK_CONNTRACK_H */ -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev