From: Justin Pettit <jpet...@nicira.com> An RFC to get early feedback on exposing Linux's kernel connection tracker to OVS. The code has a few rough spots that will be addressed in the next version:
- Need a better interface than setting individual flags for the state. - Need support for IP frags. - Need support for zones. - Should have the ability to match on "invalid" connection states. - Should only allow conntrack() if conn_state is 0x00 to prevent loops. I'd be interested in hearing suggestions for improvements other than those mentioned above. Here's a simple example flow table to allow outbound TCP traffic from port 1; drop traffic from port 2 that was not initiated by port 1: ovs-ofctl add-flow br0 \ "in_port=1,conn_state=0x00/0x80,tcp,action=conntrack(zone=0),normal" ovs-ofctl add-flow br0 \ "in_port=2,conn_state=0x00/0x80,tcp,action=conntrack(flags=1,zone=0)" ovs-ofctl add-flow br0 in_port=2,conn_state=0x82/0x83,tcp,action=1 ovs-ofctl add-flow br0 in_port=2,conn_state=0x81/0x83,tcp,action=drop ovs-ofctl add-flow br0 priority=10,action=normal --- datapath/actions.c | 45 ++++++++++++ datapath/datapath.c | 9 +++ datapath/flow.c | 25 +++++++ datapath/flow.h | 3 + datapath/flow_netlink.c | 21 +++++- datapath/linux/compat/include/linux/openvswitch.h | 3 + include/openflow/nicira-ext.h | 22 ++++++ lib/dpif-netdev.c | 1 + lib/dpif.c | 1 + lib/flow.c | 61 +++++++++++++-- lib/flow.h | 9 ++- lib/match.c | 40 +++++++++- lib/match.h | 3 + lib/meta-flow.c | 36 +++++++++ lib/meta-flow.h | 1 + lib/nx-match.c | 6 +- lib/odp-execute.c | 8 ++ lib/odp-util.c | 41 +++++++++++ lib/odp-util.h | 5 +- lib/ofp-actions.c | 90 +++++++++++++++++++++++ lib/ofp-actions.h | 18 +++++ lib/ofp-print.c | 4 + lib/ofp-util.c | 7 +- lib/packets.h | 1 + ofproto/ofproto-dpif-xlate.c | 19 ++++- ofproto/ofproto-unixctl.man | 2 + utilities/ovs-ofctl.8.in | 20 +++++ 27 files changed, 483 insertions(+), 18 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 8d18848..46bdf53 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -32,6 +32,7 @@ #include <net/ipv6.h> #include <net/checksum.h> #include <net/dsfield.h> +#include <net/netfilter/nf_conntrack_core.h> #include 
<net/sctp/checksum.h> #include "datapath.h" @@ -824,6 +825,46 @@ static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) key->ovs_flow_hash = hash; } +static int conntrack(struct sk_buff *skb, uint16_t zone) +{ + struct sw_flow_key *key = OVS_CB(skb)->pkt_key; + int nh_ofs = skb_network_offset(skb); + struct vport *vport; + struct net *net; + + if (skb->nfct) { + pr_warn_once("Attempt to run through conntrack again\n"); + return 0; + } + +#ifdef CONFIG_NET_NS + vport = OVS_CB(skb)->input_vport; + if (!vport) + return EINVAL; + + net = vport->dp->net; +#else + net = &init_net; +#endif + + /* The conntrack module expects to be working at L3. */ + skb_pull(skb, nh_ofs); + + /* xxx What's the best return val? */ + if (nf_conntrack_in(net, PF_INET, NF_INET_PRE_ROUTING, skb) != NF_ACCEPT) + return EINVAL; + + if (nf_conntrack_confirm(skb) != NF_ACCEPT) + return EINVAL; + + /* Point back to L2, which OVS expects. */ + skb_push(skb, nh_ofs); + + key->phy.conn_state = ovs_map_nfctinfo(skb); + + return 0; +} + static int execute_set_action(struct sk_buff *skb, const struct nlattr *nested_attr) { @@ -986,6 +1027,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, a); break; + + case OVS_ACTION_ATTR_CONNTRACK: + err = conntrack(skb, nla_get_u16(a)); + break; } if (unlikely(err)) { diff --git a/datapath/datapath.c b/datapath/datapath.c index 08bac42..982ed9d 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -263,8 +263,17 @@ void ovs_dp_process_packet(struct sk_buff *skb) stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ + /* xxx Are we better off resetting the SKB hash, since we've changed + * xxx the value of a field? Will we always have collision for packets + * xxx that only vary based on the conn_state? */ +#if 0 flow = ovs_flow_tbl_lookup_stats(&dp->table, pkt_key, skb_get_hash(skb), &n_mask_hit); +#else + /* xxx Gross, clearing hash. 
*/ + flow = ovs_flow_tbl_lookup_stats(&dp->table, pkt_key, 0, + &n_mask_hit); +#endif if (unlikely(!flow)) { struct dp_upcall_info upcall; int error; diff --git a/datapath/flow.c b/datapath/flow.c index af9c227..16aa989 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -42,6 +42,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> +#include <linux/netfilter/nf_conntrack_common.h> #include "datapath.h" #include "flow.h" @@ -678,6 +679,29 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } +/* Map SKB connection state into the values used by flow definition. */ +u8 ovs_map_nfctinfo(struct sk_buff *skb) +{ + if (!skb->nfct) + return 0; + + /* xxx This should use #defines instead of numbers. */ + if (skb->nfctinfo == IP_CT_ESTABLISHED) + return 0x82; + else if (skb->nfctinfo == IP_CT_RELATED) + return 0x84; + else if (skb->nfctinfo == IP_CT_NEW) + return 0x81; + else if (skb->nfctinfo == IP_CT_ESTABLISHED_REPLY) + return 0xc2; + else if (skb->nfctinfo == IP_CT_RELATED_REPLY) + return 0xc4; + else if (skb->nfctinfo == IP_CT_NEW_REPLY) + return 0xc1; + else + return 0x80; +} + int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) @@ -704,6 +728,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; + key->phy.conn_state = ovs_map_nfctinfo(skb); key->ovs_flow_hash = 0; key->recirc_id = 0; diff --git a/datapath/flow.h b/datapath/flow.h index 44ed10d..22f2c83 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -130,6 +130,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ + u8 conn_state; /* Connection state. */ } __packed phy; /* Safe when right after 'tun_key'. 
*/ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ @@ -252,6 +253,8 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); +u8 ovs_map_nfctinfo(struct sk_buff *skb); + int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); /* Extract key from packet coming from userspace. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 6c74841..e0ed56b 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -267,7 +267,7 @@ size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 23); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -276,6 +276,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + + nla_total_size(1) /* OVS_KEY_ATTR_CONN_STATE */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -292,6 +293,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), + [OVS_KEY_ATTR_CONN_STATE] = sizeof(u8), [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), @@ -663,6 +665,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return -EINVAL; *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL); } + + if (*attrs & (1ULL << OVS_KEY_ATTR_CONN_STATE)) { + uint8_t conn_state = 
nla_get_u8(a[OVS_KEY_ATTR_CONN_STATE]); + + SW_FLOW_KEY_PUT(match, phy.conn_state, conn_state, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CONN_STATE); + } return 0; } @@ -1146,6 +1155,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; + if (nla_put_u8(skb, OVS_KEY_ATTR_CONN_STATE, output->phy.conn_state)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1616,6 +1628,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_CONN_STATE: case OVS_KEY_ATTR_ETHERNET: break; @@ -1750,7 +1763,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, - [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) + [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), + [OVS_ACTION_ATTR_CONNTRACK] = sizeof(u16) }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -1856,6 +1870,9 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_CONNTRACK: + break; + default: return -EINVAL; } diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 306ea86..08372c4 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -329,6 +329,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ + OVS_KEY_ATTR_CONN_STATE,/* u8 conn state */ #ifdef __KERNEL__ /* Only used within kernel data path. */ @@ -609,6 +610,7 @@ struct ovs_action_hash { * indicate the new packet contents. 
This could potentially still be * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_CONNTRACK: Track the connection. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -631,6 +633,7 @@ enum ovs_action_attr { * data immediately followed by a mask. * The data must be zero for the unmasked * bits. */ + OVS_ACTION_ATTR_CONNTRACK, /* u16 zone. */ __OVS_ACTION_ATTR_MAX }; diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index bbf3388..57f0133 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -968,6 +968,28 @@ OFP_ASSERT(sizeof(struct nx_async_config) == 24); * Masking: not maskable. */ #define NXM_NX_RECIRC_ID NXM_HEADER (0x0001, 36, 4) +/* Connection tracking state. + * + * The connection tracking state is populated by the NXAST_CONNTRACK + * action. The following flags are defined: + * + * - CONN_STATE_TRACKED (0x80): Connection tracking has occurred. + * - CONN_STATE_REPLY (0x40): This flow did not initiate the connection. + * + * The following values describe the state of the connection: + * + * - New (0x01): This is the beginning of a new connection. + * - Established (0x02): This is part of an already existing connection. + * - Related (0x04): This is a new connection that is "expected". + * + * Prereqs: None. + * + * Format: 8-bit fully maskable + * + * Masking: Fully maskable. */ +#define NXM_NX_CONN_STATE NXM_HEADER (0x0001, 37, 1) +#define NXM_NX_CONN_STATE_W NXM_HEADER_W(0x0001, 37, 1) + /* ## --------------------- ## */ /* ## Requests and replies. 
## */ /* ## --------------------- ## */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 3f69219..418cefb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2726,6 +2726,7 @@ dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_CONNTRACK: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } diff --git a/lib/dpif.c b/lib/dpif.c index bf2c5f9..08a94f6 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1044,6 +1044,7 @@ dpif_execute_helper_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_CONNTRACK: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } diff --git a/lib/flow.c b/lib/flow.c index b9f1820..d9056c6 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -70,11 +70,12 @@ BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 3 offsetof(struct flow, nw_proto) / 4 == offsetof(struct flow, nw_tos) / 4); -/* TCP flags in the first half of a BE32, zeroes in the other half. */ +/* TCP flags in the first half of a BE32, 'conn_state' and pad in the + * other half. */ BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) + 2 - == offsetof(struct flow, pad) && + == offsetof(struct flow, conn_state) && offsetof(struct flow, tcp_flags) / 4 - == offsetof(struct flow, pad) / 4); + == offsetof(struct flow, conn_state) / 4); #if WORDS_BIGENDIAN #define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \ << 16) @@ -141,6 +142,19 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " #define miniflow_push_be32_(MF, OFS, VALUE) \ miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE)) +/* xxx Possibly clean this up. Assert if another value has been pushed. */ +/* Caller must have previously called a miniflow_push_* macro for "OFS" + * with no other push calls in between. 
*/ +#define miniflow_update_uint32_(MF, OFS, VALUE, MASK) \ +{ \ + MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 4 == 0); \ + *(MF.data-1) |= (VALUE & MASK); \ +} + +#define miniflow_update_be32_(MF, OFS, VALUE, MASK) \ + miniflow_update_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE), \ + (OVS_FORCE uint32_t)(MASK)) + #define miniflow_push_uint16_(MF, OFS, VALUE) \ { \ MINIFLOW_ASSERT(MF.data < MF.end && \ @@ -191,6 +205,12 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " } \ } +#define miniflow_update_uint32(MF, FIELD, VALUE, MASK) \ + miniflow_update_uint32_(MF, offsetof(struct flow, FIELD), VALUE, MASK) + +#define miniflow_update_be32(MF, FIELD, VALUE, MASK) \ + miniflow_update_be32_(MF, offsetof(struct flow, FIELD), VALUE, MASK) + #define miniflow_push_uint16(MF, FIELD, VALUE) \ miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE) @@ -573,13 +593,28 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_be32(mf, nw_frag, BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto)); + /* xxx This is hacky to get around ICMPv6 issues. 
*/ + if ((nw_frag & FLOW_NW_FRAG_LATER) || (nw_proto != IPPROTO_ICMPV6)) { + if (md) { + /* xxx Can't be use _check() version, since state may be 0 */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, md->conn_state, 0)); + } else { + /* xxx Hack so tcp_flags always has pushed entry */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, 0, 0)); + } + } + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { const struct tcp_header *tcp = data; - miniflow_push_be32(mf, tcp_flags, - TCP_FLAGS_BE32(tcp->tcp_ctl)); + miniflow_update_be32(mf, tcp_flags, + TCP_FLAGS_BE32(tcp->tcp_ctl), + htonl(0xffff0000)); + miniflow_push_words(mf, tp_src, &tcp->tcp_src, 1); } } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { @@ -625,6 +660,17 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_words(mf, nd_target, nd_target, sizeof *nd_target / 4); } + /* xxx This is gross. */ + if (md) { + /* xxx Can't be use _check() version, since + * xxx state may be 0 */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, md->conn_state, 0)); + } else { + /* xxx Hack so tcp_flags always has pushed entry */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, 0, 0)); + } miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type)); miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code)); } @@ -668,7 +714,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc) void flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); fmd->dp_hash = flow->dp_hash; fmd->recirc_id = flow->recirc_id; @@ -678,6 +724,7 @@ flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) fmd->metadata = flow->metadata; memcpy(fmd->regs, flow->regs, sizeof fmd->regs); fmd->pkt_mark = flow->pkt_mark; + fmd->conn_state = flow->conn_state; fmd->in_port = 
flow->in_port.ofp_port; } @@ -1338,7 +1385,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label)); /* Clear all L3 and L4 fields. */ - BUILD_ASSERT(FLOW_WC_SEQ == 27); + BUILD_ASSERT(FLOW_WC_SEQ == 28); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); } diff --git a/lib/flow.h b/lib/flow.h index e4c3b34..37801c8 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -38,7 +38,7 @@ struct pkt_metadata; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 27 +#define FLOW_WC_SEQ 28 /* Number of Open vSwitch extension 32-bit registers. */ #define FLOW_N_REGS 8 @@ -124,7 +124,8 @@ struct flow { uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */ - ovs_be16 pad; /* Padding. */ + uint8_t conn_state ; /* Connection state. */ + uint8_t pad; /* Padding. */ /* L4 */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */ @@ -141,7 +142,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t) == sizeof(struct flow_tnl) + 176 - && FLOW_WC_SEQ == 27); + && FLOW_WC_SEQ == 28); /* Incremental points at which flow classification may be performed in * segments. @@ -174,6 +175,7 @@ struct flow_metadata { ovs_be64 metadata; /* OpenFlow 1.1+ metadata field. */ uint32_t regs[FLOW_N_REGS]; /* Registers. */ uint32_t pkt_mark; /* Packet mark. */ + uint8_t conn_state; /* Connection state. */ ofp_port_t in_port; /* OpenFlow port or zero. 
*/ }; @@ -669,6 +671,7 @@ pkt_metadata_from_flow(const struct flow *flow) md.skb_priority = flow->skb_priority; md.pkt_mark = flow->pkt_mark; md.in_port = flow->in_port; + md.conn_state = flow->conn_state; return md; } diff --git a/lib/match.c b/lib/match.c index c4edbfb..cba569c 100644 --- a/lib/match.c +++ b/lib/match.c @@ -60,6 +60,10 @@ match_wc_init(struct match *match, const struct flow *flow) memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark); } + if (flow->conn_state) { + memset(&wc->masks.conn_state, 0xff, sizeof wc->masks.conn_state); + } + for (i = 0; i < FLOW_N_REGS; i++) { if (flow->regs[i]) { memset(&wc->masks.regs[i], 0xff, sizeof wc->masks.regs[i]); @@ -335,6 +339,20 @@ match_set_pkt_mark_masked(struct match *match, uint32_t pkt_mark, uint32_t mask) } void +match_set_conn_state(struct match *match, uint8_t conn_state) +{ + match_set_conn_state_masked(match, conn_state, UINT8_MAX); +} + +void +match_set_conn_state_masked(struct match *match, uint8_t conn_state, + uint8_t mask) +{ + match->flow.conn_state = conn_state & mask; + match->wc.masks.conn_state = mask; +} + +void match_set_dl_type(struct match *match, ovs_be16 dl_type) { match->wc.masks.dl_type = OVS_BE16_MAX; @@ -867,6 +885,19 @@ format_ipv6_netmask(struct ds *s, const char *name, } static void +format_uint8_masked(struct ds *s, const char *name, + uint8_t value, uint8_t mask) +{ + if (mask) { + ds_put_format(s, "%s=%#"PRIx8, name, value); + if (mask != UINT8_MAX) { + ds_put_format(s, "/%#"PRIx8, mask); + } + ds_put_char(s, ','); + } +} + +static void format_be16_masked(struct ds *s, const char *name, ovs_be16 value, ovs_be16 mask) { @@ -959,7 +990,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%u,", priority); @@ -981,6 +1012,13 @@ match_format(const struct match *match, struct ds *s, unsigned 
int priority) ds_put_format(s, "skb_priority=%#"PRIx32",", f->skb_priority); } + if (wc->masks.conn_state) { + /* xxx Spell out the flags? To be prettier? */ + /* xxx If pretty print, remove format_uint8_masked(). */ + format_uint8_masked(s, "conn_state", f->conn_state, + wc->masks.conn_state); + } + if (wc->masks.dl_type) { skip_type = true; if (f->dl_type == htons(ETH_TYPE_IP)) { diff --git a/lib/match.h b/lib/match.h index ce9fb28..b8e3745 100644 --- a/lib/match.h +++ b/lib/match.h @@ -71,6 +71,9 @@ void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t ma void match_set_in_port(struct match *, ofp_port_t ofp_port); void match_set_pkt_mark(struct match *, uint32_t pkt_mark); void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask); +void match_set_conn_state(struct match *, uint8_t conn_state); +void match_set_conn_state_masked(struct match *, uint8_t conn_state, + uint8_t mask); void match_set_skb_priority(struct match *, uint32_t skb_priority); void match_set_dl_type(struct match *, ovs_be16); void match_set_dl_src(struct match *, const uint8_t[6]); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 3b82e62..e81a49f 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -207,6 +207,18 @@ const struct mf_field mf_fields[MFF_N_IDS] = { OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, -1, + }, { + MFF_CONN_STATE, "conn_state", NULL, + MF_FIELD_SIZES(u8), + MFM_FULLY, + MFS_HEXADECIMAL, + MFP_NONE, + true, + NXM_NX_CONN_STATE, "NXM_NX_CONN_STATE", + NXM_NX_CONN_STATE, "NXM_NX_CONN_STATE", 0, + OFPUTIL_P_NXM_OXM_ANY, + OFPUTIL_P_NXM_OXM_ANY, + -1, }, #define REGISTER(IDX) \ @@ -943,6 +955,8 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) return !wc->masks.skb_priority; case MFF_PKT_MARK: return !wc->masks.pkt_mark; + case MFF_CONN_STATE: + return !wc->masks.conn_state; CASE_MFF_REGS: return !wc->masks.regs[mf->id - MFF_REG0]; CASE_MFF_XREGS: @@ -1184,6 +1198,7 @@ mf_is_value_valid(const 
struct mf_field *mf, const union mf_value *value) case MFF_IN_PORT: case MFF_SKB_PRIORITY: case MFF_PKT_MARK: + case MFF_CONN_STATE: CASE_MFF_REGS: CASE_MFF_XREGS: case MFF_ETH_SRC: @@ -1312,6 +1327,10 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, value->be32 = htonl(flow->pkt_mark); break; + case MFF_CONN_STATE: + value->u8 = flow->conn_state; + break; + CASE_MFF_REGS: value->be32 = htonl(flow->regs[mf->id - MFF_REG0]); break; @@ -1518,6 +1537,10 @@ mf_set_value(const struct mf_field *mf, match_set_pkt_mark(match, ntohl(value->be32)); break; + case MFF_CONN_STATE: + match_set_conn_state(match, value->u8); + break; + CASE_MFF_REGS: match_set_reg(match, mf->id - MFF_REG0, ntohl(value->be32)); break; @@ -1741,6 +1764,10 @@ mf_set_flow_value(const struct mf_field *mf, flow->pkt_mark = ntohl(value->be32); break; + case MFF_CONN_STATE: + flow->conn_state = value->u8; + break; + CASE_MFF_REGS: flow->regs[mf->id - MFF_REG0] = ntohl(value->be32); break; @@ -1962,6 +1989,11 @@ mf_set_wild(const struct mf_field *mf, struct match *match) match->wc.masks.pkt_mark = 0; break; + case MFF_CONN_STATE: + match->flow.conn_state = 0; + match->wc.masks.conn_state = 0; + break; + CASE_MFF_REGS: match_set_reg_masked(match, mf->id - MFF_REG0, 0, 0); break; @@ -2203,6 +2235,10 @@ mf_set(const struct mf_field *mf, ntohl(mask->be32)); break; + case MFF_CONN_STATE: + match_set_conn_state_masked(match, value->u8, mask->u8); + break; + case MFF_ETH_DST: match_set_dl_dst_masked(match, value->mac, mask->mac); break; diff --git a/lib/meta-flow.h b/lib/meta-flow.h index c11f7ab..865ce59 100644 --- a/lib/meta-flow.h +++ b/lib/meta-flow.h @@ -46,6 +46,7 @@ enum OVS_PACKED_ENUM mf_field_id { MFF_IN_PORT_OXM, /* be32 */ MFF_SKB_PRIORITY, /* be32 */ MFF_PKT_MARK, /* be32 */ + MFF_CONN_STATE, /* u8 */ #if FLOW_N_REGS == 8 MFF_REG0, /* be32 */ diff --git a/lib/nx-match.c b/lib/nx-match.c index 05be3b5..2d879b1 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -617,7 +617,7 @@ 
nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* Metadata. */ if (match->wc.masks.dp_hash) { @@ -741,6 +741,10 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, nxm_put_32m(b, mf_oxm_header(MFF_PKT_MARK, oxm), htonl(flow->pkt_mark), htonl(match->wc.masks.pkt_mark)); + /* Connection state. */ + nxm_put_8m(b, NXM_NX_CONN_STATE, flow->conn_state, + match->wc.masks.conn_state); + /* OpenFlow 1.1+ Metadata. */ nxm_put_64m(b, mf_oxm_header(MFF_METADATA, oxm), flow->metadata, match->wc.masks.metadata); diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 78b1f24..7785232 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -198,6 +198,10 @@ odp_execute_set_action(struct dpif_packet *packet, const struct nlattr *a, md->pkt_mark = nl_attr_get_u32(a); break; + case OVS_KEY_ATTR_CONN_STATE: + md->conn_state = nl_attr_get_u8(a); + break; + case OVS_KEY_ATTR_ETHERNET: odp_eth_set_addrs(&packet->ofpbuf, nl_attr_get(a), NULL); break; @@ -519,6 +523,10 @@ odp_execute_actions__(void *dp, struct dpif_packet **packets, int cnt, } break; + case OVS_ACTION_ATTR_CONNTRACK: + /* xxx I don't think there's anything we can do here. 
*/ + break; + case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); diff --git a/lib/odp-util.c b/lib/odp-util.c index 77e6ec5..420017c 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -84,6 +84,7 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_SET: return -2; case OVS_ACTION_ATTR_SET_MASKED: return -2; case OVS_ACTION_ATTR_SAMPLE: return -2; + case OVS_ACTION_ATTR_CONNTRACK: return sizeof(uint16_t); case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: @@ -105,6 +106,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; + case OVS_KEY_ATTR_CONN_STATE: return "conn_state"; case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; @@ -589,6 +591,11 @@ format_odp_action(struct ds *ds, const struct nlattr *a) case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; + case OVS_ACTION_ATTR_CONNTRACK: { + uint16_t zone = nl_attr_get_u16(a); + ds_put_format(ds, "conntrack(zone=%"PRIu16")", zone); + break; + } case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: default: @@ -877,6 +884,16 @@ parse_odp_action(const char *s, const struct simap *port_names, } } + { + int zone; + int n = -1; + + if (ovs_scan(s, "conntrack(zone=%i)%n", &zone, &n)) { + nl_msg_put_u16(actions, OVS_ACTION_ATTR_CONNTRACK, zone); + return n; + } + } + return -EINVAL; } @@ -931,6 +948,7 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_SKB_MARK: return 4; case OVS_KEY_ATTR_DP_HASH: return 4; case OVS_KEY_ATTR_RECIRC_ID: return 4; + case OVS_KEY_ATTR_CONN_STATE: return 1; case OVS_KEY_ATTR_TUNNEL: return -2; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); @@ -1520,6 +1538,13 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } 
break; + case OVS_KEY_ATTR_CONN_STATE: + ds_put_format(ds, "%#"PRIx8, nl_attr_get_u8(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx8, nl_attr_get_u8(ma)); + } + break; + case OVS_KEY_ATTR_TUNNEL: { struct flow_tnl key, mask_; struct flow_tnl *mask = ma ? &mask_ : NULL; @@ -2346,6 +2371,7 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, SCAN_SINGLE("skb_mark(", uint32_t, u32, OVS_KEY_ATTR_SKB_MARK); SCAN_SINGLE_NO_MASK("recirc_id(", uint32_t, u32, OVS_KEY_ATTR_RECIRC_ID); SCAN_SINGLE("dp_hash(", uint32_t, u32, OVS_KEY_ATTR_DP_HASH); + SCAN_SINGLE("conn_state(", uint8_t, u8, OVS_KEY_ATTR_CONN_STATE); SCAN_BEGIN("tunnel(", struct flow_tnl) { SCAN_FIELD("tun_id=", be64, tun_id); @@ -2574,6 +2600,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); + nl_msg_put_u8(buf, OVS_KEY_ATTR_CONN_STATE, data->conn_state); if (recirc) { nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id); @@ -2771,6 +2798,9 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, md->pkt_mark); +#if 0 + nl_msg_put_u8(buf, OVS_KEY_ATTR_CONN_STATE, md->conn_state); +#endif /* Add an ingress port attribute if 'odp_in_port' is not the magical * value "ODPP_NONE". 
*/ @@ -2818,6 +2848,12 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, md->pkt_mark = nl_attr_get_u32(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK); break; +#if 0 + case OVS_KEY_ATTR_CONN_STATE: + md->conn_state = nl_attr_get_u8(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CONN_STATE); + break; +#endif case OVS_KEY_ATTR_TUNNEL: { enum odp_key_fitness res; @@ -3374,6 +3410,11 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK; } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CONN_STATE)) { + flow->conn_state = nl_attr_get_u8(attrs[OVS_KEY_ATTR_CONN_STATE]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CONN_STATE; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { enum odp_key_fitness res; diff --git a/lib/odp-util.h b/lib/odp-util.h index 11b54dd..14570ce 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -118,6 +118,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_DP_HASH 4 -- 4 8 * OVS_KEY_ATTR_RECIRC_ID 4 -- 4 8 + * OVS_KEY_ATTR_CONN_STATE 1 3 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_VLAN 2 2 4 8 @@ -127,13 +128,13 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ---------------------------------------------------------- - * total 488 + * total 496 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. */ #define ODPUTIL_FLOW_KEY_BYTES 512 -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. 
An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 5df36a2..d8c00bb 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -267,6 +267,9 @@ enum ofp_raw_action_type { /* NX1.0+(29): struct nx_action_sample. */ NXAST_RAW_SAMPLE, + + /* NX1.0+(32): struct nx_action_conntrack. */ + NXAST_RAW_CONNTRACK, }; /* OpenFlow actions are always a multiple of 8 bytes in length. */ @@ -3924,6 +3927,86 @@ format_SAMPLE(const struct ofpact_sample *a, struct ds *s) a->obs_domain_id, a->obs_point_id); } +/* Action structure for NXAST_CONNTRACK. + * + * Pass traffic to the connection tracker. If 'flags' is + * NX_CONNTRACK_F_RECIRC, traffic is recirculated back to flow table + * with the NXM_NX_CONN_STATE and NXM_NX_CONN_STATE_W matches set. A + * standard "resubmit" action is not sufficient, since connection + * tracking occurs outside of the classifier. The 'zone' argument + * specifies a context within which the tracking is done. */ +struct nx_action_conntrack { + ovs_be16 type; /* OFPAT_VENDOR. */ + ovs_be16 len; /* 16. */ + ovs_be32 vendor; /* NX_VENDOR_ID. */ + ovs_be16 subtype; /* NXAST_CONNTRACK. */ + ovs_be16 flags; /* Either 0 or NX_CONNTRACK_F_RECIRC. */ + ovs_be16 zone; /* Connection tracking context. 
*/ + uint8_t pad[2]; +}; +OFP_ASSERT(sizeof(struct nx_action_conntrack) == 16); + +static enum ofperr +decode_NXAST_RAW_CONNTRACK(const struct nx_action_conntrack *nac, + struct ofpbuf *out) +{ + struct ofpact_conntrack *conntrack; + + conntrack = ofpact_put_CONNTRACK(out); + conntrack->flags = ntohs(nac->flags); + conntrack->zone = ntohs(nac->zone); + + return 0; +} + +static void +encode_CONNTRACK(const struct ofpact_conntrack *conntrack, + enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out) +{ + struct nx_action_conntrack *nac; + + nac = put_NXAST_CONNTRACK(out); + nac->flags = htons(conntrack->flags); + nac->zone = htons(conntrack->zone); +} + +/* Parses 'arg' as the argument to a "conntrack" action, and appends such an + * action to 'ofpacts'. + * + * Returns NULL if successful, otherwise a malloc()'d string describing the + * error. The caller is responsible for freeing the returned string. */ +static char * WARN_UNUSED_RESULT +parse_CONNTRACK(char *arg, struct ofpbuf *ofpacts, + enum ofputil_protocol *usable_protocols OVS_UNUSED) +{ + struct ofpact_conntrack *oc = ofpact_put_CONNTRACK(ofpacts); + char *key, *value; + + while (ofputil_parse_key_value(&arg, &key, &value)) { + char *error = NULL; + + if (!strcmp(key, "flags")) { + error = str_to_u16(value, "flags", &oc->flags); + } else if (!strcmp(key, "zone")) { + error = str_to_u16(value, "zone", &oc->zone); + } else { + error = xasprintf("invalid key \"%s\" in \"conntrack\" argument", + key); + } + if (error) { + return error; + } + } + return NULL; +} + +static void +format_CONNTRACK(const struct ofpact_conntrack *a, struct ds *s) +{ + ds_put_format(s, "conntrack(flags=%"PRIu16",zone=%"PRIu16")", + a->flags, a->zone); +} + /* Meter instruction. 
*/ static void @@ -4304,6 +4387,7 @@ ofpact_is_set_or_move_action(const struct ofpact *a) return true; case OFPACT_BUNDLE: case OFPACT_CLEAR_ACTIONS: + case OFPACT_CONNTRACK: case OFPACT_CONTROLLER: case OFPACT_DEC_MPLS_TTL: case OFPACT_DEC_TTL: @@ -4376,6 +4460,7 @@ ofpact_is_allowed_in_actions_set(const struct ofpact *a) * in the action set is undefined. */ case OFPACT_BUNDLE: case OFPACT_CONTROLLER: + case OFPACT_CONNTRACK: case OFPACT_ENQUEUE: case OFPACT_EXIT: case OFPACT_FIN_TIMEOUT: @@ -4600,6 +4685,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type) case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_SAMPLE: + case OFPACT_CONNTRACK: default: return OVSINST_OFPIT11_APPLY_ACTIONS; } @@ -5161,6 +5247,9 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a, case OFPACT_SAMPLE: return 0; + case OFPACT_CONNTRACK: + return 0; + case OFPACT_CLEAR_ACTIONS: return 0; @@ -5580,6 +5669,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port) case OFPACT_GOTO_TABLE: case OFPACT_METER: case OFPACT_GROUP: + case OFPACT_CONNTRACK: default: return false; } diff --git a/lib/ofp-actions.h b/lib/ofp-actions.h index 5436f24..a3a4b41 100644 --- a/lib/ofp-actions.h +++ b/lib/ofp-actions.h @@ -105,6 +105,7 @@ OFPACT(NOTE, ofpact_note, data, "note") \ OFPACT(EXIT, ofpact_null, ofpact, "exit") \ OFPACT(SAMPLE, ofpact_sample, ofpact, "sample") \ + OFPACT(CONNTRACK, ofpact_conntrack, ofpact, "conntrack") \ \ /* Instructions. */ \ OFPACT(METER, ofpact_meter, ofpact, "meter") \ @@ -472,6 +473,23 @@ BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions) % OFPACT_ALIGNTO == 0); BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions) == sizeof(struct ofpact_nest)); +/* Bits for 'flags' in struct nx_action_conntrack. + * + * If NX_CONNTRACK_F_RECIRC is set, then the packet will be recirculated + * through the datapath after running through the connection tracker. 
*/ +enum nx_conntrack_flags { + NX_CONNTRACK_F_RECIRC = 1 << 0 +}; + +/* OFPACT_CONNTRACK. + * + * Used for NXAST_CONNTRACK. */ +struct ofpact_conntrack { + struct ofpact ofpact; + uint16_t flags; + uint16_t zone; +}; + static inline size_t ofpact_nest_get_action_len(const struct ofpact_nest *on) { diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 43bfa17..12e0f6c 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -141,6 +141,10 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_format(string, " pkt_mark=0x%"PRIx32, pin.fmd.pkt_mark); } + if (pin.fmd.conn_state != 0) { + ds_put_format(string, " conn_state=0x%"PRIx8, pin.fmd.conn_state); + } + ds_put_format(string, " (via %s)", ofputil_packet_in_reason_to_string(pin.reason, reasonbuf, sizeof reasonbuf)); diff --git a/lib/ofp-util.c b/lib/ofp-util.c index c8d38e8..f352336 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -185,7 +185,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); @@ -3276,6 +3276,7 @@ ofputil_decode_packet_in_finish(struct ofputil_packet_in *pin, pin->fmd.metadata = match->flow.metadata; memcpy(pin->fmd.regs, match->flow.regs, sizeof pin->fmd.regs); pin->fmd.pkt_mark = match->flow.pkt_mark; + pin->fmd.conn_state = match->flow.conn_state; } enum ofperr @@ -3412,6 +3413,10 @@ ofputil_packet_in_to_match(const struct ofputil_packet_in *pin, match_set_pkt_mark(match, pin->fmd.pkt_mark); } + if (pin->fmd.conn_state != 0) { + match_set_conn_state(match, pin->fmd.conn_state); + } + match_set_in_port(match, pin->fmd.in_port); } diff --git a/lib/packets.h b/lib/packets.h index 26c6ff1..8408b36 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -62,6 +62,7 @@ struct pkt_metadata { uint32_t skb_priority; /* Packet priority for QoS. 
*/ uint32_t pkt_mark; /* Packet mark. */ union flow_in_port in_port; /* Input port. */ + uint8_t conn_state; /* Connection state. */ }; #define PKT_METADATA_INITIALIZER(PORT) \ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 57afa80..1f1f1d0 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -2477,13 +2477,14 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, struct flow *flow = &ctx->xin->flow; ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; + uint8_t flow_conn_state; uint8_t flow_nw_tos; odp_port_t out_port, odp_port; uint8_t dscp; /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); if (!xport) { xlate_report(ctx, "Nonexistent output port"); @@ -2576,6 +2577,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, flow_vlan_tci = flow->vlan_tci; flow_pkt_mark = flow->pkt_mark; + flow_conn_state = flow->conn_state; flow_nw_tos = flow->nw_tos; if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) { @@ -2663,6 +2665,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* Restore flow */ flow->vlan_tci = flow_vlan_tci; flow->pkt_mark = flow_pkt_mark; + flow->conn_state = flow_conn_state; flow->nw_tos = flow_nw_tos; } @@ -3583,6 +3586,7 @@ ofpact_needs_recirculation_after_mpls(const struct xlate_ctx *ctx, case OFPACT_WRITE_ACTIONS: case OFPACT_CLEAR_ACTIONS: case OFPACT_SAMPLE: + case OFPACT_CONNTRACK: return false; case OFPACT_SET_IPV4_SRC: @@ -3922,6 +3926,19 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SAMPLE: xlate_sample_action(ctx, ofpact_get_SAMPLE(a)); break; + + case OFPACT_CONNTRACK: { + struct ofpact_conntrack *ofc = ofpact_get_CONNTRACK(a); + + nl_msg_put_u16(ctx->xout->odp_actions, + OVS_ACTION_ATTR_CONNTRACK, ofc->zone); + /* xxx Need to put the recirc here. 
*/ + if (ofc->flags & NX_CONNTRACK_F_RECIRC) { + nl_msg_put_u32(ctx->xout->odp_actions, OVS_ACTION_ATTR_RECIRC, + 0); /* xxx Choose real recirc id */ + } + break; + } } } } diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 89013d9..83820ee 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -103,6 +103,8 @@ only metadata. The metadata can be: Packet QoS priority. .IP \fIpkt_mark\fR Mark of the packet. +.IP \fIconn_state\fR +Connection state of the packet. .IP \fItun_id\fR The tunnel ID on which the packet arrived. .IP \fIin_port\fR diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 64171e7..7323086 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -1108,6 +1108,26 @@ system components in order to facilitate interaction between subsystems. On Linux this corresponds to the skb mark but the exact implementation is platform-dependent. . +.IP \fBconn_state=\fIvalue\fR[\fB/\fImask\fR] +Matches packet connection state \fIvalue\fR either exactly or with optional +\fImask\fR. The following flags are defined: +.RS +.IP \fB0x80\fR +Connection tracking has occurred. +.IP \fB0x40\fR +The flow did not initiate the connection. +.RE +.IP +The following values describe the state of the connection: +.RS +.IP \fB0x01\fR +This is the beginning of a new connection. +.IP \fB0x02\fR +This is part of an already existing connection. +.IP \fB0x04\fR +This is a new connection that is "expected". +.RE +. .PP Defining IPv6 flows (those with \fBdl_type\fR equal to 0x86dd) requires support for NXM. The following shorthand notations are available for -- 1.9.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev