This patch adds support for rewriting SCTP source and destination ports, similar to the functionality already available for TCP/UDP.
Rewriting SCTP ports is expensive because the SCTP checksum has to be computed twice, once over the packet as received and once after the ports have been rewritten; this is done so that the difference between the two can be applied to the incoming checksum, ensuring that packets traversing OVS with invalid checksums will continue to the destination with any checksum corruption intact. Reviewed-by: Simon Horman <ho...@verge.net.au> Signed-off-by: Joe Stringer <j...@wand.net.nz> --- This patch introduces sparse warnings when calling sctp_end_cksum(), due to a bug that was fixed in Linux v3.10: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=eee1d5a14780b9391ec51f3feaf4cffb521ddbb1 I've now tested this using sctp_test from lksctp, linking two VMs via the OVS bridge. I added the following flows: "sctp,tp_dst=7777 actions=mod_tp_dst:8888,normal" "sctp,tp_src=8888 actions=mod_tp_src:7777,normal" Then, with an lksctp server running on one host on port 8888, I ran the lksctp client with the destination port 7777. The connections are established and transfer data successfully. I took pcaps from the VM interfaces and verified with Wireshark that the checksums were correct. 
v3: Rebase Refactor sctp checksum computation Handle skb fragments for checksum calculation Fix sparse errors v2: Remove SCTP checksum recalculation when changing IP address Calculate checksums as delta from incoming checksum --- datapath/actions.c | 40 +++++++++++++++++++ datapath/checksum.c | 3 ++ datapath/checksum.h | 15 +++++++ datapath/datapath.c | 6 +++ datapath/flow.c | 60 ++++++++++++++++++++++++++++ datapath/flow.h | 8 ++-- datapath/linux/Modules.mk | 1 + datapath/linux/compat/include/linux/sctp.h | 17 ++++++++ 8 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 datapath/linux/compat/include/linux/sctp.h diff --git a/datapath/actions.c b/datapath/actions.c index 0dac658..d4fdd65 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -22,11 +22,13 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/openvswitch.h> +#include <linux/sctp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/in6.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> +#include <linux/crc32c.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/checksum.h> @@ -352,6 +354,40 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) return 0; } +static int set_sctp(struct sk_buff *skb, + const struct ovs_key_sctp *sctp_port_key) +{ + struct sctphdr *sh; + int err; + + err = make_writable(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr)); + if (unlikely(err)) + return err; + + sh = sctp_hdr(skb); + if (sctp_port_key->sctp_src != sh->source || + sctp_port_key->sctp_dst != sh->dest) { + __le32 old_correct_csum, new_csum, old_csum; + + old_csum = sh->checksum; + old_correct_csum = compute_sctp_csum(skb); + + sh->source = sctp_port_key->sctp_src; + sh->dest = sctp_port_key->sctp_dst; + + new_csum = compute_sctp_csum(skb); + + /* Carry any checksum errors through. 
*/ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_clear_rxhash(skb); + } + + return 0; +} + static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { struct vport *vport; @@ -459,6 +495,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_UDP: err = set_udp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_SCTP: + err = set_sctp(skb, nla_data(nested_attr)); + break; } return err; diff --git a/datapath/checksum.c b/datapath/checksum.c index 5146c65..bfa75a7 100644 --- a/datapath/checksum.c +++ b/datapath/checksum.c @@ -59,6 +59,9 @@ static int vswitch_skb_checksum_setup(struct sk_buff *skb) case IPPROTO_UDP: csum_offset = offsetof(struct udphdr, check); break; + case IPPROTO_SCTP: + csum_offset = offsetof(struct sctphdr, check); + break; default: if (net_ratelimit()) pr_err("Attempting to checksum a non-TCP/UDP packet, " diff --git a/datapath/checksum.h b/datapath/checksum.h index a440c59..a97d47b 100644 --- a/datapath/checksum.h +++ b/datapath/checksum.h @@ -23,6 +23,7 @@ #include <linux/version.h> #include <net/checksum.h> +#include <net/sctp/checksum.h> #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) || \ (defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)) @@ -170,4 +171,18 @@ static inline unsigned char *rpl__pskb_pull_tail(struct sk_buff *skb, #define __pskb_pull_tail rpl__pskb_pull_tail #endif +static inline __le32 compute_sctp_csum(const struct sk_buff *skb) +{ + const struct sk_buff *iter; + __u32 crc; + __u16 tp_len = skb_headlen(skb) - skb_transport_offset(skb); + + crc = sctp_start_cksum((__u8 *)sctp_hdr(skb), tp_len); + skb_walk_frags(skb, iter) + crc = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), + crc); + + return sctp_end_cksum(crc); +} + #endif /* checksum.h */ diff --git a/datapath/datapath.c b/datapath/datapath.c index 42af315..8839213 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -723,6 +723,12 @@ static int 
validate_set(const struct nlattr *a, return validate_tp_port(flow_key); + case OVS_KEY_ATTR_SCTP: + if (flow_key->ip.proto != IPPROTO_SCTP) + return -EINVAL; + + return validate_tp_port(flow_key); + default: return -EINVAL; } diff --git a/datapath/flow.c b/datapath/flow.c index 7f897bd..40be695 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -34,6 +34,7 @@ #include <linux/if_arp.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/sctp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> @@ -103,6 +104,12 @@ static bool udphdr_ok(struct sk_buff *skb) sizeof(struct udphdr)); } +static bool sctphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr)); +} + static bool icmphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + @@ -698,6 +705,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, key->ipv4.tp.src = udp->source; key->ipv4.tp.dst = udp->dest; } + } else if (key->ip.proto == IPPROTO_SCTP) { + key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->ipv4.tp.src = sctp->source; + key->ipv4.tp.dst = sctp->dest; + } } else if (key->ip.proto == IPPROTO_ICMP) { key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); if (icmphdr_ok(skb)) { @@ -762,6 +776,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, key->ipv6.tp.src = udp->source; key->ipv6.tp.dst = udp->dest; } + } else if (key->ip.proto == NEXTHDR_SCTP) { + key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->ipv6.tp.src = sctp->source; + key->ipv6.tp.dst = sctp->dest; + } } else if (key->ip.proto == NEXTHDR_ICMP) { key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); if (icmp6hdr_ok(skb)) { @@ -843,6 +864,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), [OVS_KEY_ATTR_TCP] = 
sizeof(struct ovs_key_tcp), [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), @@ -856,6 +878,7 @@ static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, const struct ovs_key_icmp *icmp_key; const struct ovs_key_tcp *tcp_key; const struct ovs_key_udp *udp_key; + const struct ovs_key_sctp *sctp_key; switch (swkey->ip.proto) { case IPPROTO_TCP: @@ -880,6 +903,17 @@ static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, swkey->ipv4.tp.dst = udp_key->udp_dst; break; + case IPPROTO_SCTP: + if (!(*attrs & (1 << OVS_KEY_ATTR_SCTP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_SCTP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + swkey->ipv4.tp.src = sctp_key->sctp_src; + swkey->ipv4.tp.dst = sctp_key->sctp_dst; + break; + case IPPROTO_ICMP: if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP))) return -EINVAL; @@ -901,6 +935,7 @@ static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, const struct ovs_key_icmpv6 *icmpv6_key; const struct ovs_key_tcp *tcp_key; const struct ovs_key_udp *udp_key; + const struct ovs_key_sctp *sctp_key; switch (swkey->ip.proto) { case IPPROTO_TCP: @@ -925,6 +960,17 @@ static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, swkey->ipv6.tp.dst = udp_key->udp_dst; break; + case IPPROTO_SCTP: + if (!(*attrs & (1 << OVS_KEY_ATTR_SCTP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_SCTP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + swkey->ipv6.tp.src = sctp_key->sctp_src; + swkey->ipv6.tp.dst = sctp_key->sctp_dst; + break; + case IPPROTO_ICMPV6: if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6))) return -EINVAL; @@ -1454,6 +1500,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, 
struct sk_buff *skb) udp_key->udp_src = swkey->ipv6.tp.src; udp_key->udp_dst = swkey->ipv6.tp.dst; } + } else if (swkey->ip.proto == IPPROTO_SCTP) { + struct ovs_key_sctp *sctp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); + if (!nla) + goto nla_put_failure; + sctp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + sctp_key->sctp_src = swkey->ipv4.tp.src; + sctp_key->sctp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + sctp_key->sctp_src = swkey->ipv6.tp.src; + sctp_key->sctp_dst = swkey->ipv6.tp.dst; + } } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; diff --git a/datapath/flow.h b/datapath/flow.h index dba66cf..8fc4899 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -86,8 +86,8 @@ struct sw_flow_key { } addr; union { struct { - __be16 src; /* TCP/UDP source port. */ - __be16 dst; /* TCP/UDP destination port. */ + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { u8 sha[ETH_ALEN]; /* ARP source hardware address. */ @@ -102,8 +102,8 @@ struct sw_flow_key { } addr; __be32 label; /* IPv6 flow label. */ struct { - __be16 src; /* TCP/UDP source port. */ - __be16 dst; /* TCP/UDP destination port. */ + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { struct in6_addr target; /* ND target address. 
*/ diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index 1434a2d..a435d1f 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -49,6 +49,7 @@ openvswitch_headers += \ linux/compat/include/linux/rcupdate.h \ linux/compat/include/linux/reciprocal_div.h \ linux/compat/include/linux/rtnetlink.h \ + linux/compat/include/linux/sctp.h \ linux/compat/include/linux/skbuff.h \ linux/compat/include/linux/slab.h \ linux/compat/include/linux/stddef.h \ diff --git a/datapath/linux/compat/include/linux/sctp.h b/datapath/linux/compat/include/linux/sctp.h new file mode 100644 index 0000000..e6b9174 --- /dev/null +++ b/datapath/linux/compat/include/linux/sctp.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_SCTP_WRAPPER_H +#define __LINUX_SCTP_WRAPPER_H 1 + +#include_next <linux/sctp.h> + +#ifndef NEXTHDR_SCTP +#define NEXTHDR_SCTP 132 /* Stream Control Transport Protocol */ +#endif + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) +{ + return (struct sctphdr *)skb_transport_header(skb); +} +#endif /* HAVE_SKBUFF_HEADER_HELPERS */ + +#endif -- 1.7.10.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev