On Mon, Mar 04, 2013 at 11:37:32PM -0500, Lawrence Teo wrote:
> Brief background: divert(4) sockets can be used to send packets to a
> userspace program. The program can inspect the packets and decide to
> either reinject them back into the kernel or drop them.
>
> According to the divert(4) man page, "The packets' checksums are
> recalculated upon reinjection." This makes sense, because the userspace
> program could have modified the packet.
>
> However, in my tests, I found that the checksums are not actually
> recalculated upon reinjection. I ran into this bug when trying to use
> divert-packet PF rules together with nat-to and rdr-to, e.g.:
>
> match out on em5 inet nat-to (em5:0)
> pass out on em5 proto tcp to port 80 divert-packet port 700
> pass in on em5 proto tcp to port 22 divert-packet port 700 rdr-to
> 192.168.30.8
>
> With the above rules, inbound packets would drop and "netstat -p ip -s"
> shows that "bad header checksums" consistently increments by one for
> every inbound packet.
>
> The diff below fixes this bug by making divert(4) recalculate the IP(v4)
> and TCP/UDP/ICMP/ICMPv6 checksums of reinjected packets on both IPv4 and
> IPv6 (done with a ton of help and feedback from blambert@ who reviewed
> many versions of this diff, thank you!).
>
> If you are using divert(4), could you please test this diff to ensure
> that it does not break your setup? Note that this diff only applies to
> divert-packet PF rules, not divert-to/divert-reply.
>
> I am also looking for feedback from developers to review the approach
> and code that was used to fix this bug.
This revised diff uses ip6_lasthdr() to walk the IPv6 header chain,
which makes the diff a lot shorter and simpler.
The IPv4 part remains unchanged from the original diff that I sent last
week.
Thank you to those who have tested so far; more tests and feedback
welcome!
Lawrence
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.10
diff -u -p -r1.10 ip_divert.c
--- netinet/ip_divert.c 21 Oct 2012 13:06:03 -0000 1.10
+++ netinet/ip_divert.c 12 Mar 2013 03:50:15 -0000
@@ -37,6 +37,9 @@
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_divert.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
struct inpcbtable divbtable;
struct divstat divstat;
@@ -83,8 +86,12 @@ divert_output(struct mbuf *m, ...)
struct sockaddr_in *sin;
struct socket *so;
struct ifaddr *ifa;
- int s, error = 0;
+ int s, error = 0, p_hdrlen = 0;
va_list ap;
+ struct ip *ip;
+ u_int16_t off, csum = 0;
+ u_int8_t nxt;
+ size_t p_off = 0;
va_start(ap, m);
inp = va_arg(ap, struct inpcb *);
@@ -102,15 +109,68 @@ divert_output(struct mbuf *m, ...)
sin = mtod(nam, struct sockaddr_in *);
so = inp->inp_socket;
+ /* Do basic sanity checks. */
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto fail;
+ if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
+ /* m_pullup() has freed the mbuf, so just return. */
+ divstat.divs_errors++;
+ return (ENOBUFS);
+ }
+ ip = mtod(m, struct ip *);
+ if (ip->ip_v != IPVERSION)
+ goto fail;
+ off = ip->ip_hl << 2;
+ if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off ||
+ m->m_pkthdr.len < ntohs(ip->ip_len))
+ goto fail;
+
+ /*
+ * Recalculate IP and protocol checksums since the userspace application
+ * may have modified the packet prior to reinjection.
+ */
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(m, off);
+ nxt = ip->ip_p;
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ p_hdrlen = sizeof(struct tcphdr);
+ p_off = offsetof(struct tcphdr, th_sum);
+ break;
+ case IPPROTO_UDP:
+ p_hdrlen = sizeof(struct udphdr);
+ p_off = offsetof(struct udphdr, uh_sum);
+ break;
+ case IPPROTO_ICMP:
+ p_hdrlen = sizeof(struct icmp);
+ p_off = offsetof(struct icmp, icmp_cksum);
+ nxt = 0;
+ break;
+ default:
+ /* nothing */
+ break;
+ }
+ if (p_hdrlen) {
+ if (m->m_pkthdr.len < off + p_hdrlen)
+ goto fail;
+
+ if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum,
M_NOWAIT)))
+ goto fail;
+ csum = in4_cksum(m, nxt, off, m->m_pkthdr.len - off);
+ if (ip->ip_p == IPPROTO_UDP && csum == 0)
+ csum = 0xffff;
+ if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum,
M_NOWAIT)))
+ goto fail;
+ }
+
m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
if (sin->sin_addr.s_addr != INADDR_ANY) {
ipaddr.sin_addr = sin->sin_addr;
ifa = ifa_ifwithaddr(sintosa(&ipaddr), m->m_pkthdr.rdomain);
if (ifa == NULL) {
- divstat.divs_errors++;
- m_freem(m);
- return (EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ goto fail;
}
m->m_pkthdr.rcvif = ifa->ifa_ifp;
@@ -131,6 +191,11 @@ divert_output(struct mbuf *m, ...)
divstat.divs_opackets++;
return (error);
+
+fail:
+ m_freem(m);
+ divstat.divs_errors++;
+ return (error ? error : EINVAL);
}
int
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.8
diff -u -p -r1.8 ip6_divert.c
--- netinet6/ip6_divert.c 6 Nov 2012 12:32:42 -0000 1.8
+++ netinet6/ip6_divert.c 12 Mar 2013 15:36:52 -0000
@@ -38,8 +38,10 @@
#include <netinet/in_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
-#include <netinet6/in6_var.h>
#include <netinet6/ip6_divert.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/icmp6.h>
struct inpcbtable divb6table;
struct div6stat div6stat;
@@ -88,8 +90,11 @@ divert6_output(struct mbuf *m, ...)
struct sockaddr_in6 *sin6;
struct socket *so;
struct ifaddr *ifa;
- int s, error = 0;
+ int s, error = 0, p_hdrlen = 0, nxt = 0, off;
va_list ap;
+ struct ip6_hdr *ip6;
+ u_int16_t csum = 0;
+ size_t p_off = 0;
va_start(ap, m);
inp = va_arg(ap, struct inpcb *);
@@ -107,15 +112,65 @@ divert6_output(struct mbuf *m, ...)
sin6 = mtod(nam, struct sockaddr_in6 *);
so = inp->inp_socket;
+ /* Do basic sanity checks. */
+ if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
+ goto fail;
+ if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
+ /* m_pullup() has freed the mbuf, so just return. */
+ div6stat.divs_errors++;
+ return (ENOBUFS);
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
+ goto fail;
+ if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
+ goto fail;
+
+ /*
+ * Recalculate the protocol checksum since the userspace application
+ * may have modified the packet prior to reinjection.
+ */
+ off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
+ if (off < sizeof(struct ip6_hdr))
+ goto fail;
+ switch (nxt) {
+ case IPPROTO_TCP:
+ p_hdrlen = sizeof(struct tcphdr);
+ p_off = offsetof(struct tcphdr, th_sum);
+ break;
+ case IPPROTO_UDP:
+ p_hdrlen = sizeof(struct udphdr);
+ p_off = offsetof(struct udphdr, uh_sum);
+ break;
+ case IPPROTO_ICMPV6:
+ p_hdrlen = sizeof(struct icmp6_hdr);
+ p_off = offsetof(struct icmp6_hdr, icmp6_cksum);
+ break;
+ default:
+ /* nothing */
+ break;
+ }
+ if (p_hdrlen) {
+ if (m->m_pkthdr.len < off + p_hdrlen)
+ goto fail;
+
+ if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum,
M_NOWAIT)))
+ goto fail;
+ csum = in6_cksum(m, nxt, off, m->m_pkthdr.len - off);
+ if (nxt == IPPROTO_UDP && csum == 0)
+ csum = 0xffff;
+ if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum,
M_NOWAIT)))
+ goto fail;
+ }
+
m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
ip6addr.sin6_addr = sin6->sin6_addr;
ifa = ifa_ifwithaddr(sin6tosa(&ip6addr), m->m_pkthdr.rdomain);
if (ifa == NULL) {
- div6stat.divs_errors++;
- m_freem(m);
- return (EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ goto fail;
}
m->m_pkthdr.rcvif = ifa->ifa_ifp;
@@ -133,6 +188,11 @@ divert6_output(struct mbuf *m, ...)
div6stat.divs_opackets++;
return (error);
+
+fail:
+ div6stat.divs_errors++;
+ m_freem(m);
+ return (error ? error : EINVAL);
}
int