Hi,

Here is my double linkage feature between pf states and sockets.
Henning has already implemented much of it.

The additional part is:
- The pf state lookup for outgoing packets is optimized by using
  mbuf->inp->state when possible.
- Outgoing packets sent from a socket carry that socket's inp in the
  mbuf.  That allows the link to be established with the very first
  packet of an outgoing connection.
- In case of divert states, delete the state when the socket closes.
  Otherwise new connections could match on old states instead of
  being diverted to the listen socket.  (Should we also do this for
  regular non-divert states?)
- For incoming tcp, udp, raw, raw6 the socket lookup is always
  optimized by using the mbuf->state->inp.
- All protocols establish the link for incoming packets.
- All protocols set the inp in the mbuf for outgoing packets.

ok?

bluhm


Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.740
diff -u -p -r1.740 pf.c
--- net/pf.c    12 Apr 2011 10:47:29 -0000      1.740
+++ net/pf.c    21 Apr 2011 20:58:40 -0000
@@ -953,6 +953,9 @@ pf_find_state(struct pfi_kif *kif, struc
        if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
            ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
                sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
+       else if (dir == PF_OUT && m->m_pkthdr.pf.inp &&
+          ((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk)
+              sk = ((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk;
        else {
                if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
                    (struct pf_state_key *)key)) == NULL)
@@ -963,11 +966,16 @@ pf_find_state(struct pfi_kif *kif, struc
                        ((struct pf_state_key *)
                            m->m_pkthdr.pf.statekey)->reverse = sk;
                        sk->reverse = m->m_pkthdr.pf.statekey;
+               } else if (dir == PF_OUT && m->m_pkthdr.pf.inp && !sk->inp) {
+                       ((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk = sk;
+                       sk->inp = m->m_pkthdr.pf.inp;
                }
        }
 
-       if (dir == PF_OUT)
+       if (dir == PF_OUT) {
                m->m_pkthdr.pf.statekey = NULL;
+               m->m_pkthdr.pf.inp = NULL;
+       }
 
        /* list is sorted, if-bound states before floating ones */
        TAILQ_FOREACH(si, &sk->states, entry)
@@ -5938,6 +5946,13 @@ done:
 
        if (dir == PF_IN && s && s->key[PF_SK_STACK])
                m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
+       if (dir == PF_OUT && m->m_pkthdr.pf.inp &&
+           !((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk &&
+           s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
+               ((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk =
+                   s->key[PF_SK_STACK];
+               s->key[PF_SK_STACK]->inp = m->m_pkthdr.pf.inp;
+       }
 
 #ifdef ALTQ
        if (action == PF_PASS && qid) {
@@ -6223,6 +6238,13 @@ done:
 
        if (dir == PF_IN && s && s->key[PF_SK_STACK])
                m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
+       if (dir == PF_OUT && m->m_pkthdr.pf.inp &&
+           !((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk &&
+           s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
+               ((struct inpcb *)m->m_pkthdr.pf.inp)->inp_pf_sk =
+                   s->key[PF_SK_STACK];
+               s->key[PF_SK_STACK]->inp = m->m_pkthdr.pf.inp;
+       }
 
 #ifdef ALTQ
        if (action == PF_PASS && qid) {
@@ -6319,4 +6341,5 @@ void
 pf_pkt_addr_changed(struct mbuf *m)
 {
        m->m_pkthdr.pf.statekey = NULL;
+       m->m_pkthdr.pf.inp = NULL;
 }
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.116
diff -u -p -r1.116 in_pcb.c
--- netinet/in_pcb.c    19 Apr 2011 03:47:29 -0000      1.116
+++ netinet/in_pcb.c    21 Apr 2011 17:24:11 -0000
@@ -509,8 +509,23 @@ in_pcbdetach(v)
        splx(s);
 #endif
 #if NPF > 0
-       if (inp->inp_pf_sk)
-               ((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL;
+       if (inp->inp_pf_sk) {
+               struct pf_state_key     *sk;
+               struct pf_state_item    *si;
+
+               s = splsoftnet();
+               sk = (struct pf_state_key *)inp->inp_pf_sk;
+               TAILQ_FOREACH(si, &sk->states, entry)
+                       if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
+                           si->s->rule.ptr->divert.port) {
+                               pf_unlink_state(si->s);
+                               break;
+                       }
+               /* pf_unlink_state() may have detached the state */
+               if (inp->inp_pf_sk)
+                       ((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL;
+               splx(s);
+       }
 #endif
        s = splnet();
        LIST_REMOVE(inp, inp_lhash);
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.54
diff -u -p -r1.54 raw_ip.c
--- netinet/raw_ip.c    19 Apr 2011 03:47:29 -0000      1.54
+++ netinet/raw_ip.c    21 Apr 2011 17:24:12 -0000
@@ -157,6 +157,16 @@ rip_input(struct mbuf *m, ...)
                if (inp->inp_faddr.s_addr &&
                    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
                        continue;
+#if NPF > 0
+               if (m->m_pkthdr.pf.statekey && !inp->inp_pf_sk &&
+                   !((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp &&
+                   (inp->inp_socket->so_state & SS_ISCONNECTED) &&
+                   ip->ip_p != IPPROTO_ICMP) {
+                       ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp =
+                           inp;
+                       inp->inp_pf_sk = m->m_pkthdr.pf.statekey;
+               }
+#endif
                if (last) {
                        struct mbuf *n;
 
@@ -277,6 +287,11 @@ rip_output(struct mbuf *m, ...)
        /* force routing domain */
        m->m_pkthdr.rdomain = inp->inp_rtableid;
 
+#if NPF > 0
+       if (inp->inp_socket->so_state & SS_ISCONNECTED &&
+           ip->ip_p != IPPROTO_ICMP)
+               m->m_pkthdr.pf.inp = inp;
+#endif
        error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
            inp->inp_moptions, inp);
        if (error == EACCES)    /* translate pf(4) error for userland */
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.245
diff -u -p -r1.245 tcp_input.c
--- netinet/tcp_input.c 12 Apr 2011 10:47:29 -0000      1.245
+++ netinet/tcp_input.c 21 Apr 2011 17:24:12 -0000
@@ -895,7 +895,8 @@ after_listen:
 #endif
 
 #if NPF > 0
-       if (m->m_pkthdr.pf.statekey) {
+       if (m->m_pkthdr.pf.statekey && !inp->inp_pf_sk &&
+           !((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp) {
                ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp = inp;
                inp->inp_pf_sk = m->m_pkthdr.pf.statekey;
        }
@@ -1338,6 +1339,19 @@ trimthenstep6:
                    ((opti.ts_present &&
                    TSTMP_LT(tp->ts_recent, opti.ts_val)) ||
                    SEQ_GT(th->th_seq, tp->rcv_nxt))) {
+#if NPF > 0
+                       /*
+                        * The socket will be recreated but the new state
+                        * has already been linked to the socket.  Remove the
+                        * link between old socket and new state.  Otherwise
+                        * closing the socket would remove the state.
+                        */
+                       if (inp->inp_pf_sk) {
+                               ((struct pf_state_key *)inp->inp_pf_sk)->inp =
+                                   NULL;
+                               inp->inp_pf_sk = NULL;
+                       }
+#endif
                        /*
                        * Advance the iss by at least 32768, but
                        * clear the msb in order to make sure
Index: netinet/tcp_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.94
diff -u -p -r1.94 tcp_output.c
--- netinet/tcp_output.c        5 Apr 2011 18:01:21 -0000       1.94
+++ netinet/tcp_output.c        11 Apr 2011 17:52:21 -0000
@@ -98,6 +98,8 @@
 #include <netinet6/in6_var.h>
 #endif /* INET6 */
 
+#include "pf.h"
+
 #ifdef notyet
 extern struct mbuf *m_copypack();
 #endif
@@ -1076,6 +1078,10 @@ send:
 
        /* force routing domain */
        m->m_pkthdr.rdomain = tp->t_inpcb->inp_rtableid;
+
+#if NPF > 0
+       m->m_pkthdr.pf.inp = tp->t_inpcb;
+#endif
 
        switch (tp->pf) {
        case 0: /*default to PF_INET*/
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.140
diff -u -p -r1.140 udp_usrreq.c
--- netinet/udp_usrreq.c        5 Apr 2011 18:01:21 -0000       1.140
+++ netinet/udp_usrreq.c        11 Apr 2011 17:52:21 -0000
@@ -560,7 +560,7 @@ udp_input(struct mbuf *m, ...)
        /*
         * Locate pcb for datagram.
         */
-#if 0
+#if NPF > 0
        if (m->m_pkthdr.pf.statekey)
                inp = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp;
 #endif
@@ -618,6 +618,15 @@ udp_input(struct mbuf *m, ...)
                }
        }
 
+#if NPF > 0
+       if (m->m_pkthdr.pf.statekey && !inp->inp_pf_sk &&
+           !((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp &&
+           (inp->inp_socket->so_state & SS_ISCONNECTED)) {
+               ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp = inp;
+               inp->inp_pf_sk = m->m_pkthdr.pf.statekey;
+       }
+#endif
+
 #ifdef IPSEC
        mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
        s = splnet();
@@ -1032,6 +1041,10 @@ udp_output(struct mbuf *m, ...)
        /* force routing domain */
        m->m_pkthdr.rdomain = inp->inp_rtableid;
 
+#if NPF > 0
+       if (inp->inp_socket->so_state & SS_ISCONNECTED)
+               m->m_pkthdr.pf.inp = inp;
+#endif
        error = ip_output(m, inp->inp_options, &inp->inp_route,
            inp->inp_socket->so_options &
            (SO_DONTROUTE | SO_BROADCAST | SO_JUMBO),
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.41
diff -u -p -r1.41 raw_ip6.c
--- netinet6/raw_ip6.c  4 Apr 2011 11:07:18 -0000       1.41
+++ netinet6/raw_ip6.c  11 Apr 2011 17:52:22 -0000
@@ -61,6 +61,8 @@
  *     @(#)raw_ip.c    8.2 (Berkeley) 1/4/94
  */
 
+#include "pf.h"
+
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -75,6 +77,9 @@
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_types.h>
+#if NPF > 0
+#include <net/pfvar.h>
+#endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -200,6 +205,16 @@ rip6_input(struct mbuf **mp, int *offp, 
                                continue;
                        }
                }
+#if NPF > 0
+               if (m->m_pkthdr.pf.statekey && !in6p->inp_pf_sk &&
+                   !((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp &&
+                   (in6p->inp_socket->so_state & SS_ISCONNECTED) &&
+                   proto != IPPROTO_ICMPV6) {
+                       ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->inp =
+                           in6p;
+                       in6p->inp_pf_sk = m->m_pkthdr.pf.statekey;
+               }
+#endif
                if (last) {
                        struct  mbuf *n;
                        if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
@@ -487,6 +502,11 @@ rip6_output(struct mbuf *m, ...)
        if (in6p->in6p_flags & IN6P_MINMTU)
                flags |= IPV6_MINMTU;
 
+#if NPF > 0
+       if (in6p->inp_socket->so_state & SS_ISCONNECTED &&
+           so->so_proto->pr_protocol != IPPROTO_ICMPV6)
+               m->m_pkthdr.pf.inp = in6p;
+#endif
        error = ip6_output(m, optp, &in6p->in6p_route, flags,
            in6p->in6p_moptions, &oifp, in6p);
        if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.150
diff -u -p -r1.150 mbuf.h
--- sys/mbuf.h  6 Apr 2011 19:15:34 -0000       1.150
+++ sys/mbuf.h  11 Apr 2011 18:05:58 -0000
@@ -78,6 +78,7 @@ struct m_hdr {
 struct pkthdr_pf {
        void            *hdr;           /* saved hdr pos in mbuf, for ECN */
        void            *statekey;      /* pf stackside statekey */
+       void            *inp;           /* connected pcb for outgoing packet */
        u_int32_t        qid;           /* queue id */
        u_int16_t        tag;           /* tag id */
        u_int8_t         flags;

Reply via email to