Hello,
thank you for taking a look at it.
</snip>
>
> I think there is a use after free in your diff. After you return
> from pfsync_delete_tdb() you must not access the TDB again.
>
> Comments inline.
>
</snip>
> >
> > TAILQ_INIT(&sn->sn_upd_req_list);
> > - TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry);
> > + while (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
> > + ur = TAILQ_FIRST(&sc->sc_upd_req_list);
>
> Other loops have this idiom
> while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
>
new diff version uses 'TAILQ_FIRST()'
</snip>
> > @@ -1827,18 +1853,20 @@ pfsync_sendout(void)
> > subh = (struct pfsync_subheader *)(m->m_data + offset);
> > offset += sizeof(*subh);
> >
> > - mtx_enter(&sc->sc_tdb_mtx);
> > count = 0;
> > while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
> > - TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
> > + TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
>
> I think the TDB in tdb_sync_snap list may be freed. Maybe
> you should grab a refcount if you store them into a list.
>
I see. pfsync must grab the reference count in pfsync_update_tdb(),
where tdb entry is inserted into queue. new diff fixes that.
> > pfsync_out_tdb(t, m->m_data + offset);
> > offset += sizeof(struct pfsync_tdb);
> > +#ifdef PFSYNC_DEBUG
> > + KASSERT(t->tdb_snapped == 1);
> > +#endif
> > + t->tdb_snapped = 0;
>
> This may also be use after free.
new diff drops the reference as soon as pfsync(4) dispatches
the tdb into pfsync packet.
</snip>
> > @@ -2525,7 +2565,17 @@ pfsync_delete_tdb(struct tdb *t)
> >
> > mtx_enter(&sc->sc_tdb_mtx);
> >
> > + /*
> > + * if tdb entry is just being processed (found in snapshot),
> > + * then it can not be deleted. we just came too late
> > + */
> > + if (t->tdb_snapped) {
> > + mtx_leave(&sc->sc_tdb_mtx);
>
> You must not access the TDB after this point. I think you cannot
> guarantee that. The memory will be freed after return.
new diff fixes that
</snip>
> > diff --git a/sys/netinet/ip_ipsp.h b/sys/netinet/ip_ipsp.h
> > index c697994047b..ebdb6ada1ae 100644
> > --- a/sys/netinet/ip_ipsp.h
> > +++ b/sys/netinet/ip_ipsp.h
> > @@ -405,6 +405,7 @@ struct tdb { /* tunnel
> > descriptor block */
> > u_int8_t tdb_wnd; /* Replay window */
> > u_int8_t tdb_satype; /* SA type (RFC2367, PF_KEY) */
> > u_int8_t tdb_updates; /* pfsync update counter */
> > + u_int8_t tdb_snapped; /* dispatched by pfsync(4) */
>
> u_int8_t is not atomic. If you want to change tdb_snapped, you need
> a mutex that also protects the other fields in the same 32 bit
> word everywhere. I think a new flag TDBF_PFSYNC_SNAPPED in tdb_flags
> would be easier. The tdb_flags are protected by tdb_mtx.
>
I like your idea.
updated diff is below.
thanks and
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index cb0f3fbdf52..536c3f9cb70 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -181,6 +181,7 @@ void pfsync_q_del(struct pf_state *);
struct pfsync_upd_req_item {
TAILQ_ENTRY(pfsync_upd_req_item) ur_entry;
+ TAILQ_ENTRY(pfsync_upd_req_item) ur_snap;
struct pfsync_upd_req ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
@@ -295,7 +296,7 @@ void pfsync_bulk_update(void *);
void pfsync_bulk_fail(void *);
void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
-void pfsync_drop_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
+void pfsync_drop_snapshot(struct pfsync_snapshot *);
void pfsync_send_dispatch(void *);
void pfsync_send_pkt(struct mbuf *);
@@ -422,8 +423,7 @@ pfsync_clone_destroy(struct ifnet *ifp)
sc->sc_deferred = 0;
mtx_leave(&sc->sc_deferrals_mtx);
- while (!TAILQ_EMPTY(&deferrals)) {
- pd = TAILQ_FIRST(&deferrals);
+ while ((pd = TAILQ_FIRST(&deferrals)) != NULL) {
TAILQ_REMOVE(&deferrals, pd, pd_entry);
pfsync_undefer(pd, 0);
}
@@ -1574,6 +1574,9 @@ void
pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
{
int q;
+ struct pf_state *st;
+ struct pfsync_upd_req_item *ur;
+ struct tdb *tdb;
sn->sn_sc = sc;
@@ -1583,14 +1586,36 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct
pfsync_softc *sc)
for (q = 0; q < PFSYNC_S_COUNT; q++) {
TAILQ_INIT(&sn->sn_qs[q]);
- TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list);
+
+ while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
+#ifdef PFSYNC_DEBUG
+ KASSERT(st->snapped == 0);
+#endif
+ TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap);
+ st->snapped = 1;
+ }
}
TAILQ_INIT(&sn->sn_upd_req_list);
- TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry);
+ while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+ TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap);
+ }
TAILQ_INIT(&sn->sn_tdb_q);
- TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry);
+ while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
+ TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry);
+ TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap);
+
+ mtx_enter(&tdb->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+ KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+ /* SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); */
+ SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
+ mtx_leave(&tdb->tdb_mtx);
+ }
sn->sn_len = sc->sc_len;
sc->sc_len = PFSYNC_MINPKT;
@@ -1606,41 +1631,44 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct
pfsync_softc *sc)
}
void
-pfsync_drop_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc * sc)
+pfsync_drop_snapshot(struct pfsync_snapshot *sn)
{
struct pf_state *st;
struct pfsync_upd_req_item *ur;
struct tdb *t;
int q;
-
for (q = 0; q < PFSYNC_S_COUNT; q++) {
if (TAILQ_EMPTY(&sn->sn_qs[q]))
continue;
while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) {
- TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
+ KASSERT(st->snapped == 1);
#endif
+ TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap);
st->sync_state = PFSYNC_S_NONE;
+ st->snapped = 0;
pf_state_unref(st);
}
}
while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry);
+ TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap);
pool_put(&sn->sn_sc->sc_pool, ur);
}
- mtx_enter(&sc->sc_tdb_mtx);
while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry);
+ TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap);
mtx_enter(&t->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+ KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+ CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
CLR(t->tdb_flags, TDBF_PFSYNC);
mtx_leave(&t->tdb_mtx);
}
- mtx_leave(&sc->sc_tdb_mtx);
}
int
@@ -1667,7 +1695,7 @@ pfsync_drop(struct pfsync_softc *sc)
struct pfsync_snapshot sn;
pfsync_grab_snapshot(&sn, sc);
- pfsync_drop_snapshot(&sn, sc);
+ pfsync_drop_snapshot(&sn);
}
void
@@ -1759,7 +1787,7 @@ pfsync_sendout(void)
if (m == NULL) {
sc->sc_if.if_oerrors++;
pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop_snapshot(&sn, sc);
+ pfsync_drop_snapshot(&sn);
return;
}
@@ -1769,7 +1797,7 @@ pfsync_sendout(void)
m_free(m);
sc->sc_if.if_oerrors++;
pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop_snapshot(&sn, sc);
+ pfsync_drop_snapshot(&sn);
return;
}
}
@@ -1799,7 +1827,7 @@ pfsync_sendout(void)
count = 0;
while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry);
+ TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap);
bcopy(&ur->ur_msg, m->m_data + offset,
sizeof(ur->ur_msg));
@@ -1827,18 +1855,21 @@ pfsync_sendout(void)
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
- mtx_enter(&sc->sc_tdb_mtx);
count = 0;
while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
+ TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
pfsync_out_tdb(t, m->m_data + offset);
offset += sizeof(struct pfsync_tdb);
mtx_enter(&t->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+ KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+ CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
CLR(t->tdb_flags, TDBF_PFSYNC);
mtx_leave(&t->tdb_mtx);
+ tdb_unref(t);
count++;
}
- mtx_leave(&sc->sc_tdb_mtx);
bzero(subh, sizeof(*subh));
subh->action = PFSYNC_ACT_TDB;
@@ -1856,11 +1887,13 @@ pfsync_sendout(void)
count = 0;
while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
- TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list);
+ TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
+ KASSERT(st->snapped == 1);
#endif
st->sync_state = PFSYNC_S_NONE;
+ st->snapped = 0;
pfsync_qs[q].write(st, m->m_data + offset);
offset += pfsync_qs[q].len;
@@ -2411,8 +2444,9 @@ pfsync_q_ins(struct pf_state *st, int q)
mtx_enter(&sc->sc_st_mtx);
/*
- * If two threads are competing to insert the same state, then
- * there must be just single winner.
+ * There are either two threads trying to update the
+ * the same state, or the state is just being processed
+ * (is on snapshot queue).
*/
if (st->sync_state != PFSYNC_S_NONE) {
mtx_leave(&sc->sc_st_mtx);
@@ -2450,6 +2484,15 @@ pfsync_q_del(struct pf_state *st)
mtx_enter(&sc->sc_st_mtx);
q = st->sync_state;
+ /*
+ * re-check under mutex
+ * if state is snapped already, then just bail out, because we came
+ * too late, the state is being just processed/dispatched to peer.
+ */
+ if ((q == PFSYNC_S_NONE) || (st->snapped)) {
+ mtx_leave(&sc->sc_st_mtx);
+ return;
+ }
atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
if (TAILQ_EMPTY(&sc->sc_qs[q]))
@@ -2495,6 +2538,7 @@ pfsync_update_tdb(struct tdb *t, int output)
}
TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
+ tdb_ref(t);
SET(t->tdb_flags, TDBF_PFSYNC);
mtx_leave(&t->tdb_mtx);
@@ -2525,11 +2569,23 @@ pfsync_delete_tdb(struct tdb *t)
mtx_enter(&sc->sc_tdb_mtx);
+ /*
+ * if tdb entry is just being processed (found in snapshot),
+ * then it can not be deleted. we just came too late
+ */
+ if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) {
+ mtx_leave(&sc->sc_tdb_mtx);
+ return;
+ }
+
TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+
mtx_enter(&t->tdb_mtx);
CLR(t->tdb_flags, TDBF_PFSYNC);
mtx_leave(&t->tdb_mtx);
+ tdb_unref(t);
+
nlen = sizeof(struct pfsync_tdb);
if (TAILQ_EMPTY(&sc->sc_tdb_q))
nlen += sizeof(struct pfsync_subheader);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index bd7ec1d88e7..558618a0f14 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -749,6 +749,7 @@ struct pf_state {
u_int8_t pad[3];
TAILQ_ENTRY(pf_state) sync_list;
+ TAILQ_ENTRY(pf_state) sync_snap;
TAILQ_ENTRY(pf_state) entry_list;
SLIST_ENTRY(pf_state) gc_list;
RB_ENTRY(pf_state) entry_id;
@@ -797,6 +798,7 @@ struct pf_state {
pf_refcnt_t refcnt;
u_int16_t delay;
u_int8_t rt;
+ u_int8_t snapped;
};
/*
diff --git a/sys/netinet/ip_ipsp.h b/sys/netinet/ip_ipsp.h
index c697994047b..fa46a1e1282 100644
--- a/sys/netinet/ip_ipsp.h
+++ b/sys/netinet/ip_ipsp.h
@@ -355,6 +355,7 @@ struct tdb { /* tunnel
descriptor block */
#define TDBF_PFSYNC 0x40000 /* TDB will be synced */
#define TDBF_PFSYNC_RPL 0x80000 /* Replay counter should be
bumped */
#define TDBF_ESN 0x100000 /* 64-bit sequence numbers
(ESN) */
+#define TDBF_PFSYNC_SNAPPED 0x200000 /* entry is being dispatched
to peer */
#define TDBF_BITS ("\20" \
"\1UNIQUE\2TIMER\3BYTES\4ALLOCATIONS" \
@@ -439,6 +440,7 @@ struct tdb { /* tunnel
descriptor block */
TAILQ_HEAD(tdb_policy_head, ipsec_policy) tdb_policy_head; /* [p] */
TAILQ_ENTRY(tdb) tdb_sync_entry;
+ TAILQ_ENTRY(tdb) tdb_sync_snap;
};
enum tdb_counters {