hi,
one of the last pfsync "improvements" (r1.180) by yours truly
introduced some nasty regressions for those running pfsync over
the crossover cable. i apologize for all the inconveniences.
peter hessler, kapeatanakis giannis and myself have been trying
very hard to get this fixed.
we have identified several problems introduced by the change:
1) failover when backup is rebooted: master has it's link down
so it's demoted, backup comes up, its link comes up, master
requests bulk update and demotes by 1 to 2, backup requests
bulk update and its demotion counter becomes 129; then rc
undemotes it by 128 and it stays at 1 until bulk update
finishes. here it preempts w/o having all the state info
from the master.
2) race between link state update on master and backup when
plugging back the cable can trigger failovers: because of
the demotion dance on the "link up" event, master can
notice the link coming up faster then the backup and request
a bulk update which would demote it further and trigger a
failover. when backup notices the link it'll demote itself
to the same level as the master and failover back again, but
this flipping will produce a considerable disruption in the
traffic flow.
to address this two problems we have developed two mechanisms:
1) initial demotion by 32 (arbitrary large number) on the first
bulk update that will last longer than the rc demotion and
prevent failovers w/o having the full state table.
2) not doing any demotion adjustments on the link up event and
treating a link down demotion as the bulk update start one
and undemoting when bulk update finishes (either normally or
with a timeout.
this change was thorously tested by phesseler and kapeatanakis.
more tests and oks are welcome.
this is a bit newer version of the diff that has the bulk update
cancelation code factored out in a separate function
pfsync_cancel_full_update for readability. it's also reused in
the code bringing the interface down.
diff --git sys/net/if_pfsync.c sys/net/if_pfsync.c
index 534c9d3..60f9939 100644
--- sys/net/if_pfsync.c
+++ sys/net/if_pfsync.c
@@ -210,6 +210,9 @@ struct pfsync_softc {
struct pfsync_upd_reqs sc_upd_req_list;
+ int sc_initial_bulk;
+ int sc_link_demoted;
+
int sc_defer;
struct pfsync_deferrals sc_deferrals;
u_int sc_deferred;
@@ -254,6 +257,7 @@ void pfsync_deferred(struct pf_state *, int);
void pfsync_undefer(struct pfsync_deferral *, int);
void pfsync_defer_tmo(void *);
+void pfsync_cancel_full_update(struct pfsync_softc *);
void pfsync_request_full_update(struct pfsync_softc *);
void pfsync_request_update(u_int32_t, u_int64_t);
void pfsync_update_state_req(struct pf_state *);
@@ -357,6 +361,8 @@ pfsync_clone_destroy(struct ifnet *ifp)
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
+ if (sc->sc_link_demoted)
+ carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
if (sc->sc_lhcookie != NULL)
hook_disestablish(
@@ -414,29 +420,28 @@ pfsync_syncdev_state(void *arg)
{
struct pfsync_softc *sc = arg;
- if (!sc->sc_sync_if)
+ if (!sc->sc_sync_if && !(sc->sc_if.if_flags & IFF_UP))
return;
- if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN ||
- !(sc->sc_sync_if->if_flags & IFF_UP)) {
+ if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
sc->sc_if.if_flags &= ~IFF_RUNNING;
+ if (!sc->sc_link_demoted) {
#if NCARP > 0
- carp_group_demote_adj(&sc->sc_if, 1, "pfsyncdev");
+ carp_group_demote_adj(&sc->sc_if, 1,
+ "pfsync link state down");
#endif
+ sc->sc_link_demoted = 1;
+ }
+
/* drop everything */
timeout_del(&sc->sc_tmo);
pfsync_drop(sc);
- /* cancel bulk update */
- timeout_del(&sc->sc_bulk_tmo);
- sc->sc_bulk_next = NULL;
- sc->sc_bulk_last = NULL;
- } else {
+ pfsync_cancel_full_update(sc);
+ } else if (sc->sc_link_demoted) {
sc->sc_if.if_flags |= IFF_RUNNING;
+
pfsync_request_full_update(sc);
-#if NCARP > 0
- carp_group_demote_adj(&sc->sc_if, -1, "pfsyncdev");
-#endif
}
}
@@ -1149,9 +1154,17 @@ pfsync_in_bus(caddr_t buf, int len, int count, int flags)
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1,
+ sc->sc_link_demoted ?
+ "pfsync link state up" :
"pfsync bulk done");
+ if (sc->sc_initial_bulk) {
+ carp_group_demote_adj(&sc->sc_if, -32,
+ "pfsync init");
+ sc->sc_initial_bulk = 0;
+ }
#endif
pfsync_sync_ok = 1;
+ sc->sc_link_demoted = 0;
DPFPRINTF(LOG_INFO, "received valid bulk update end");
} else {
DPFPRINTF(LOG_WARNING, "received invalid "
@@ -1270,20 +1283,27 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if ((ifp->if_flags & IFF_RUNNING) == 0 &&
(ifp->if_flags & IFF_UP)) {
ifp->if_flags |= IFF_RUNNING;
+
+ sc->sc_initial_bulk = 1;
+ carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
+
pfsync_request_full_update(sc);
}
if ((ifp->if_flags & IFF_RUNNING) &&
(ifp->if_flags & IFF_UP) == 0) {
ifp->if_flags &= ~IFF_RUNNING;
+ if (sc->sc_initial_bulk) {
+ carp_group_demote_adj(&sc->sc_if, -32,
+ "pfsync init");
+ sc->sc_initial_bulk = 0;
+ }
+
/* drop everything */
timeout_del(&sc->sc_tmo);
pfsync_drop(sc);
- /* cancel bulk update */
- timeout_del(&sc->sc_bulk_tmo);
- sc->sc_bulk_next = NULL;
- sc->sc_bulk_last = NULL;
+ pfsync_cancel_full_update(sc);
}
splx(s);
break;
@@ -1880,13 +1900,27 @@ pfsync_update_state(struct pf_state *st)
}
void
+pfsync_cancel_full_update(struct pfsync_softc *sc)
+{
+ if (timeout_pending(&sc->sc_bulkfail_tmo) ||
+ timeout_pending(&sc->sc_bulk_tmo))
+ DPFPRINTF(LOG_INFO, "cancelling bulk update");
+ timeout_del(&sc->sc_bulkfail_tmo);
+ timeout_del(&sc->sc_bulk_tmo);
+ sc->sc_bulk_next = NULL;
+ sc->sc_bulk_last = NULL;
+ sc->sc_ureq_sent = 0;
+ sc->sc_bulk_tries = 0;
+}
+
+void
pfsync_request_full_update(struct pfsync_softc *sc)
{
if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
/* Request a full state table update. */
sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
- if (pfsync_sync_ok)
+ if (!sc->sc_link_demoted && pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, 1,
"pfsync bulk start");
#endif
@@ -2274,9 +2308,17 @@ pfsync_bulk_fail(void *arg)
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1,
+ sc->sc_link_demoted ?
+ "pfsync link state up" :
"pfsync bulk fail");
+ if (sc->sc_initial_bulk) {
+ carp_group_demote_adj(&sc->sc_if, -32,
+ "pfsync init");
+ sc->sc_initial_bulk = 0;
+ }
#endif
pfsync_sync_ok = 1;
+ sc->sc_link_demoted = 0;
DPFPRINTF(LOG_ERR, "failed to receive bulk update");
}