If I understand this patch:
Rule removal requires the consistency lock but to acquire it one must be
able to sleep. pf_test_rule() cannot sleep as it executes in a (soft)
interrupt handler, so it passes the expired rule to a thread which can.
However, the code to remove a 'once' rule may wish to also remove its
parent rule, now computed in the other thread. The updated patch below
avoids passing that state between threads in struct pf_rule by explictly
passing the parent rule to be purged when necessary.
There also seem to me a couple of issues, one hairier than the other:
- r->rule_flag should be updated atomically with respect to other
(potential) updates (included in the attached patch).
- Multiple rule-matching threads may match the same 'once' rule before
one of them reaches the code to set PFRULE_EXPIRED. A thread may even
match a 'once' rule after PFRULE_EXPIRED is set, if that occurs during
the 'once' rule's candidacy as a potential match. (This issue is not
addressed by the attached patch.)
This creates two problems:
a) rules may be purged more than once
b) what to do with packets matched by expired 'once' rules
In general, we do not know whether or not a 'once' rule candidate is
the match until a new candidate rule replaces it or the rule matching
process completes. Serialising this window of candicacy would prevent a
'once' rule becoming a candidate, and possibly the match, for multiple
packets at once. That's one approach.
A weaker approach instead permits multiple matches of one 'once' rule
but processes these serially in a critical section afterwards. The
first thread to reach it is deemed the first match (note this isn't
necessarily the first matching packet received at the interface!). It
then atomically marks it PFRULE_EXPIRED and places it on the purge
queue. Packets for matches of expired 'once' rule, it seems to me, can
be dropped as if we never saw them. An alternative might be to retry
them against the updated rules.
Another possibility would be to require 'once' rules to be 'quick'.
This closes the candidacy window and makes its serialisation, to
preclude multiple matches, more feasible.
Yet another possibility is to drop 'once' rules as too complex to
implement for multiprocessor but I have no idea if this is viable.
best,
Richard.
Tested via pf.conf
set skip on lo
pass
block in proto icmp
anchor selfdestruct {
pass in proto icmp once no state
}
Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -295,6 +295,9 @@ RB_GENERATE(pf_state_tree, pf_state_key,
RB_GENERATE(pf_state_tree_id, pf_state,
entry_id, pf_state_compare_id);
+SLIST_HEAD(pf_rule_gcl, pf_rule) pf_rule_gcl =
+ SLIST_HEAD_INITIALIZER(pf_rule_gcl);
+
__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
@@ -1137,6 +1140,24 @@ pf_state_export(struct pfsync_state *sp,
/* END state table stuff */
void
+pf_purge_expired_rules(void)
+{
+ struct pf_rule *r;
+
+ if (SLIST_EMPTY(&pf_rule_gcl)) {
+ return;
+ }
+
+ rw_enter_write(&pf_consistency_lock);
+ while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
+ SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
+ KASSERT(r->rule_flag & PFRULE_EXPIRED);
+ pf_purge_rule(r);
+ }
+ rw_exit_write(&pf_consistency_lock);
+}
+
+void
pf_purge_thread(void *v)
{
int nloops = 0, s;
@@ -1154,6 +1175,7 @@ pf_purge_thread(void *v)
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
pf_purge_expired_fragments();
pf_purge_expired_src_nodes(0);
+ pf_purge_expired_rules();
nloops = 0;
}
@@ -3135,6 +3157,10 @@ pf_test_rule(struct pf_pdesc *pd, struct
ruleset = &pf_main_ruleset;
r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
while (r != NULL) {
+ if (r->rule_flag & PFRULE_EXPIRED) {
+ r = TAILQ_NEXT(r, entries);
+ goto nextrule;
+ }
r->evaluations++;
PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
r->skip[PF_SKIP_IFP].ptr);
@@ -3433,8 +3459,18 @@ pf_test_rule(struct pf_pdesc *pd, struct
}
#endif /* NPFSYNC > 0 */
- if (r->rule_flag & PFRULE_ONCE)
- pf_purge_rule(ruleset, r, aruleset, a);
+ if (r->rule_flag & PFRULE_ONCE) {
+ int only_rule = (TAILQ_NEXT(r, entries) == NULL &&
+ TAILQ_PREV(r, pf_rulequeue, entries) == NULL);
+
+ if (only_rule && a) {
+ atomic_setbits_int(&a->rule_flag, PFRULE_EXPIRED);
+ SLIST_INSERT_HEAD(&pf_rule_gcl, a, gcle);
+ }
+
+ atomic_setbits_int(&r->rule_flag, PFRULE_EXPIRED);
+ SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
+ }
#ifdef INET6
if (rewrite && skw->af != sks->af)
Index: net/pf_ioctl.c
===================================================================
--- net.orig/pf_ioctl.c
+++ net/pf_ioctl.c
@@ -301,12 +301,13 @@ pf_rm_rule(struct pf_rulequeue *rulequeu
}
void
-pf_purge_rule(struct pf_ruleset *ruleset, struct pf_rule *rule,
- struct pf_ruleset *aruleset, struct pf_rule *arule)
+pf_purge_rule(struct pf_rule *rule)
{
u_int32_t nr = 0;
+ struct pf_ruleset *ruleset;
- KASSERT(ruleset != NULL && rule != NULL);
+ KASSERT((rule != NULL) && (rule->myruleset != NULL));
+ ruleset = rule->myruleset;
pf_rm_rule(ruleset->rules.active.ptr, rule);
ruleset->rules.active.rcount--;
@@ -314,16 +315,6 @@ pf_purge_rule(struct pf_ruleset *ruleset
rule->nr = nr++;
ruleset->rules.active.ticket++;
pf_calc_skip_steps(ruleset->rules.active.ptr);
-
- /* remove the parent anchor rule */
- if (nr == 0 && arule && aruleset) {
- pf_rm_rule(aruleset->rules.active.ptr, arule);
- aruleset->rules.active.rcount--;
- TAILQ_FOREACH(rule, aruleset->rules.active.ptr, entries)
- rule->nr = nr++;
- aruleset->rules.active.ticket++;
- pf_calc_skip_steps(aruleset->rules.active.ptr);
- }
}
u_int16_t
@@ -1209,6 +1200,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
}
TAILQ_INSERT_TAIL(ruleset->rules.inactive.ptr,
rule, entries);
+ rule->myruleset = ruleset;
ruleset->rules.inactive.rcount++;
break;
}
Index: net/pfvar.h
===================================================================
--- net.orig/pfvar.h
+++ net/pfvar.h
@@ -563,6 +563,9 @@ struct pf_rule {
struct pf_addr addr;
u_int16_t port;
} divert, divert_packet;
+
+ SLIST_ENTRY(pf_rule) gcle;
+ struct pf_ruleset *myruleset;
};
/* rule flags */
@@ -581,6 +584,7 @@ struct pf_rule {
#define PFRULE_PFLOW 0x00040000
#define PFRULE_ONCE 0x00100000 /* one shot rule */
#define PFRULE_AFTO 0x00200000 /* af-to rule */
+#define PFRULE_EXPIRED 0x00400000 /* one shot rule hit by
pkt */
#define PFSTATE_HIWAT 10000 /* default state table size */
#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
@@ -1701,9 +1705,7 @@ extern void pf_addrcpy(struct
pf_addr
sa_family_t);
void pf_rm_rule(struct pf_rulequeue *,
struct pf_rule *);
-void pf_purge_rule(struct pf_ruleset *,
- struct pf_rule *, struct pf_ruleset *,
- struct pf_rule *);
+void pf_purge_rule(struct pf_rule *);
struct pf_divert *pf_find_divert(struct mbuf *);
int pf_setup_pdesc(struct pf_pdesc *, void *,
sa_family_t, int, struct pfi_kif *,