On Mon, Jun 26, 2017 at 10:29:24AM +0200, Alexandr Nedvedicky wrote:
> > +#define PF_FRAG_STALE 200 /* Limit fragments per second per
> > connection */
> I did not get how we arrived to 'Limit fragments per second per
> connection.'
Actually I was looking at markus@'s algorithm and tried to write
the idea of the value in a single line comment. Maybe I got it
wrong. To add to the confusion, the existing comment had a calculation
error: it said 60*200 = 18,000, but the product is only 12,000.
If we find a fragment that is 12,000 generation numbers behind, it
is considered stale. Fragments that are older than 60 seconds are
removed from the queue. So if new fragments arrive at a rate of
less than 200 fragments per second, they never get stale. 200 is
the maximum fragment rate per second per connection, averaged over
one minute. If we change the timeout, the interval over which the
average is computed changes, but the rate per second stays constant.
Perhaps it becomes clearer with a longer comment.
Should we use a different value for IPv6? There the id is 32 bit,
but the 8 bit proto is irrelevant. To keep it simple, I currently
use the same value for both IP versions.
ok?
bluhm
Index: net/pf_norm.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.207
diff -u -p -r1.207 pf_norm.c
--- net/pf_norm.c 24 Jun 2017 20:32:39 -0000 1.207
+++ net/pf_norm.c 26 Jun 2017 15:26:07 -0000
@@ -84,6 +84,7 @@ struct pf_frnode {
u_int8_t fn_proto; /* protocol for fragments in fn_tree */
u_int8_t fn_direction; /* pf packet direction */
u_int32_t fn_fragments; /* number of entries in fn_tree */
+ u_int32_t fn_gen; /* fr_gen of newest entry in fn_tree */
RB_ENTRY(pf_frnode) fn_entry;
struct pf_frag_tree fn_tree; /* matching fragments, lookup by id */
@@ -96,6 +97,7 @@ struct pf_fragment {
TAILQ_ENTRY(pf_fragment) frag_next;
TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
int32_t fr_timeout;
+ u_int32_t fr_gen; /* generation number (per pf_frnode) */
u_int16_t fr_maxlen; /* maximum length of single fragment */
struct pf_frnode *fr_node; /* ip src/dst/proto/af for fragments */
};
@@ -273,6 +275,7 @@ pf_find_fragment(struct pf_frnode *key,
{
struct pf_fragment *frag, idkey;
struct pf_frnode *frnode;
+ u_int32_t stale;
frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
if (frnode == NULL)
@@ -282,6 +285,24 @@ pf_find_fragment(struct pf_frnode *key,
frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey);
if (frag == NULL)
return (NULL);
+ /*
+ * Limit the number of fragments we accept for each (proto,src,dst,af)
+ * combination (aka pf_frnode), so we can deal better with a high rate
+ * of fragments. Problem analysis is in RFC 4963.
+ * Store the current generation for each pf_frnode in fn_gen and on
+ * lookup discard 'stale' fragments (pf_fragment, based on the fr_gen
+ * member). Instead of adding another button interpret the pf fragment
+ * timeout in multiples of 200 fragments. This way the default of 60s
+ * means: pf_fragment objects older than 60*200 = 12,000 generations
+ * are considered stale.
+ */
+ stale = pf_default_rule.timeout[PFTM_FRAG] * PF_FRAG_STALE;
+ if ((frnode->fn_gen - frag->fr_gen) >= stale) {
+ DPFPRINTF(LOG_NOTICE, "stale fragment %d(%p), gen %u, num %u",
+ frag->fr_id, frag, frag->fr_gen, frnode->fn_fragments);
+ pf_free_fragment(frag);
+ return (NULL);
+ }
TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
@@ -369,9 +390,11 @@ pf_fillup_fragment(struct pf_frnode *key
*frnode = *key;
RB_INIT(&frnode->fn_tree);
frnode->fn_fragments = 0;
+ frnode->fn_gen = 0;
}
TAILQ_INIT(&frag->fr_queue);
frag->fr_timeout = time_uptime;
+ frag->fr_gen = frnode->fn_gen++;
frag->fr_maxlen = frent->fe_len;
frag->fr_id = id;
frag->fr_node = frnode;
Index: net/pfvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
retrieving revision 1.457
diff -u -p -r1.457 pfvar.h
--- net/pfvar.h 30 May 2017 19:40:54 -0000 1.457
+++ net/pfvar.h 26 Jun 2017 15:44:10 -0000
@@ -109,6 +109,14 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_O
#define PFTM_SRC_NODE_VAL 0 /* Source tracking */
#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */
+/*
+ * For each connection (combination of proto,src,dst,af) the number
+ * of fragments is limited. Over the PFTM_FRAG interval the average
+ * rate must be less than PF_FRAG_STALE fragments per second.
+ * Otherwise older fragments are considered stale and are dropped.
+ */
+#define PF_FRAG_STALE 200
+
enum { PF_NOPFROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };