On Tue, May 04, 2021 at 09:55:32AM +0200, Claudio Jeker wrote:
> Noticed by the arouteserver author Pier Carlo Chiodi the new rde evaluate
> all feature has a bug when a 2nd best route is withdrawn. In that case
> that route got not withdrawn from the adj-rib-out.
>
> The problem is in up_generate_updates() and the fact that 'rde evaluate
> all' calls up_generate_updates() with old = NULL. For 'rde evaluate all'
> the code traverses the prefix list in new and once the last element is hit
> (new == NULL) the withdraw should be triggered. But since old == NULL it
> would just return without doing the withdraw.
>
> The fix is to remove the old == NULL check from the withdraw case but to
> do that the prefix address and prefixlen needs to be extracted before
> entering the selection loop.
>
> So extract the address once at start based on new or old (whichever is
> set). In case new and old are NULL just return (like before), since there
> is nothing todo.
>
There is another bug that causes this to not work properly. This time it
is rde_evaluate_all() which is returning 0 (as in default mode) even
though there are peers with 'rde evaluate all' set. The problem is that
the rde_eval_all flag is tracked in the wrong spot and so if you change
the flag after prefixes have loaded and then withdraw a prefix it will not
realize that a peer needs evaluate all and skip to process this withdraw.
The following diff (rde.c part) changes the tracking of rde_eval_all. It
resets the flag during reload but also sets rde_eval_all when a new peer
is added.
The diff also includes the rde_update.c change from before.
--
:wq Claudio
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.520
diff -u -p -r1.520 rde.c
--- rde.c 6 May 2021 09:18:54 -0000 1.520
+++ rde.c 11 May 2021 08:28:58 -0000
@@ -113,6 +113,7 @@ volatile sig_atomic_t rde_quit = 0;
struct filter_head *out_rules, *out_rules_tmp;
struct rde_memstats rdemem;
int softreconfig;
+static int rde_eval_all;
extern struct rde_peer_head peerlist;
extern struct rde_peer *peerself;
@@ -400,6 +402,9 @@ rde_dispatch_imsg_session(struct imsgbuf
fatalx("incorrect size of session request");
memcpy(&pconf, imsg.data, sizeof(pconf));
peer_add(imsg.hdr.peerid, &pconf);
+ /* make sure rde_eval_all is on if needed. */
+ if (pconf.flags & PEERFLAG_EVALUATE_ALL)
+ rde_eval_all = 1;
break;
case IMSG_NETWORK_ADD:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -2884,8 +2889,6 @@ rde_send_kroute(struct rib *rib, struct
/*
* update specific functions
*/
-static int rde_eval_all;
-
int
rde_evaluate_all(void)
{
@@ -2915,17 +2918,13 @@ rde_generate_updates(struct rib *rib, st
else
aid = old->pt->aid;
- rde_eval_all = 0;
LIST_FOREACH(peer, &peerlist, peer_l) {
/* skip ourself */
if (peer == peerself)
continue;
if (peer->state != PEER_UP)
continue;
- /* handle evaluate all, keep track if it is needed */
- if (peer->flags & PEERFLAG_EVALUATE_ALL)
- rde_eval_all = 1;
- else if (eval_all)
+ if ((peer->flags & PEERFLAG_EVALUATE_ALL) == 0 && eval_all)
/* skip default peers if the best path didn't change */
continue;
/* skip peers using a different rib */
@@ -3287,9 +3286,12 @@ rde_reload_done(void)
rde_filter_calc_skip_steps(out_rules);
+ /* make sure that rde_eval_all is correctly set after a config change */
+ rde_eval_all = 0;
+
/* check if filter changed */
LIST_FOREACH(peer, &peerlist, peer_l) {
- if (peer->conf.id == 0)
+ if (peer->conf.id == 0) /* ignore peerself*/
continue;
peer->reconf_out = 0;
peer->reconf_rib = 0;
@@ -3319,6 +3321,8 @@ rde_reload_done(void)
}
peer->export_type = peer->conf.export_type;
peer->flags = peer->conf.flags;
+ if (peer->flags & PEERFLAG_EVALUATE_ALL)
+ rde_eval_all = 1;
if (peer->reconf_rib) {
if (prefix_dump_new(peer, AID_UNSPEC,
@@ -3336,6 +3340,7 @@ rde_reload_done(void)
peer->reconf_out = 1;
}
}
+
/* bring ribs in sync */
for (rid = 0; rid < rib_size; rid++) {
struct rib *rib = rib_byid(rid);
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.127
diff -u -p -r1.127 rde_update.c
--- rde_update.c 6 May 2021 09:18:54 -0000 1.127
+++ rde_update.c 7 May 2021 15:27:58 -0000
@@ -101,17 +101,23 @@ up_generate_updates(struct filter_head *
struct filterstate state;
struct bgpd_addr addr;
int need_withdraw;
+ uint8_t prefixlen;
-again:
if (new == NULL) {
if (old == NULL)
- /* no prefix to withdraw */
+ /* no prefix to update or withdraw */
return;
+ pt_getaddr(old->pt, &addr);
+ prefixlen = old->pt->prefixlen;
+ } else {
+ pt_getaddr(new->pt, &addr);
+ prefixlen = new->pt->prefixlen;
+ }
+again:
+ if (new == NULL) {
/* withdraw prefix */
- pt_getaddr(old->pt, &addr);
- if (prefix_adjout_withdraw(peer, &addr,
- old->pt->prefixlen) == 1) {
+ if (prefix_adjout_withdraw(peer, &addr, prefixlen) == 1) {
peer->prefix_out_cnt--;
peer->up_wcnt++;
}
@@ -143,10 +149,8 @@ again:
rde_filterstate_prep(&state, prefix_aspath(new),
prefix_communities(new), prefix_nexthop(new),
prefix_nhflags(new));
- pt_getaddr(new->pt, &addr);
if (rde_filter(rules, peer, prefix_peer(new), &addr,
- new->pt->prefixlen, prefix_vstate(new), &state) ==
- ACTION_DENY) {
+ prefixlen, prefix_vstate(new), &state) == ACTION_DENY) {
rde_filterstate_clean(&state);
if (peer->flags & PEERFLAG_EVALUATE_ALL)
new = LIST_NEXT(new, entry.list.rib);