Hi Toke, On Sun, May 08, 2022 at 09:06:43PM +0200, Toke Høiland-Jørgensen wrote: > Daniel Gröber <d...@darkboxed.org> writes: > > --- > > The FIB walks in babel_reconfigure_iface are a bit ugly, I tried to > > loop over ifa->neigh_list instead but couldn't get it to work. I'm > > open to suggestions :) > > > > doc/bird.sgml | 8 +++++ > > proto/babel/babel.c | 70 +++++++++++++++++++++++++++++++++++++------- > > proto/babel/babel.h | 3 ++ > > proto/babel/config.Y | 3 ++ > > 4 files changed, 73 insertions(+), 11 deletions(-) > > > > diff --git a/doc/bird.sgml b/doc/bird.sgml > > index 1580facd..5e85d8ec 100644 > > --- a/doc/bird.sgml > > +++ b/doc/bird.sgml > > @@ -1879,6 +1879,7 @@ protocol babel [<name>] { > > check link <switch>; > > next hop ipv4 <address>; > > next hop ipv6 <address>; > > + ecmp <switch> [limit <num>]; > > authentication none|mac [permissive]; > > password "<text>"; > > password "<text>" { > > @@ -1983,6 +1984,13 @@ protocol babel [<name>] { > > source for Babel packets will be used. In normal operation, it > > should not > > be necessary to set this option. > > > > + <tag><label id="babel-ecmp">ecmp <m>switch</m> [limit > > <m>number</m>]</tag> > > + > > + Determines whether babel will emit ECMP (equal-cost multipath) > > + routes, allowing to load-balancing traffic across multiple paths. If > > + enabled the maximum number of next-hops to allow can be specified, > > + defaulting to 16. > > This should probably explain the constraints: i.e., that only > equal-weight routes are added (which in practice limits it to interfaces > of type 'wired', or wireless interfaces with no packet loss, I suppose?) > and that the routes all have to be feasible (though I'm not sure if we > explain feasibility in the docs anywhere).
Right, I have this now: When neibours are using a dynamic link-quality metric this is unlikely to be useful. For best results <ref id="babel-type" name="type wired"> should be used throughout the network to get what amounts to a hop count metric. Once RTT metric are merged I'm going to add another paragraph touting the virtues of using (the equivalents of rtt-min/max from babeld) to kick bufferbloated or weirdly re-routed paths out of the ECMP set. That's my main use-case anyway. > > <tag><label id="babel-authentication">authentication none|mac > > [permissive]</tag> > > Selects authentication method to be used. <cf/none/ means that > > packets > > are not authenticated at all, <cf/mac/ means MAC authentication is > > diff --git a/proto/babel/babel.c b/proto/babel/babel.c > > index 4a7d550f..4d9c657c 100644 > > --- a/proto/babel/babel.c > > +++ b/proto/babel/babel.c > > @@ -623,6 +623,34 @@ done: > > } > > } > > > > +/** > > + * babel_nexthop_insert - add next_hop of route to nexthop list > > + * @p: Babel protocol instance > > + * @r: Babel route > > + * @nhs: nexthop list head to append onto > > + * @nh: freshly allocated buffer to fill > > + */ > > +static void > > +babel_nexthop_insert( > > + struct babel_proto *p, > > + struct babel_route *r, > > + struct nexthop **nhs, > > + struct nexthop *nh) > > +{ > > + nh->gw = r->next_hop; > > + nh->iface = r->neigh->ifa->iface; > > + > > + /* > > + * If we cannot find a reachable neighbour, set the entry to be onlink. > > This > > + * makes it possible to, e.g., assign /32 addresses on a mesh interface > > and > > + * have routing work. > > + */ > > + if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) > > + nh->flags = RNF_ONLINK; > > + > > + nexthop_insert(nhs, nh); > > +} > > + > > /** > > * babel_announce_rte - announce selected route to the core > > * @p: Babel protocol instance > > @@ -635,6 +663,7 @@ done: > > static void > > babel_announce_rte(struct babel_proto *p, struct babel_entry *e) > > { > > + struct babel_config *cf = (void *) p->p.cf; > > struct babel_route *r = e->selected; > > struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : > > p->ip6_channel; > > > > @@ -645,18 +674,24 @@ babel_announce_rte(struct babel_proto *p, struct > > babel_entry *e) > > .source = RTS_BABEL, > > .scope = SCOPE_UNIVERSE, > > .dest = RTD_UNICAST, > > - .from = r->neigh->addr, > > - .nh.gw = r->next_hop, > > - .nh.iface = r->neigh->ifa->iface, > > }; > > > > - /* > > - * If we cannot find a reachable neighbour, set the entry to be > > onlink. This > > - * makes it possible to, e.g., assign /32 addresses on a mesh > > interface and > > - * have routing work. > > - */ > > - if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) > > - a0.nh.flags = RNF_ONLINK; > > + struct nexthop *nhs = NULL; > > + babel_nexthop_insert(p, r, &nhs, allocz(sizeof(struct nexthop))); > > + int num_nexthops = 1; > > + > > + struct babel_route *cr; > > + WALK_LIST(cr, e->routes) { > > + if (num_nexthops++ >= cf->ecmp) > > + break; > > + > > + if (cr == r || !cr->feasible || cr->metric != r->metric) > > + continue; > > You should probably switch the order of those two checks? You're > increasing the counter even if the route is subsequently discarded... Whoops, last minute change and brainfart. > > + > > + babel_nexthop_insert(p, cr, &nhs, allocz(sizeof(struct nexthop))); > > + } > > + > > + a0.nh = *nhs; > > > > rta *a = rta_lookup(&a0); > > rte *rte = rte_get_temp(a); > > @@ -736,6 +771,7 @@ babel_announce_retraction(struct babel_proto *p, struct > > babel_entry *e) > > static void > > babel_select_route(struct babel_proto *p, struct babel_entry *e, struct > > babel_route *mod) > > { > > + struct babel_config *cf = (void *) p->p.cf; > > struct babel_route *r, *best = e->selected; > > > > /* Shortcut if only non-best was modified */ > > @@ -744,8 +780,10 @@ babel_select_route(struct babel_proto *p, struct > > babel_entry *e, struct babel_ro > > /* Either select modified route, or keep old best route */ > > if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && > > mod->feasible) > > best = mod; > > - else > > + else if (cf->ecmp==1) > > This looks like you're returning if ECMP is enabled; I think finding a > better name for that config variable would be good; maybe just > "max_nexthops"? Yeah that's neater. Fixed. > > return; > > + /* With ecmp one of the non-selected but equal metric routes might have > > + * changed so contine on with the announcement in that case. */ > > } > > else > > { > > @@ -1879,6 +1917,16 @@ babel_reconfigure_iface(struct babel_proto *p, > > struct babel_iface *ifa, struct b > > if (ifa->up) > > babel_iface_kick_timer(ifa); > > > > + /* Update all routes to refresh ecmp settings. */ > > + FIB_WALK(&p->ip6_rtable, struct babel_entry, e) { > > + babel_select_route(p, e, NULL); > > + } > > + FIB_WALK_END; > > + FIB_WALK(&p->ip4_rtable, struct babel_entry, e) { > > + babel_select_route(p, e, NULL); > > + } > > + FIB_WALK_END; > > + > > I don't think this is the right thing to do; you should probably just > refuse the reconfiguration if ECMP settings changed (see the 0-return at > the top when TOS changes)? Why not? Works fine AFAICT. I tested the reload stuff and the nexthops get updated when the limit changes just as I would expect. If I refuse the reconfiguration the user would have to restart bird to get this to apply, right? I don't want that. > > return 1; > > } > > > > diff --git a/proto/babel/babel.h b/proto/babel/babel.h > > index 84feb085..3868dcb2 100644 > > --- a/proto/babel/babel.h > > +++ b/proto/babel/babel.h > > @@ -62,6 +62,8 @@ > > #define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH) > > #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) > > > > +#define BABEL_DEFAULT_ECMP_LIMIT 16 > > + > > #define BABEL_AUTH_NONE 0 > > #define BABEL_AUTH_MAC 1 > > > > @@ -120,6 +122,7 @@ struct babel_config { > > list iface_list; /* List of iface configs (struct > > babel_iface_config) */ > > uint hold_time; /* Time to hold stale entries and > > unreachable routes */ > > u8 randomize_router_id; > > + u8 ecmp; /* Maximum number of nexthops to > > install */ > > See above re: the naming of this; 'max_nexthops'? > > > struct channel_config *ip4_channel; > > struct channel_config *ip6_channel; > > diff --git a/proto/babel/config.Y b/proto/babel/config.Y > > index 05210fa4..8f18c790 100644 > > --- a/proto/babel/config.Y > > +++ b/proto/babel/config.Y > > @@ -36,6 +36,7 @@ babel_proto_start: proto_start BABEL > > this_proto = proto_config_new(&proto_babel, $1); > > init_list(&BABEL_CFG->iface_list); > > BABEL_CFG->hold_time = 1 S_; > > + BABEL_CFG->ecmp = 1; > > }; > > > > babel_proto_item: > > @@ -43,6 +44,8 @@ babel_proto_item: > > | proto_channel > > | INTERFACE babel_iface > > | RANDOMIZE ROUTER ID bool { BABEL_CFG->randomize_router_id = $4; } > > + | ECMP bool { BABEL_CFG->ecmp = $2 ? BABEL_DEFAULT_ECMP_LIMIT : 1; } > > + | ECMP bool LIMIT expr { BABEL_CFG->ecmp = $2 ? $4 : 1; } > > ; > > > > babel_proto_opts: > > -- > > 2.30.2 --Daniel