* Thomas Bader <[EMAIL PROTECTED]> [2006-06-14 09:02]: > In one case the fail-over does not work well: If the > BGP-peering on r0a to the upstream goes down all traffic > will be routed from r0a via $pfsync_if to r0b
this case requires bgpd to actively take influence on the carp state. now, lucky you, I have a diff for current doing exactly that :) you need -current from after the hackathon, as this needs the carp group demotion stuff. you then basically add "demote carp" to the session you care about. when that session goes down, bgpd increases the demotion counter for said group (really only makes sense for carp groups). yes, manpages missing so far... hardcore testing welcome. also, as for everybody successfully using openbgpd, we welcome testimonials for http://www.openbgpd.org/users.html :) Index: Makefile =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/Makefile,v retrieving revision 1.24 diff -u -p -r1.24 Makefile --- Makefile 3 Jan 2006 22:19:59 -0000 1.24 +++ Makefile 14 Jun 2006 16:19:03 -0000 @@ -6,7 +6,7 @@ PROG= bgpd SRCS= bgpd.c buffer.c session.c log.c parse.y config.c imsg.c \ rde.c rde_rib.c rde_decide.c rde_prefix.c mrt.c kroute.c \ control.c pfkey.c rde_update.c rde_attr.c printconf.c \ - rde_filter.c pftable.c name2id.c util.c + rde_filter.c pftable.c name2id.c util.c carp.c CFLAGS+= -Wall -I${.CURDIR} CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes CFLAGS+= -Wmissing-declarations Index: bgpd.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v retrieving revision 1.137 diff -u -p -r1.137 bgpd.c --- bgpd.c 27 May 2006 21:24:36 -0000 1.137 +++ bgpd.c 14 Jun 2006 16:19:04 -0000 @@ -132,8 +132,11 @@ main(int argc, char *argv[]) peer_l = NULL; conf.csock = SOCKET_NAME; - while ((ch = getopt(argc, argv, "dD:f:nr:s:v")) != -1) { + while ((ch = getopt(argc, argv, "cdD:f:nr:s:v")) != -1) { switch (ch) { + case 'c': + conf.opts |= BGPD_OPT_FORCE_DEMOTE; + break; case 'd': debug = 1; break; @@ -645,6 +648,19 @@ dispatch_imsg(struct imsgbuf *ibuf, int log_warnx("IFINFO request with wrong len"); else kr_ifinfo(imsg.data); + break; + case IMSG_DEMOTE: + if (idx 
!= PFD_PIPE_SESSION) + log_warnx("demote request not from SE"); + else if (imsg.hdr.len != IMSG_HEADER_SIZE + + sizeof(struct demote_msg)) + log_warnx("DEMOTE request with wrong len"); + else { + struct demote_msg *msg; + + msg = (struct demote_msg *)imsg.data; + carp_demote_set(msg->demote_group, msg->level); + } break; default: break; Index: bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v retrieving revision 1.201 diff -u -p -r1.201 bgpd.h --- bgpd.h 27 May 2006 21:24:36 -0000 1.201 +++ bgpd.h 14 Jun 2006 16:19:04 -0000 @@ -49,6 +49,7 @@ #define BGPD_OPT_VERBOSE 0x0001 #define BGPD_OPT_VERBOSE2 0x0002 #define BGPD_OPT_NOACTION 0x0004 +#define BGPD_OPT_FORCE_DEMOTE 0x0008 #define BGPD_FLAG_NO_FIB_UPDATE 0x0001 #define BGPD_FLAG_NO_EVALUATE 0x0002 @@ -220,6 +221,7 @@ struct peer_config { char group[PEER_DESCR_LEN]; char descr[PEER_DESCR_LEN]; char if_depend[IFNAMSIZ]; + char demote_group[IFNAMSIZ]; u_int32_t id; u_int32_t groupid; u_int32_t max_prefix; @@ -327,7 +329,8 @@ enum imsg_type { IMSG_CTL_SHOW_RIB_MEM, IMSG_CTL_SHOW_TERSE, IMSG_REFRESH, - IMSG_IFINFO + IMSG_IFINFO, + IMSG_DEMOTE }; struct imsg_hdr { @@ -340,6 +343,11 @@ struct imsg_hdr { struct imsg { struct imsg_hdr hdr; void *data; +}; + +struct demote_msg { + char demote_group[IFNAMSIZ]; + int level; }; enum ctl_results { Index: carp.c =================================================================== RCS file: carp.c diff -N carp.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ carp.c 14 Jun 2006 16:19:04 -0000 @@ -0,0 +1,160 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2006 Henning Brauer <[EMAIL PROTECTED]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <net/if.h> + +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include "bgpd.h" +#include "session.h" + +struct carpgroup { + TAILQ_ENTRY(carpgroup) entry; + char *group; + int do_demote; + int changed_by; +}; + +TAILQ_HEAD(carpgroups, carpgroup) carpgroups = + TAILQ_HEAD_INITIALIZER(carpgroups); + +struct carpgroup *carp_group_find(char *group); +int carp_demote_ioctl(char *, int); + +struct carpgroup * +carp_group_find(char *group) +{ + struct carpgroup *c; + + TAILQ_FOREACH(c, &carpgroups, entry) + if (!strcmp(c->group, group)) + return (c); + + return (NULL); +} + +int +carp_demote_init(char *group, int force) +{ + struct carpgroup *c; + int level; + + if ((c = carp_group_find(group)) == NULL) { + if ((c = calloc(1, sizeof(struct carpgroup))) == NULL) { + log_warn("carp_demote_init calloc"); + return (-1); + } + if ((c->group = strdup(group)) == NULL) { + log_warn("carp_demote_init calloc"); + free(c); + return (-1); + } + + /* only demote if this group already is demoted */ + if ((level = carp_demote_get(group)) == -1) + return (-1); + if (level > 0 || force) + c->do_demote = 1; + + TAILQ_INSERT_TAIL(&carpgroups, c, entry); + } + + return (0); +} + +int +carp_demote_get(char *group) +{ + int s; + struct ifgroupreq ifgr; + + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + log_warn("carp_demote_get: socket"); + return (-1); + 
} + + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, group, sizeof(ifgr.ifgr_name)); + + if (ioctl(s, SIOCGIFGATTR, (caddr_t)&ifgr) == -1) { + if (errno == ENOENT) + log_warnx("group \"%s\" does not exist", group); + else + log_warn("carp_demote_get: ioctl"); + close(s); + return (-1); + } + + close(s); + return ((int)ifgr.ifgr_attrib.ifg_carp_demoted); +} + +int +carp_demote_set(char *group, int demote) +{ + struct carpgroup *c; + + if ((c = carp_group_find(group)) == NULL) { + log_warnx("carp_group_find for %s returned NULL?!", group); + return (-1); + } + + if (c->changed_by + demote < 0) { + log_warnx("carp_demote_set: changed_by + demote < 0"); + return (-1); + } + + if (c->do_demote && carp_demote_ioctl(group, demote) == -1) + return (-1); + + c->changed_by += demote; + return (0); +} + +int +carp_demote_ioctl(char *group, int demote) +{ + int s, res; + struct ifgroupreq ifgr; + + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + log_warn("carp_demote_get: socket"); + return (-1); + } + + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, group, sizeof(ifgr.ifgr_name)); + ifgr.ifgr_attrib.ifg_carp_demoted = demote; + + if ((res = ioctl(s, SIOCSIFGATTR, (caddr_t)&ifgr)) == -1) + log_warn("unable to %s the demote state " + "of group '%s'", (demote > 0) ? "increment" : "decrement", + group); + else + log_info("%s the demote state of group '%s'", + (demote > 0) ? 
"incremented" : "decremented", group); + + close (s); + return (res); +} Index: parse.y =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v retrieving revision 1.190 diff -u -p -r1.190 parse.y --- parse.y 31 May 2006 02:16:25 -0000 1.190 +++ parse.y 14 Jun 2006 16:19:04 -0000 @@ -149,7 +149,7 @@ typedef struct { %token AS ROUTERID HOLDTIME YMIN LISTEN ON FIBUPDATE %token RDE EVALUATE IGNORE COMPARE %token GROUP NEIGHBOR NETWORK -%token REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX ANNOUNCE +%token REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX ANNOUNCE DEMOTE %token ENFORCE NEIGHBORAS CAPABILITIES REFLECTOR DEPEND DOWN SOFTRECONFIG %token DUMP IN OUT %token LOG ROUTECOLL TRANSPARENT @@ -909,6 +909,24 @@ peeropts : REMOTEAS asnumber { } free($3); } + | DEMOTE STRING { + if (strlcpy(curpeer->conf.demote_group, $2, + sizeof(curpeer->conf.demote_group)) >= + sizeof(curpeer->conf.demote_group)) { + yyerror("demote group name \"%s\" too long: " + "max %u", $2, + sizeof(curpeer->conf.demote_group) - 1); + free($2); + YYERROR; + } + free($2); + if (carp_demote_init(curpeer->conf.demote_group, + conf->opts & BGPD_OPT_FORCE_DEMOTE) == -1) { + yyerror("error initializing group \"%s\"", + curpeer->conf.demote_group); + YYERROR; + } + } | SOFTRECONFIG inout yesno { if ($2) curpeer->conf.softreconfig_in = $3; @@ -1558,6 +1576,7 @@ lookup(char *s) { "compare", COMPARE}, { "connected", CONNECTED}, { "delete", DELETE}, + { "demote", DEMOTE}, { "deny", DENY}, { "depend", DEPEND}, { "descr", DESCR}, Index: session.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.c,v retrieving revision 1.250 diff -u -p -r1.250 session.c --- session.c 10 Jun 2006 16:32:38 -0000 1.250 +++ session.c 14 Jun 2006 16:19:05 -0000 @@ -81,6 +81,7 @@ int parse_capabilities(struct peer *, u_ void session_dispatch_imsg(struct imsgbuf *, int, u_int *); void session_up(struct peer *); 
void session_down(struct peer *); +void session_demote(struct peer *, int); int la_cmp(struct listen_addr *, struct listen_addr *); struct peer *getpeerbyip(struct sockaddr *); @@ -288,6 +289,9 @@ session_main(struct bgpd_config *config, /* deletion due? */ if (p->conf.reconf_action == RECONF_DELETE) { + if (p->demoted) + session_demote(p, -1); + p->conf.demote_group[0] = 0; bgp_fsm(p, EVNT_STOP); log_peer_warnx(&p->conf, "removed"); if (last != NULL) @@ -408,6 +412,17 @@ session_main(struct bgpd_config *config, p->IdleHoldResetTimer < nextaction) nextaction = p->IdleHoldResetTimer; + /* carp demotion */ + if (p->demoted && p->state == STATE_ESTABLISHED) { + if (time(NULL) - p->stats.last_updown >= + INTERVAL_HOLD_DEMOTED) + session_demote(p, -1); + if (p->stats.last_updown + + INTERVAL_HOLD_DEMOTED < nextaction) + nextaction = p->stats.last_updown + + INTERVAL_HOLD_DEMOTED; + } + /* are we waiting for a write? */ events = POLLIN; if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) @@ -560,6 +575,14 @@ init_peer(struct peer *p) p->IdleHoldTimer = 0; /* no autostart */ else p->IdleHoldTimer = time(NULL); /* start ASAP */ + + /* + * on startup, demote if requested. + * do not handle new peers. they must reach ESTABLISHED beforehands. + * peers added at runtime have reconf_action set to RECONF_REINIT. + */ + if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0]) + session_demote(p, +1); } void @@ -841,6 +864,11 @@ change_state(struct peer *peer, enum ses switch (state) { case STATE_IDLE: + /* carp demotion first. 
new peers handled in peer_init */ + if (peer->state == STATE_ESTABLISHED && + peer->conf.demote_group[0] && !peer->demoted) + session_demote(peer, +1); + /* * try to write out what's buffered (maybe a notification), * don't bother if it fails @@ -2735,4 +2763,19 @@ addr2sa(struct bgpd_addr *addr, u_int16_ } return ((struct sockaddr *)&ss); +} + +void +session_demote(struct peer *p, int level) +{ + struct demote_msg msg; + + strlcpy(msg.demote_group, p->conf.demote_group, + sizeof(msg.demote_group)); + msg.level = level; + if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, + &msg, sizeof(msg)) == -1) + fatalx("imsg_compose error"); + + p->demoted += level; } Index: session.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.83 diff -u -p -r1.83 session.h --- session.h 27 May 2006 15:43:13 -0000 1.83 +++ session.h 14 Jun 2006 16:19:05 -0000 @@ -26,6 +26,7 @@ #define INTERVAL_HOLD 90 #define INTERVAL_IDLE_HOLD_INITIAL 30 #define INTERVAL_HOLD_CLONED 3600 +#define INTERVAL_HOLD_DEMOTED 60 #define MAX_IDLE_HOLD 3600 #define MSGSIZE_HEADER 19 #define MSGSIZE_HEADER_MARKER 16 @@ -178,6 +179,7 @@ struct peer { u_int16_t holdtime; u_int8_t auth_established; u_int8_t depend_ok; + u_int8_t demoted; u_int8_t passive; }; @@ -230,3 +232,8 @@ int pfkey_init(struct bgpd_sysdep *); /* printconf.c */ void print_config(struct bgpd_config *, struct network_head *, struct peer *, struct filter_head *, struct mrt_head *); + +/* carp.c */ +int carp_demote_init(char *, int); +int carp_demote_get(char *); +int carp_demote_set(char *, int); -- BS Web Services, http://www.bsws.de/ OpenBSD-based Webhosting, Mail Services, Managed Servers, ... Unix is very simple, but it takes a genius to understand the simplicity. (Dennis Ritchie)