Hello tech@, Similarly to http://marc.info/?l=openbsd-misc&m=122633408801658&w=2 we noticed that the relayctl command 'disable table' made relayd crash when we used relays:
# relayctl sh sum Id Type Name Avlblty Status 1 relay test-relay active 1 table test-table:80 empty 1 host 127.0.0.1 0.00% down # relayctl table disable 1 relayctl: pipe closed # The logs had the following to say: Nov 16 10:20:53 hostname relayd[2066]: fatal: disable_table: desynchronised Nov 16 10:20:53 hostname relayd[14235]: check_child: lost child: pf update engine exited Nov 16 10:20:53 hostname relayd[14235]: check_child: lost child: host check engine exited This was caused by disable_table() in pfe.c: 752 if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL) 753 fatalx("disable_table: desynchronised"); It seems conf.rdrid is only set if this is a redirect, otherwise setting it to 0 so we changed it as such: Index: pfe.c =================================================================== RCS file: /cvs/src/usr.sbin/relayd/pfe.c,v retrieving revision 1.65 diff -u -r1.65 pfe.c --- pfe.c 1 Aug 2010 22:18:35 -0000 1.65 +++ pfe.c 16 Nov 2010 09:33:02 -0000 @@ -739,7 +739,6 @@ disable_table(struct ctl_conn *c, struct ctl_id *id) { struct table *table; - struct rdr *rdr; struct host *host; if (id->id == EMPTY_ID) @@ -749,7 +748,7 @@ if (table == NULL) return (-1); id->id = table->conf.id; - if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL) + if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL) fatalx("disable_table: desynchronised"); if (table->conf.flags & F_DISABLE) @@ -768,7 +767,6 @@ int enable_table(struct ctl_conn *c, struct ctl_id *id) { - struct rdr *rdr; struct table *table; struct host *host; @@ -780,7 +778,7 @@ return (-1); id->id = table->conf.id; - if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL) + if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL) fatalx("enable_table: desynchronised"); if (!(table->conf.flags & F_DISABLE)) With this patch, the disabling/enabling of tables would work, however, upon enabling them relayd would crash when a host in the table was having it's state changed from unknown to up: disable_table: disabled table 1 enable_table: enabled table 1 hce_notify_done: 127.0.0.1 (http code ok) host 127.0.0.1, check http code (1ms), state unknown -> up, availability 100.00% pfe_dispatch_imsg: state 1 for host 1 127.0.0.1 relay_dispatch_pfe: host 1 => 1 fatal: relay_dispatch_pfe: desynchronized pf update engine exiting check_child: lost child: socket relay engine exited host check engine exiting terminating This was due to two things, first of all, pfe.c only gives the IMSG_TABLE_DISABLE and IMSG_TABLE_ENABLE messages to the hce, not the relay engine, secondly, relay.c lacks support for handling these messages. This required some more work on pfe.c, I borrowed the code from enable_host() expanding the prior diff to this: Index: pfe.c =================================================================== RCS file: /cvs/src/usr.sbin/relayd/pfe.c,v retrieving revision 1.65 diff -u -r1.65 pfe.c --- pfe.c 1 Aug 2010 22:18:35 -0000 1.65 +++ pfe.c 16 Nov 2010 12:02:04 -0000 @@ -739,8 +739,8 @@ disable_table(struct ctl_conn *c, struct ctl_id *id) { struct table *table; - struct rdr *rdr; struct host *host; + int n; if (id->id == EMPTY_ID) table = table_findbyname(env, id->name); @@ -749,7 +749,7 @@ if (table == NULL) return (-1); id->id = table->conf.id; - if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL) + if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL) fatalx("disable_table: desynchronised"); if (table->conf.flags & F_DISABLE) @@ -760,6 +760,11 @@ host->up = HOST_UNKNOWN; imsg_compose_event(iev_hce, IMSG_TABLE_DISABLE, 0, 0, -1, &table->conf.id, sizeof(table->conf.id)); + /* Forward to relay engine(s) */ + for (n = 0; n < env->sc_prefork_relay; n++) + imsg_compose_event(&iev_relay[n], + IMSG_TABLE_DISABLE, 0, 0, -1, + &table->conf.id, sizeof(table->conf.id)); log_debug("disable_table: disabled table %d", table->conf.id); pfe_sync(); return (0); @@ -768,9 +773,9 @@ int enable_table(struct ctl_conn *c, struct ctl_id *id) { - struct rdr *rdr; struct table *table; struct host *host; + int n; if (id->id == EMPTY_ID) table = table_findbyname(env, id->name); @@ -780,7 +785,7 @@ return (-1); id->id = table->conf.id; - if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL) + if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL) fatalx("enable_table: desynchronised"); if (!(table->conf.flags & F_DISABLE)) @@ -792,6 +797,11 @@ host->up = HOST_UNKNOWN; imsg_compose_event(iev_hce, IMSG_TABLE_ENABLE, 0, 0, -1, &table->conf.id, sizeof(table->conf.id)); + /* Forward to relay engine(s) */ + for (n = 0; n < env->sc_prefork_relay; n++) + imsg_compose_event(&iev_relay[n], + IMSG_TABLE_ENABLE, 0, 0, -1, + &table->conf.id, sizeof(table->conf.id)); log_debug("enable_table: enabled table %d", table->conf.id); pfe_sync(); return (0); Now for the handling of IMSG_TABLE_DISABLE and IMSG_TABLE_ENABLE in relay.c (borrowed from hce.c): Index: relay.c =================================================================== RCS file: /cvs/src/usr.sbin/relayd/relay.c,v retrieving revision 1.123 diff -u -r1.123 relay.c --- relay.c 12 Oct 2010 14:52:21 -0000 1.123 +++ relay.c 16 Nov 2010 12:02:04 -0000 @@ -2492,6 +2492,22 @@ host->flags &= ~(F_DISABLE); host->up = HOST_UNKNOWN; break; + case IMSG_TABLE_DISABLE: + memcpy(&id, imsg.data, sizeof(id)); + if ((table = table_find(env, id)) == NULL) + fatalx("relay_dispatch_pfe: desynchronized"); + table->conf.flags |= F_DISABLE; + TAILQ_FOREACH(host, &table->hosts, entry) + host->up = HOST_UNKNOWN; + break; + case IMSG_TABLE_ENABLE: + memcpy(&id, imsg.data, sizeof(id)); + if ((table = table_find(env, id)) == NULL) + fatalx("relay_dispatch_pfe: desynchronized"); + table->conf.flags &= ~(F_DISABLE); + TAILQ_FOREACH(host, &table->hosts, entry) + host->up = HOST_UNKNOWN; + break; case IMSG_HOST_STATUS: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(st)) fatalx("relay_dispatch_pfe: invalid request"); You will find the complete patch attached to this message for easy reference. This was all developed, built and tested on 4.8, but the patches are against -current. If you think these patches doesn't suck and you wish to use them, I would very much appreciate if you could also mention Linus Widstrvmer in the commit message because he helped out a huge amount to makes these patches see the light of day. Thanks for a great OS :) Regards, Patrik Lundin [demime 1.01d removed an attachment of type text/x-diff]