Hello tech@,

Similarly to http://marc.info/?l=openbsd-misc&m=122633408801658&w=2
we noticed that the relayctl command 'disable table' made relayd crash
when we used relays:

# relayctl sh sum
Id      Type            Name                            Avlblty Status
1       relay           test-relay                              active
1       table           test-table:80                           empty
1       host            127.0.0.1                       0.00%   down
# relayctl table disable 1
relayctl: pipe closed
# 

The logs had the following to say:

Nov 16 10:20:53 hostname relayd[2066]: fatal: disable_table: desynchronised
Nov 16 10:20:53 hostname relayd[14235]: check_child: lost child: pf update 
engine exited
Nov 16 10:20:53 hostname relayd[14235]: check_child: lost child: host check 
engine exited

This was caused by disable_table() in pfe.c:

752         if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL)
753                 fatalx("disable_table: desynchronised");

It seems conf.rdrid is only set if this is a redirect, otherwise setting it to
0 so we changed it as such:

Index: pfe.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/pfe.c,v
retrieving revision 1.65
diff -u -r1.65 pfe.c
--- pfe.c       1 Aug 2010 22:18:35 -0000       1.65
+++ pfe.c       16 Nov 2010 09:33:02 -0000
@@ -739,7 +739,6 @@
 disable_table(struct ctl_conn *c, struct ctl_id *id)
 {
        struct table    *table;
-       struct rdr      *rdr;
        struct host     *host;
 
        if (id->id == EMPTY_ID)
@@ -749,7 +748,7 @@
        if (table == NULL)
                return (-1);
        id->id = table->conf.id;
-       if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL)
+       if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL)
                fatalx("disable_table: desynchronised");
 
        if (table->conf.flags & F_DISABLE)
@@ -768,7 +767,6 @@
 int 
 enable_table(struct ctl_conn *c, struct ctl_id *id)
 {
-       struct rdr      *rdr;
        struct table    *table;
        struct host     *host;
 
@@ -780,7 +778,7 @@
                return (-1);
        id->id = table->conf.id;
 
-       if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL)
+       if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL)
                fatalx("enable_table: desynchronised");
 
        if (!(table->conf.flags & F_DISABLE))

With this patch, the disabling/enabling of tables would work, however, upon
enabling them relayd would crash when a host in the table was having it's state
changed from unknown to up:

disable_table: disabled table 1
enable_table: enabled table 1
hce_notify_done: 127.0.0.1 (http code ok) 
host 127.0.0.1, check http code (1ms), state unknown -> up, availability 100.00%
pfe_dispatch_imsg: state 1 for host 1 127.0.0.1
relay_dispatch_pfe: host 1 => 1
fatal: relay_dispatch_pfe: desynchronized
pf update engine exiting
check_child: lost child: socket relay engine exited
host check engine exiting
terminating

This was due to two things, first of all, pfe.c only gives the
IMSG_TABLE_DISABLE and IMSG_TABLE_ENABLE messages to the hce, not the
relay engine, secondly, relay.c lacks support for handling these messages.

This required some more work on pfe.c, I borrowed the code from enable_host()
expanding the prior diff to this:

Index: pfe.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/pfe.c,v
retrieving revision 1.65
diff -u -r1.65 pfe.c
--- pfe.c       1 Aug 2010 22:18:35 -0000       1.65
+++ pfe.c       16 Nov 2010 12:02:04 -0000
@@ -739,8 +739,8 @@
 disable_table(struct ctl_conn *c, struct ctl_id *id)
 {
        struct table    *table;
-       struct rdr      *rdr;
        struct host     *host;
+       int              n;
 
        if (id->id == EMPTY_ID)
                table = table_findbyname(env, id->name);
@@ -749,7 +749,7 @@
        if (table == NULL)
                return (-1);
        id->id = table->conf.id;
-       if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL)
+       if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL)
                fatalx("disable_table: desynchronised");
 
        if (table->conf.flags & F_DISABLE)
@@ -760,6 +760,11 @@
                host->up = HOST_UNKNOWN;
        imsg_compose_event(iev_hce, IMSG_TABLE_DISABLE, 0, 0, -1,
            &table->conf.id, sizeof(table->conf.id));
+       /* Forward to relay engine(s) */
+       for (n = 0; n < env->sc_prefork_relay; n++)
+               imsg_compose_event(&iev_relay[n],
+                   IMSG_TABLE_DISABLE, 0, 0, -1,
+                   &table->conf.id, sizeof(table->conf.id));
        log_debug("disable_table: disabled table %d", table->conf.id);
        pfe_sync();
        return (0);
@@ -768,9 +773,9 @@
 int
 enable_table(struct ctl_conn *c, struct ctl_id *id)
 {
-       struct rdr      *rdr;
        struct table    *table;
        struct host     *host;
+       int              n;
 
        if (id->id == EMPTY_ID)
                table = table_findbyname(env, id->name);
@@ -780,7 +785,7 @@
                return (-1);
        id->id = table->conf.id;
 
-       if ((rdr = rdr_find(env, table->conf.rdrid)) == NULL)
+       if (table->conf.rdrid > 0 && rdr_find(env, table->conf.rdrid) == NULL)
                fatalx("enable_table: desynchronised");
 
        if (!(table->conf.flags & F_DISABLE))
@@ -792,6 +797,11 @@
                host->up = HOST_UNKNOWN;
        imsg_compose_event(iev_hce, IMSG_TABLE_ENABLE, 0, 0, -1,
            &table->conf.id, sizeof(table->conf.id));
+       /* Forward to relay engine(s) */
+       for (n = 0; n < env->sc_prefork_relay; n++)
+               imsg_compose_event(&iev_relay[n],
+                   IMSG_TABLE_ENABLE, 0, 0, -1,
+                   &table->conf.id, sizeof(table->conf.id));
        log_debug("enable_table: enabled table %d", table->conf.id);
        pfe_sync();
        return (0);

Now for the handling of IMSG_TABLE_DISABLE and IMSG_TABLE_ENABLE in relay.c 
(borrowed from hce.c):

Index: relay.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/relay.c,v
retrieving revision 1.123
diff -u -r1.123 relay.c
--- relay.c     12 Oct 2010 14:52:21 -0000      1.123
+++ relay.c     16 Nov 2010 12:02:04 -0000
@@ -2492,6 +2492,22 @@
                        host->flags &= ~(F_DISABLE);
                        host->up = HOST_UNKNOWN;
                        break;
+               case IMSG_TABLE_DISABLE:
+                       memcpy(&id, imsg.data, sizeof(id));
+                       if ((table = table_find(env, id)) == NULL)
+                               fatalx("relay_dispatch_pfe: desynchronized");
+                       table->conf.flags |= F_DISABLE;
+                       TAILQ_FOREACH(host, &table->hosts, entry)
+                               host->up = HOST_UNKNOWN;
+                       break;
+               case IMSG_TABLE_ENABLE:
+                       memcpy(&id, imsg.data, sizeof(id));
+                       if ((table = table_find(env, id)) == NULL)
+                               fatalx("relay_dispatch_pfe: desynchronized");
+                       table->conf.flags &= ~(F_DISABLE);
+                       TAILQ_FOREACH(host, &table->hosts, entry)
+                               host->up = HOST_UNKNOWN;
+                       break;
                case IMSG_HOST_STATUS:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(st))
                                fatalx("relay_dispatch_pfe: invalid request");


You will find the complete patch attached to this message for easy reference.

This was all developed, built and tested on 4.8, but the patches are against
-current.

If you think these patches doesn't suck and you wish to use them, I would very
much appreciate if you could also mention Linus Widstrvmer in the commit
message because he helped out a huge amount to makes these patches see the
light of day.

Thanks for a great OS :)

Regards,
Patrik Lundin

[demime 1.01d removed an attachment of type text/x-diff]

Reply via email to