On Sat, Mar 20, 2021 at 04:15:29PM -0400, Ashton Fagg wrote: > Hello. > > Pinging on this one hoping to get some feedback. I've reattached the > diff below. >
Hi Ashton, I adjusted your diff a bit (mainly cleanup of spacing and other style changes). Please have a look. I think this version should be committed. Cheers -- :wq Claudio > Ashton Fagg <[email protected]> writes: > > > Hello tech, > > > > Recently I encountered a problem with automounting iscsi volumes at boot > > time. This came down to a timing issue, where iscsictl reload was > > returning before the volumes were attached - causing the machine to > > enter single user mode when it would try to fsck the iscsi volumes. > > > > See this thread: > > > > https://marc.info/?l=openbsd-bugs&m=161404736018432&w=2 > > > > Adding "&& sleep 5" to /etc/rc.d/iscsid did mitigate this, however > > that's both hacky and precarious. > > > > After some discussion with claudio@, the principled solution is to make > > iscsictl wait until the sessions are up and the devices are attached > > before returning. Since "iscsictl reload" is called in the rc script, > > this would halt the boot process long enough for all the volumes to be > > available before progressing - and thus, eliminating my problem. > > > > The following diff is an attempt at that, with some caveats. It does the > > following things: > > > > - adds a control command to iscsid to allow for polling of session and > > connection status during iscsictl reload. > > > > - book-keeping code for working out if there are sessions still > > initializing > > > > - some poll-wait mechanisms in iscsictl, which (a) waits for iscsid to > > tell it everything is up and running or (b) returns after 10 poll > > attempts 1 second apart (thus, the max delay here is currently 10 > > seconds). > > > > I have confirmed that the current implementation does indeed solve my > > problem, however there's a couple of things I still question: > > > > 1. In the poll_and_wait() function in iscsictl, you'll notice there's an > > extra sleep with the comment that it is there to give time for the > > devices to attach. In my case, the extra second was needed for both my > > devices to attach. without the extra sleep, the first device would > > attach successfully while iscsictl was still waiting, but the second > > still had not fully attached even though my book-keeping code said that > > nothing further was in flight. I question whether there's a better way > > to do this and welcome suggestions about that. > > > > 2. It's probably too chatty. Is it also perhaps better to have a > > separate command that does this, rather than hijacking "reload"? > > > > 3. I don't know whether the book-keeping code is entirely > > reasonable. I've tried to capture every state, but I do wonder if there > > are cases where things slip through the cracks. > > > > For context, here is the machine booting (and hitting single user mode) > > with the standard iscsid: > > > > https://www.youtube.com/watch?v=F09PaT-8IJU&feature=youtu.be > > > > Whereas, here is the same machine configured identically with my iscsid > > changes applied: > > > > https://www.youtube.com/watch?v=TZzmQBVDRxo&feature=youtu.be > > > > The delay appears to be somewhere around 2-3 seconds, which is less than > > the full 10 allowable by the poll_and_wait() loop - so it does appear to > > be detecting the status correctly (at least in this configuration). > > > > Anyhow, I'd love to hear some comments and suggestions. Please note this > > is my first submission outside of ports that's bigger than a 1 or 2 > > liner, and it's been a while since I've written C (I write C++ at my day > > job, and it's not systems programming...), so hopefully I haven't done > > anything too silly. > > > > Thanks, > Index: usr.sbin/iscsictl/iscsictl.c =================================================================== RCS file: /cvs/src/usr.sbin/iscsictl/iscsictl.c,v retrieving revision 1.11 diff -u -p -r1.11 iscsictl.c --- usr.sbin/iscsictl/iscsictl.c 16 Aug 2016 18:41:57 -0000 1.11 +++ usr.sbin/iscsictl/iscsictl.c 12 Apr 2021 10:17:21 -0000 @@ -40,6 +40,10 @@ struct pdu *ctl_getpdu(char *, size_t); int ctl_sendpdu(int, struct pdu *); void show_config(struct ctrlmsghdr *, struct pdu *); void show_vscsi_stats(struct ctrlmsghdr *, struct pdu *); +void poll_and_wait(void); +void poll_session_status(void); +void register_poll(struct ctrlmsghdr *, struct pdu *); +void poll_print(struct session_poll *); char cbuf[CONTROL_READ_SIZE]; @@ -48,6 +52,11 @@ struct control { int fd; } control; + +struct session_poll poll_result; +#define POLL_DELAY_SEC 1 +#define POLL_ATTEMPTS 10 + __dead void usage(void) { @@ -68,7 +77,7 @@ main (int argc, char* argv[]) char *sockname = ISCSID_CONTROL; struct session_ctlcfg *s; struct iscsi_config *cf; - int ch, val = 0; + int ch, poll = 0, val = 0; /* check flags */ while ((ch = getopt(argc, argv, "f:s:")) != -1) { @@ -135,6 +144,7 @@ main (int argc, char* argv[]) &cf->initiator, sizeof(cf->initiator)) == -1) err(1, "control_compose"); } + SIMPLEQ_FOREACH(s, &cf->sessions, entry) { struct ctrldata cdv[3]; bzero(cdv, sizeof(cdv)); @@ -157,6 +167,9 @@ main (int argc, char* argv[]) nitems(cdv), cdv) == -1) err(1, "control_build"); } + + /* Reloading, so poll for connection establishment. */ + poll = 1; break; case DISCOVERY: printf("discover %s\n", log_sockaddr(&res->addr)); @@ -174,8 +187,10 @@ main (int argc, char* argv[]) run(); - close(control.fd); + if (poll) + poll_and_wait(); + close(control.fd); return (0); } @@ -237,6 +252,11 @@ run_command(struct pdu *pdu) show_vscsi_stats(cmh, pdu); done = 1; break; + case CTRL_SESS_POLL: + register_poll(cmh, pdu); + done = 1; + break; + } } } @@ -382,4 +402,70 @@ show_vscsi_stats(struct ctrlmsghdr *cmh, vs->cnt_t2i_status[0], vs->cnt_t2i_status[1], vs->cnt_t2i_status[2]); +} + +void +poll_session_status(void) +{ + struct pdu *pdu; + + if (control_compose(NULL, CTRL_SESS_POLL, NULL, 0) == -1) + err(1, "control_compose"); + + while ((pdu = TAILQ_FIRST(&control.channel)) != NULL) { + TAILQ_REMOVE(&control.channel, pdu, entry); + run_command(pdu); + } +} + +void +poll_and_wait(void) +{ + int attempts; + + printf("waiting for config to settle.."); + fflush(stdout); + + for (attempts = 0; attempts < POLL_ATTEMPTS; attempts++) { + + poll_session_status(); + + /* Poll says we are good to go. */ + if (poll_result.sess_conn_status != 0) { + printf("ok\n"); + /* wait a bit longer so all is settled. */ + sleep(POLL_DELAY_SEC); + return; + } + + /* Poll says we should wait... */ + printf("."); + fflush(stdout); + sleep(POLL_DELAY_SEC); + } + + printf("giving up.\n"); + + poll_print(&poll_result); +} + +void +register_poll(struct ctrlmsghdr *cmh, struct pdu *pdu) +{ + if (cmh->len[0] != sizeof(poll_result)) + errx(1, "poll: bad size of response"); + + poll_result = *((struct session_poll *)pdu_getbuf(pdu, NULL, 1)); +} + +void +poll_print(struct session_poll *p) +{ + printf("Configured sessions: %d\n", p->session_count); + printf("Sessions initializing: %d\n", p->session_init_count); + printf("Sessions started/failed: %d\n", p->session_running_count); + printf("Sessions logged in: %d\n", p->conn_logged_in_count); + printf("Sessions with failed connections: %d\n", p->conn_failed_count); + printf("Sessions still attempting to connect: %d\n", + p->conn_waiting_count); } Index: usr.sbin/iscsid/Makefile =================================================================== RCS file: /cvs/src/usr.sbin/iscsid/Makefile,v retrieving revision 1.5 diff -u -p -r1.5 Makefile --- usr.sbin/iscsid/Makefile 25 Nov 2019 22:52:13 -0000 1.5 +++ usr.sbin/iscsid/Makefile 12 Apr 2021 09:20:10 -0000 @@ -2,7 +2,7 @@ PROG= iscsid SRCS= connection.c control.c initiator.c iscsid.c log.c logmsg.c pdu.c \ - session.c task.c util.c vscsi.c + poll.c session.c task.c util.c vscsi.c MAN= iscsid.8 Index: usr.sbin/iscsid/iscsid.c =================================================================== RCS file: /cvs/src/usr.sbin/iscsid/iscsid.c,v retrieving revision 1.21 diff -u -p -r1.21 iscsid.c --- usr.sbin/iscsid/iscsid.c 27 Jan 2021 07:21:54 -0000 1.21 +++ usr.sbin/iscsid/iscsid.c 12 Apr 2021 10:01:17 -0000 @@ -211,6 +211,7 @@ iscsid_ctrl_dispatch(void *ch, struct pd struct initiator_config *ic; struct session_config *sc; struct session *s; + struct session_poll p = { 0 }; int *valp; cmh = pdu_getbuf(pdu, NULL, 0); @@ -303,6 +304,12 @@ iscsid_ctrl_dispatch(void *ch, struct pd } control_compose(ch, CTRL_SUCCESS, NULL, 0); + break; + case CTRL_SESS_POLL: + TAILQ_FOREACH(s, &initiator->sessions, entry) + poll_session(&p, s); + poll_finalize(&p); + control_compose(ch, CTRL_SESS_POLL, &p, sizeof(p)); break; default: log_warnx("unknown control message type %d", cmh->type); Index: usr.sbin/iscsid/iscsid.h =================================================================== RCS file: /cvs/src/usr.sbin/iscsid/iscsid.h,v retrieving revision 1.16 diff -u -p -r1.16 iscsid.h --- usr.sbin/iscsid/iscsid.h 2 Sep 2016 16:22:31 -0000 1.16 +++ usr.sbin/iscsid/iscsid.h 12 Apr 2021 10:02:19 -0000 @@ -67,7 +67,7 @@ struct ctrldata { #define CTRL_LOG_VERBOSE 6 #define CTRL_VSCSI_STATS 7 #define CTRL_SHOW_SUM 8 - +#define CTRL_SESS_POLL 9 TAILQ_HEAD(session_head, session); TAILQ_HEAD(connection_head, connection); @@ -75,9 +75,9 @@ TAILQ_HEAD(pduq, pdu); TAILQ_HEAD(taskq, task); /* as in tcp_seq.h */ -#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) -#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) -#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) #define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) #define SESS_INIT 0x0001 @@ -251,6 +251,16 @@ struct session { int action; }; +struct session_poll { + int session_count; + int session_init_count; + int session_running_count; + int conn_logged_in_count; + int conn_failed_count; + int conn_waiting_count; + int sess_conn_status; +}; + struct connection { struct event ev; struct event wev; @@ -390,6 +400,10 @@ void vscsi_data(unsigned long, int, void void vscsi_status(int, int, void *, size_t); void vscsi_event(unsigned long, u_int, u_int); struct vscsi_stats *vscsi_stats(void); + +/* Session polling */ +void poll_session(struct session_poll *, struct session *); +void poll_finalize(struct session_poll *); /* logmsg.c */ void log_hexdump(void *, size_t); Index: usr.sbin/iscsid/poll.c =================================================================== RCS file: usr.sbin/iscsid/poll.c diff -N usr.sbin/iscsid/poll.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/iscsid/poll.c 12 Apr 2021 10:00:50 -0000 @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2021 Dr Ashton Fagg <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/socket.h> +#include <sys/uio.h> + +#include <event.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "iscsid.h" +#include "log.h" + +void +poll_session(struct session_poll *p, struct session *s) +{ + if (!s) + fatal("poll_session failed: invalid session"); + + ++p->session_count; + + /* + * If SESS_RUNNING, the session has either been brought up + * successfully, or has failed. + */ + if (s->state & SESS_RUNNING) { + struct connection *c; + int is_logged_in = 0; + int is_failed = 0; + int is_waiting = 0; + + ++p->session_running_count; + + /* Next, check what state the connections are in. */ + TAILQ_FOREACH(c, &s->connections, entry) { + if (c->state & CONN_LOGGED_IN) + ++is_logged_in; + else if (c->state & CONN_FAILED) + ++is_failed; + else if (c->state & CONN_NEVER_LOGGED_IN) + ++is_waiting; + + } + + /* + * Potentially, we have multiple connections for each session. + * Handle this by saying that a single connection logging + * in takes precedent over a failed connection. Only say + * the session login has failed if none of the connections + * have logged in and nothing is in flight. + */ + + if (is_logged_in) + ++p->conn_logged_in_count; + if (is_failed) + ++p->conn_failed_count; + if (is_waiting) + ++p->conn_waiting_count; + + /* Sessions in SESS_INIT need to be waited on. */ + } else if (s->state & SESS_INIT) + ++p->session_init_count; + else + fatal("poll_session: unknown state."); +} + +/* + * Set session_poll sess_conn_status based on the number of sessions + * versus number of logged in or failed sessions. If the numbers are + * equal iscsictl can stop polling since all sessions reached a stable state. + */ +void +poll_finalize(struct session_poll *p) +{ + p->sess_conn_status = (p->session_count == p->conn_logged_in_count + + p->conn_failed_count); +}
