it is possible to remove the inflight var by opening the socket before accepting. Thus there is no need for guessing if the peer socket can be open,
On Sat, May 30, 2015 at 7:40 AM, Claudio Jeker <[email protected]> wrote: > On Fri, May 22, 2015 at 09:18:29PM +0200, Alexander Bluhm wrote: > > This breaks relayd with TLS inspection. Moving down the "called > > once" check after the F_TLSINSPECT block fixes the plain SSL case. > > But HTTPS still hangs. I have just commited a test. Try > > > > cd /usr/src/regress/usr.sbin/relayd && make > run-regress-args-https-inspect.pl > > > > bluhm > > > > On Fri, May 22, 2015 at 03:55:16PM +0200, Claudio Jeker wrote: > > > On our production systems we did hit the "relay_connect: no connection > in > > > flight" on a so regular bases that I had to make it non-fatal with the > > > result of leaking sockets. > > > > > > After more investigation I found the problem to be a race against > > > connecting to the backend servers. In short: > > > - relay_read_http() will open a connection if following conditions are > met > > > cre->dir == RELAY_DIR_REQUEST && cre->toread <= 0 && cre->dst->bev > == NULL > > > > > > - relay_connect() does not initialize con->se_out.bev (which is also > > > cre->dst->bev). Instead this is deferred to relay_connected() > > > > > > - if a event happens that calls relay_read_http() while connecting to > the > > > backend then relay_connect() will be called again. Result is the > panic > > > since the count gets out of sync. > > > > > > The following diff solves this issue by adding an extra flag to > > > ctl_relay_event to know if a relay is already connected (or the > connect is > > > pending). relay_close() will then clean the flag when closing the > session. > > > I decided to use a flag since the EMFILE || ENFILE case is hard to > detect > > > otherwise. > > > > > > Running with this on production with no visible issues at the moment. > > > I think it would make sense to restructure the http proxy code more and > > > introduce a proper state machine but that is a much bigger and complex > > > issue, so lets fix the bug first. > > > > > New version that now passes all regress tests. It got a fair bit more > complex because the F_TLSINSPECT case results in multiple entries of > relay_connect and relay_connected. So I switched to use a state variable > instead of a flag. It seems to work (and also works in the EMFILE/ENFILE > case). At least it "worked" for me by forcing that code path all the time. > > -- > :wq Claudio > > Index: relay.c > =================================================================== > RCS file: /cvs/src/usr.sbin/relayd/relay.c,v > retrieving revision 1.194 > diff -u -p -r1.194 relay.c > --- relay.c 18 May 2015 16:57:20 -0000 1.194 > +++ relay.c 30 May 2015 11:12:27 -0000 > @@ -1408,8 +1408,10 @@ relay_connect_retry(int fd, short sig, v > struct relay *rlay = con->se_relay; > int bnds = -1; > > - if (relay_inflight < 1) > - fatalx("relay_connect_retry: no connection in flight"); > + if (relay_inflight < 1) { > + log_warnx("relay_connect_retry: no connection in flight"); > + relay_inflight = 1; > + } > > DPRINTF("%s: retry %d of %d, inflight: %d",__func__, > con->se_retrycount, con->se_retry, relay_inflight); > @@ -1466,6 +1468,10 @@ relay_connect_retry(int fd, short sig, v > return; > } > > + if (rlay->rl_conf.flags & F_TLSINSPECT) > + con->se_out.state = PRECONNECT; > + else > + con->se_out.state = CONNECTED; > relay_inflight--; > DPRINTF("%s: inflight decremented, now %d",__func__, > relay_inflight); > > @@ -1484,9 +1490,14 @@ relay_connect_retry(int fd, short sig, v > int > relay_preconnect(struct rsession *con) > { > + int rv; > + > log_debug("%s: session %d: process %d", __func__, > con->se_id, privsep_process); > - return (relay_connect(con)); > + rv = relay_connect(con); > + if (con->se_out.state == CONNECTED) > + con->se_out.state = PRECONNECT; > + return (rv); > } > > int > @@ -1496,18 +1507,28 @@ relay_connect(struct rsession *con) > struct timeval evtpause = { 1, 0 }; > int bnds = -1, ret; > > + /* relay_connect should only be called once per relay */ > + if (con->se_out.state == CONNECTED) { > + log_debug("%s: connect already called once", __func__); > + return (0); > + } > + > /* Connection is already established but session not active */ > - if ((rlay->rl_conf.flags & F_TLSINSPECT) && con->se_out.s != -1) { > + if ((rlay->rl_conf.flags & F_TLSINSPECT) && > + con->se_out.state == PRECONNECT) { > if (con->se_out.ssl == NULL) { > log_debug("%s: tls connect failed", __func__); > return (-1); > } > relay_connected(con->se_out.s, EV_WRITE, con); > + con->se_out.state = CONNECTED; > return (0); > } > > - if (relay_inflight < 1) > - fatalx("relay_connect: no connection in flight"); > + if (relay_inflight < 1) { > + log_warnx("relay_connect: no connection in flight"); > + relay_inflight = 1; > + } > > getmonotime(&con->se_tv_start); > > @@ -1555,6 +1576,8 @@ relay_connect(struct rsession *con) > event_del(&rlay->rl_ev); > evtimer_add(&con->se_inflightevt, &evtpause); > evtimer_add(&rlay->rl_evt, &evtpause); > + /* this connect is pending */ > + con->se_out.state = PENDING; > return (0); > } else { > if (con->se_retry) { > @@ -1572,6 +1595,7 @@ relay_connect(struct rsession *con) > } > } > > + con->se_out.state = CONNECTED; > relay_inflight--; > DPRINTF("%s: inflight decremented, now %d",__func__, > relay_inflight); > @@ -1673,6 +1697,7 @@ relay_close(struct rsession *con, const > event_add(&rlay->rl_ev, NULL); > } > } > + con->se_out.state = INIT; > > if (con->se_out.buf != NULL) > free(con->se_out.buf); > Index: relay_http.c > =================================================================== > RCS file: /cvs/src/usr.sbin/relayd/relay_http.c,v > retrieving revision 1.47 > diff -u -p -r1.47 relay_http.c > --- relay_http.c 22 May 2015 01:34:13 -0000 1.47 > +++ relay_http.c 30 May 2015 11:12:53 -0000 > @@ -275,8 +275,10 @@ relay_read_http(struct bufferevent *bev, > goto lookup; > } else if (cre->line == 1 && cre->dir == > RELAY_DIR_REQUEST) { > if ((desc->http_method = > relay_httpmethod_byname(key)) > - == HTTP_METHOD_NONE) > + == HTTP_METHOD_NONE) { > + free(line); > goto fail; > + } > /* > * Decode request path and query > */ > @@ -419,7 +421,7 @@ relay_read_http(struct bufferevent *bev, > relay_reset_http(cre); > done: > if (cre->dir == RELAY_DIR_REQUEST && cre->toread <= 0 && > - cre->dst->bev == NULL) { > + cre->dst->state != CONNECTED) { > if (rlay->rl_conf.fwdmode == FWD_TRANS) { > relay_bindanyreq(con, 0, IPPROTO_TCP); > return; > Index: relayd.h > =================================================================== > RCS file: /cvs/src/usr.sbin/relayd/relayd.h,v > retrieving revision 1.209 > diff -u -p -r1.209 relayd.h > --- relayd.h 2 May 2015 13:15:24 -0000 1.209 > +++ relayd.h 30 May 2015 10:36:28 -0000 > @@ -200,6 +200,7 @@ struct ctl_relay_event { > int line; > int done; > int timedout; > + enum { INIT, PENDING, PRECONNECT, CONNECTED } state; > enum direction dir; > > u_int8_t *buf; > > -- --------------------------------------------------------------------------------------------------------------------- () ascii ribbon campaign - against html e-mail /\
