31.07.2018 20:30, Vladimir Sementsov-Ogievskiy wrote: > Implement reconnect. To achieve this: > > 1. add new modes: > connecting-wait: means, that reconnecting is in progress, and there > were small number of reconnect attempts, so all requests are > waiting for the connection. > connecting-nowait: reconnecting is in progress, there were a lot of > attempts of reconnect, all requests will return errors. > > two old modes are used too: > connected: normal state > quit: exiting after fatal error or on close > > Possible transitions are: > > * -> quit > connecting-* -> connected > connecting-wait -> connecting-nowait (transition is done after > reconnect-delay seconds in connecting-wait mode) > connected -> connecting-wait > > 2. Implement reconnect in connection_co. So, in connecting-* mode, > connection_co, tries to reconnect unlimited times. > > 3. Retry nbd queries on channel error, if we are in connecting-wait > state. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> > --- > block/nbd-client.h | 4 + > block/nbd-client.c | 304 > +++++++++++++++++++++++++++++++++++++++++++---------- > 2 files changed, 255 insertions(+), 53 deletions(-) > > diff --git a/block/nbd-client.h b/block/nbd-client.h > index ef8a6a9239..52e4ec66be 100644 > --- a/block/nbd-client.h > +++ b/block/nbd-client.h > @@ -40,6 +40,10 @@ typedef struct NBDClientSession { > Coroutine *connection_co; > int in_flight; > NBDClientState state; > + bool receiving; > + int connect_status; > + Error *connect_err; > + bool wait_in_flight; > > NBDClientRequest requests[MAX_NBD_REQUESTS]; > NBDReply reply; > diff --git a/block/nbd-client.c b/block/nbd-client.c > index 41e6e6e702..b09907096d 100644 > --- a/block/nbd-client.c > +++ b/block/nbd-client.c > @@ -34,10 +34,26 @@ > #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs)) > #define INDEX_TO_HANDLE(bs, index) ((index) ^ (uint64_t)(intptr_t)(bs))
[...] > +static coroutine_fn void nbd_reconnect_attempt(NBDConnection *con) > +{ > + NBDClientSession *s = nbd_get_client_session(con->bs); > + Error *local_err = NULL; > + > + assert(nbd_client_connecting(s)); > + > + /* Wait completion of all in-flight requests */ > + > + qemu_co_mutex_lock(&s->send_mutex); > + > + while (s->in_flight > 0) { > + qemu_co_mutex_unlock(&s->send_mutex); > + nbd_recv_coroutines_wake_all(s); > + s->wait_in_flight = true; > + qemu_coroutine_yield(); > + s->wait_in_flight = false; > + qemu_co_mutex_lock(&s->send_mutex); > + } > + > + qemu_co_mutex_unlock(&s->send_mutex); > + > + /* Now we are sure, that nobody accessing the channel now and nobody > + * will try to access the channel, until we set state to CONNECTED > + */ > + > + /* Finalize previous connection if any */ > + if (s->ioc) { > + nbd_client_detach_aio_context(con->bs); > + object_unref(OBJECT(s->sioc)); > + s->sioc = NULL; > + object_unref(OBJECT(s->ioc)); > + s->ioc = NULL; > + } > + > + s->connect_status = nbd_client_connect(con->bs, con->saddr, > + con->export, con->tlscreds, > + con->hostname, > con->x_dirty_bitmap, > + &local_err); > + error_free(s->connect_err); > + s->connect_err = NULL; > + error_propagate(&s->connect_err, local_err); > + local_err = NULL; > > - nbd_client_detach_aio_context(bs); > - object_unref(OBJECT(client->sioc)); > - client->sioc = NULL; > - object_unref(OBJECT(client->ioc)); > - client->ioc = NULL; > + if (s->connect_status == -EINVAL) { > + /* Protocol error or something like this, go to NBD_CLIENT_QUIT */ > + nbd_channel_error(s, s->connect_status); > + return; Unfortunately, nbd_client_connect returns -EINVAL for io errors instead of -EIO. And it is not trivial to fix it. So, this if{} should be removed. > + } > + > + if (s->connect_status < 0) { > + /* failed attempt */ > + return; > + } > + > + /* successfully connected */ > + s->state = NBD_CLIENT_CONNECTED; > + qemu_co_queue_restart_all(&s->free_sema); > +} > + -- Best regards, Vladimir