We don't want the guest to be able to control the reconnection rate (by sending lots of I/O requests to the block device), so we try once every 5 seconds and fail I/O requests that happen while we're disconnected.
TODO: Allow the reconnect delay to be specified as an option. TODO: Queue, rather than fail, I/O requests when disconnected Signed-off-by: Nick Thomas <n...@bytemark.co.uk> --- block/nbd.c | 54 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/block/nbd.c b/block/nbd.c index 1caae89..e55a9a7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -50,6 +50,8 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) +#define RECONNECT_DELAY 5000 + typedef struct BDRVNBDState { int sock; uint32_t nbdflags; @@ -62,6 +64,8 @@ typedef struct BDRVNBDState { Coroutine *send_coroutine; int in_flight; + QEMUTimer *recon_timer; + Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; struct nbd_reply reply; @@ -125,6 +129,27 @@ out: return err; } +static void nbd_reconnect(void *opaque) +{ + BDRVNBDState *s = opaque; + + if (nbd_is_connected(s)) { + logout("Already connected to NBD server, disarming timer\n"); + qemu_del_timer(s->recon_timer); + return; + } + + if (nbd_establish_connection(s) == 0) { + logout("Reconnected to server, disabling reconnect timer\n"); + qemu_del_timer(s->recon_timer); + } else { + logout("Failed to reconnect, trying again in %ims\n", RECONNECT_DELAY); + int64_t delay = qemu_get_clock_ms(rt_clock) + RECONNECT_DELAY; + qemu_mod_timer(s->recon_timer, delay); + } + +} + static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request) { int i; @@ -155,11 +180,15 @@ static int nbd_have_request(void *opaque) return s->in_flight > 0; } -static void nbd_abort_inflight_requests(BDRVNBDState *s) +static void nbd_disconnect_for_reconnect(BDRVNBDState *s) { int i; Coroutine *co; + int64_t recon_time; + nbd_teardown_connection(s, false); + + /* Abort all in-flight requests. TODO: these should be retried */ s->reply.handle = 0; for (i = 0; i < MAX_NBD_REQUESTS; i++) { co = s->recv_coroutine[i]; @@ -167,6 +196,9 @@ static void nbd_abort_inflight_requests(BDRVNBDState *s) qemu_coroutine_enter(co, NULL); } } + + recon_time = qemu_get_clock_ms(rt_clock) + RECONNECT_DELAY; + qemu_mod_timer(s->recon_timer, recon_time); } static void nbd_reply_ready(void *opaque) @@ -185,8 +217,7 @@ static void nbd_reply_ready(void *opaque) return; } if (ret < 0) { - nbd_teardown_connection(s, false); - nbd_abort_inflight_requests(s); + nbd_disconnect_for_reconnect(s); return; } } @@ -196,8 +227,7 @@ static void nbd_reply_ready(void *opaque) * one coroutine is called until the reply finishes. */ i = HANDLE_TO_INDEX(s, s->reply.handle); if (i >= MAX_NBD_REQUESTS) { - nbd_teardown_connection(s, false); - nbd_abort_inflight_requests(s); + nbd_disconnect_for_reconnect(s); return; } @@ -218,14 +248,14 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, { int rc, ret; - qemu_co_mutex_lock(&s->send_mutex); - - if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) { - nbd_abort_inflight_requests(s); - qemu_co_mutex_unlock(&s->send_mutex); + /* TODO: We should be able to add this to the queue regardless, once + * coroutines retry if not connected */ + if (!nbd_is_connected(s)) { return -EIO; } + qemu_co_mutex_lock(&s->send_mutex); + s->send_coroutine = qemu_coroutine_self(); qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, s); @@ -349,6 +379,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) int result; s->sock = -1; + s->recon_timer = qemu_new_timer_ms(rt_clock, nbd_reconnect, s); qemu_co_mutex_init(&s->send_mutex); qemu_co_mutex_init(&s->free_sema); @@ -520,6 +551,9 @@ static void nbd_close(BlockDriverState *bs) g_free(s->export_name); g_free(s->host_spec); + qemu_del_timer(s->recon_timer); + qemu_free_timer(s->recon_timer); + nbd_teardown_connection(s, true); }