Using the new NBD extension NBD_INFO_INIT_STATE, we can pass on the information when a server reports that an image initially reads as all zeroes. The server's information is treated as stale from the moment we request a write operation, and it remains stale even across reconnections to the server. This is fine, since our intended usage of BDRV_ZERO_OPEN is to optimize qemu-img at startup; it is not something to be relied on during later image use.
Update iotests to reflect improved output of 'qemu-nbd --list'. As NBD still cannot create or resize images, we don't need to worry about BDRV_ZERO_CREATE or BDRV_ZERO_TRUNCATE. Signed-off-by: Eric Blake <ebl...@redhat.com> --- block/nbd.c | 15 +++++++++++++++ include/block/nbd.h | 4 ++++ nbd/client.c | 24 ++++++++++++++++++++---- nbd/trace-events | 1 + qemu-nbd.c | 13 +++++++++++++ tests/qemu-iotests/223.out | 4 ++++ tests/qemu-iotests/233.out | 1 + 7 files changed, 58 insertions(+), 4 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index d085554f21ea..2e1fbd6152f6 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1204,6 +1204,7 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, }; assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); + s->info.modified = true; if (flags & BDRV_REQ_FUA) { assert(s->info.flags & NBD_FLAG_SEND_FUA); request.flags |= NBD_CMD_FLAG_FUA; @@ -1276,6 +1277,7 @@ static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, }; assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); + s->info.modified = true; if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) { return 0; } @@ -1909,6 +1911,16 @@ static int nbd_co_flush(BlockDriverState *bs) return nbd_client_co_flush(bs); } +static int nbd_known_zeroes(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + + if (!s->info.modified && s->info.init_state & NBD_INIT_ZERO) { + return BDRV_ZERO_OPEN; + } + return 0; +} + static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; @@ -2027,6 +2039,7 @@ static BlockDriver bdrv_nbd = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_known_zeroes = nbd_known_zeroes, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_client_detach_aio_context, @@ -2052,6 +2065,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = 
nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_known_zeroes = nbd_known_zeroes, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_client_detach_aio_context, @@ -2077,6 +2091,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_known_zeroes = nbd_known_zeroes, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_client_detach_aio_context, diff --git a/include/block/nbd.h b/include/block/nbd.h index 0de020904a37..5103053bed49 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -307,6 +307,7 @@ struct NBDExportInfo { uint32_t min_block; uint32_t opt_block; uint32_t max_block; + uint16_t init_state; uint32_t context_id; @@ -314,6 +315,9 @@ struct NBDExportInfo { char *description; int n_contexts; char **contexts; + + /* Set during runtime to track if init_state is still trustworthy. 
*/ + bool modified; }; typedef struct NBDExportInfo NBDExportInfo; diff --git a/nbd/client.c b/nbd/client.c index ba173108baab..199a8a2bc49e 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -350,16 +350,17 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt, assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO); trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name); - buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1); + buf = g_malloc(4 + len + 2 + 2 * (info->request_sizes + 1) + 1); stl_be_p(buf, len); memcpy(buf + 4, info->name, len); - /* At most one request, everything else up to server */ - stw_be_p(buf + 4 + len, info->request_sizes); + /* One or two requests, everything else up to server */ + stw_be_p(buf + 4 + len, info->request_sizes + 1); if (info->request_sizes) { stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE); } + stw_be_p(buf + 4 + len + 2 + 2 * info->request_sizes, NBD_INFO_INIT_STATE); error = nbd_send_option_request(ioc, opt, - 4 + len + 2 + 2 * info->request_sizes, + 4 + len + 2 + 2 * (info->request_sizes + 1), buf, errp); g_free(buf); if (error < 0) { @@ -484,6 +485,21 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt, info->max_block); break; + case NBD_INFO_INIT_STATE: + if (len != sizeof(info->init_state)) { + error_setg(errp, "remaining export info len %" PRIu32 + " is unexpected size", len); + nbd_send_opt_abort(ioc); + return -1; + } + if (nbd_read16(ioc, &info->init_state, "info init state", + errp) < 0) { + nbd_send_opt_abort(ioc); + return -1; + } + trace_nbd_opt_info_init_state(info->init_state); + break; + default: /* * Not worth the bother to check if NBD_INFO_NAME or diff --git a/nbd/trace-events b/nbd/trace-events index a955918e9707..12589b2afb84 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -10,6 +10,7 @@ nbd_opt_info_go_start(const char *opt, const char *name) "Attempting %s for expo nbd_opt_info_go_success(const char *opt) "Export is ready after %s request" nbd_opt_info_unknown(int 
info, const char *name) "Ignoring unknown info %d (%s)" nbd_opt_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32 +nbd_opt_info_init_state(unsigned int flags) "Initial state flags 0x%x" nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'" nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'" nbd_receive_starttls_new_client(void) "Setting up TLS" diff --git a/qemu-nbd.c b/qemu-nbd.c index 4aa005004ebd..856df85823bc 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -220,6 +220,19 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, printf(" opt block: %u\n", list[i].opt_block); printf(" max block: %u\n", list[i].max_block); } + { + static const char *const init_names[] = { + [NBD_INIT_SPARSE_BIT] = "sparse", + [NBD_INIT_ZERO_BIT] = "zero", + }; + printf(" init state: 0x%x (", list[i].init_state); + for (size_t bit = 0; bit < ARRAY_SIZE(init_names); bit++) { + if (init_names[bit] && (list[i].init_state & (1 << bit))) { + printf(" %s", init_names[bit]); + } + } + printf(" )\n"); + } if (list[i].n_contexts) { printf(" available meta contexts: %d\n", list[i].n_contexts); for (j = 0; j < list[i].n_contexts; j++) { diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out index 80c0cf65095b..ce7945aa7cf6 100644 --- a/tests/qemu-iotests/223.out +++ b/tests/qemu-iotests/223.out @@ -59,6 +59,7 @@ exports available: 2 min block: 1 opt block: 4096 max block: 33554432 + init state: 0x0 ( ) available meta contexts: 2 base:allocation qemu:dirty-bitmap:b @@ -69,6 +70,7 @@ exports available: 2 min block: 1 opt block: 4096 max block: 33554432 + init state: 0x0 ( ) available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 @@ -140,6 +142,7 @@ exports available: 2 min block: 1 opt block: 4096 max block: 33554432 + init state: 0x0 ( ) available meta contexts: 2 base:allocation qemu:dirty-bitmap:b @@ 
-150,6 +153,7 @@ exports available: 2 min block: 1 opt block: 4096 max block: 33554432 + init state: 0x0 ( ) available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 diff --git a/tests/qemu-iotests/233.out b/tests/qemu-iotests/233.out index c3c344811b2b..5be30d6b7c9c 100644 --- a/tests/qemu-iotests/233.out +++ b/tests/qemu-iotests/233.out @@ -43,6 +43,7 @@ exports available: 1 min block: 1 opt block: 4096 max block: 33554432 + init state: 0x0 ( ) available meta contexts: 1 base:allocation -- 2.24.1