RFC because there is still discussion on the NBD list about adding an NBD_OPT_ to let the client suggest server defaults related to scanning for zeroes during NBD_CMD_WRITE, which may tweak this patch.
Upstream NBD protocol recently added the ability to efficiently write zeroes without having to send the zeroes over the wire, along with a flag to control whether the client wants a hole. Signed-off-by: Eric Blake <ebl...@redhat.com> --- include/block/nbd.h | 5 ++++- nbd/server.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index 4c57754..a1d955c 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply; #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ #define NBD_FLAG_SEND_CLOSE (1 << 8) /* Send CLOSE */ /* New-style handshake (global) flags, sent from server to client, and @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply; #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown */ /* Request flags, sent from client to server during transmission phase */ -#define NBD_CMD_FLAG_FUA (1 << 0) +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ /* Supported request types */ enum { @@ -101,6 +103,7 @@ enum { NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4, + NBD_CMD_WRITE_ZEROES = 5, NBD_CMD_CLOSE = 7, }; diff --git a/nbd/server.c b/nbd/server.c index 2a6eaf2..09af915 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | - NBD_FLAG_SEND_CLOSE); + NBD_FLAG_SEND_CLOSE | + NBD_FLAG_SEND_WRITE_ZEROES); bool oldStyle; size_t len; @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque goto out; } - if (request->flags & ~NBD_CMD_FLAG_FUA) { + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { LOG("unsupported flags (got 0x%x)", request->flags); return -EINVAL; } @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque TRACE("Decoding type"); command = request->type; - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + if (request->flags & NBD_CMD_FLAG_NO_HOLE && + !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) { + LOG("NO_HOLE flag valid only with write operation"); + return -EINVAL; + } + if (command == NBD_CMD_READ || command == NBD_CMD_WRITE || + command == NBD_CMD_WRITE_ZEROES) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque) struct nbd_reply reply; ssize_t ret; uint32_t command; + int flags; TRACE("Reading request."); if (client->closing) { @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque) TRACE("Writing to device"); + /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we + * make that override a server that is set to look for + * holes? */ ret = blk_write(exp->blk, (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, req->data, request.len / BDRV_SECTOR_SIZE); @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque) goto out; } break; + case NBD_CMD_WRITE_ZEROES: + TRACE("Request type is WRITE_ZEROES"); + + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { + TRACE("Server is read-only, return error"); + reply.error = EROFS; + goto error_reply; + } + + TRACE("Writing to device"); + + flags = 0; + if (request.flags & NBD_CMD_FLAG_FUA) { + flags |= BDRV_REQ_FUA; + } + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { + /* FIXME: should this depend on whether the server is set to + look for holes? */ + flags |= BDRV_REQ_MAY_UNMAP; + } + ret = blk_write_zeroes(exp->blk, + ((request.from + exp->dev_offset) / + BDRV_SECTOR_SIZE), + request.len / BDRV_SECTOR_SIZE, + flags); + if (ret < 0) { + LOG("writing to file failed"); + reply.error = -ret; + goto error_reply; + } + + /* FIXME: do we need FUA flush here, if we also passed it to + * blk_write_zeroes? */ + if (request.flags & NBD_CMD_FLAG_FUA) { + ret = blk_co_flush(exp->blk); + if (ret < 0) { + LOG("flush failed"); + reply.error = -ret; + goto error_reply; + } + } + + if (nbd_co_send_reply(req, &reply, 0) < 0) { + goto out; + } + break; case NBD_CMD_DISC: TRACE("Request type is DISCONNECT"); goto out; -- 2.5.5