On Fri, Apr 08, 2016 at 04:05:57PM -0600, Eric Blake wrote: > RFC because there is still discussion on the NBD list about > adding an NBD_OPT_ to let the client suggest server defaults > related to scanning for zeroes during NBD_CMD_WRITE, which may > tweak this patch. > > Upstream NBD protocol recently added the ability to efficiently > write zeroes without having to send the zeroes over the wire, > along with a flag to control whether the client wants a hole. > > Signed-off-by: Eric Blake <ebl...@redhat.com> > --- > include/block/nbd.h | 5 ++++- > nbd/server.c | 63 > ++++++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 64 insertions(+), 4 deletions(-) > > diff --git a/include/block/nbd.h b/include/block/nbd.h > index 4c57754..a1d955c 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply; > #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit > Access) */ > #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - > rotational media */ > #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ > +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ > #define NBD_FLAG_SEND_CLOSE (1 << 8) /* Send CLOSE */ > > /* New-style handshake (global) flags, sent from server to client, and > @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply; > #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown > */ > > /* Request flags, sent from client to server during transmission phase */ > -#define NBD_CMD_FLAG_FUA (1 << 0) > +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write > */ > +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ > > /* Supported request types */ > enum { > @@ -101,6 +103,7 @@ enum { > NBD_CMD_DISC = 2, > NBD_CMD_FLUSH = 3, > NBD_CMD_TRIM = 4, > + NBD_CMD_WRITE_ZEROES = 5, > NBD_CMD_CLOSE = 7, > }; > > diff --git a/nbd/server.c b/nbd/server.c > index 2a6eaf2..09af915 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData > *data) > int rc; > const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | > NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | > - NBD_FLAG_SEND_CLOSE); > + NBD_FLAG_SEND_CLOSE | > + NBD_FLAG_SEND_WRITE_ZEROES); > bool oldStyle; > size_t len; > > @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > goto out; > } > > - if (request->flags & ~NBD_CMD_FLAG_FUA) { > + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { > LOG("unsupported flags (got 0x%x)", request->flags); > return -EINVAL; > } > @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > TRACE("Decoding type"); > > command = request->type; > - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { > + if (request->flags & NBD_CMD_FLAG_NO_HOLE && > + !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) { > + LOG("NO_HOLE flag valid only with write operation"); > + return -EINVAL; > + } > + if (command == NBD_CMD_READ || command == NBD_CMD_WRITE || > + command == NBD_CMD_WRITE_ZEROES) { > if (request->len > NBD_MAX_BUFFER_SIZE) { > LOG("len (%" PRIu32" ) is larger than max len (%u)", > request->len, NBD_MAX_BUFFER_SIZE); > @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque) > struct nbd_reply reply; > ssize_t ret; > uint32_t command; > + int flags; > > TRACE("Reading request."); > if (client->closing) { > @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque) > > TRACE("Writing to device"); > > + /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we > + * make that override a server that is set to look for > + * holes? */ > ret = blk_write(exp->blk, > (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, > req->data, request.len / BDRV_SECTOR_SIZE); > @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque) > goto out; > } > break; > + case NBD_CMD_WRITE_ZEROES: > + TRACE("Request type is WRITE_ZEROES"); > + > + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { > + TRACE("Server is read-only, return error"); > + reply.error = EROFS; > + goto error_reply; > + } > + > + TRACE("Writing to device"); > + > + flags = 0; > + if (request.flags & NBD_CMD_FLAG_FUA) { > + flags |= BDRV_REQ_FUA; > + } > + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { > + /* FIXME: should this depend on whether the server is set to > + look for holes? */ > + flags |= BDRV_REQ_MAY_UNMAP; > + } > + ret = blk_write_zeroes(exp->blk, > + ((request.from + exp->dev_offset) / > + BDRV_SECTOR_SIZE), > + request.len / BDRV_SECTOR_SIZE, > + flags);
blk_write_zeroes() ignores flags right now. Something like this is required for this code to work: diff --git a/block/block-backend.c b/block/block-backend.c index f0470bc..3ae4157 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -827,7 +827,7 @@ int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } static void error_callback_bh(void *opaque) > + if (ret < 0) { > + LOG("writing to file failed"); > + reply.error = -ret; > + goto error_reply; > + } > + > + /* FIXME: do we need FUA flush here, if we also passed it to > + * blk_write_zeroes? */ > + if (request.flags & NBD_CMD_FLAG_FUA) { > + ret = blk_co_flush(exp->blk); > + if (ret < 0) { > + LOG("flush failed"); > + reply.error = -ret; > + goto error_reply; > + } > + } > + > + if (nbd_co_send_reply(req, &reply, 0) < 0) { > + goto out; > + } > + break; > case NBD_CMD_DISC: > TRACE("Request type is DISCONNECT"); > goto out; > -- > 2.5.5 > >