On Mon, 07/18 22:08, Eric Blake wrote: > Upstream NBD protocol recently added the ability to efficiently > write zeroes without having to send the zeroes over the wire, > along with a flag to control whether the client wants a hole. > > Signed-off-by: Eric Blake <ebl...@redhat.com> > > --- > v4: rebase, fix value for constant > v3: abandon NBD_CMD_CLOSE extension, rebase to use blk_pwrite_zeroes > --- > include/block/nbd.h | 8 ++++++-- > nbd/server.c | 42 ++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 46 insertions(+), 4 deletions(-) > > diff --git a/include/block/nbd.h b/include/block/nbd.h > index fc4426c..e23ef73 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -69,6 +69,7 @@ struct nbd_reply { > #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit > Access) */ > #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - > rotational media */ > #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ > +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ > > /* New-style handshake (global) flags, sent from server to client, and > control what will happen during handshake phase. 
*/ > @@ -94,7 +95,8 @@ struct nbd_reply { > #define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */ > > /* Request flags, sent from client to server during transmission phase */ > -#define NBD_CMD_FLAG_FUA (1 << 0) > +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write > */ > +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ > > /* Supported request types */ > enum { > @@ -102,7 +104,9 @@ enum { > NBD_CMD_WRITE = 1, > NBD_CMD_DISC = 2, > NBD_CMD_FLUSH = 3, > - NBD_CMD_TRIM = 4 > + NBD_CMD_TRIM = 4, > + /* 5 reserved for failed experiment NBD_CMD_CACHE */ > + NBD_CMD_WRITE_ZEROES = 6, > }; > > #define NBD_DEFAULT_PORT 10809 > diff --git a/nbd/server.c b/nbd/server.c > index 689636c..3a2fecb 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -610,7 +610,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData > *data) > char buf[8 + 8 + 8 + 128]; > int rc; > const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | > - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); > + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | > + NBD_FLAG_SEND_WRITE_ZEROES); > bool oldStyle; > size_t len; > > @@ -1126,11 +1127,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > rc = request->type == NBD_CMD_WRITE ? 
-ENOSPC : -EINVAL; > goto out; > } > - if (request->flags & ~NBD_CMD_FLAG_FUA) { > + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { > LOG("unsupported flags (got 0x%x)", request->flags); > rc = -EINVAL; > goto out; > } > + if (request->type != NBD_CMD_WRITE_ZEROES && > + (request->flags & NBD_CMD_FLAG_NO_HOLE)) { > + LOG("unexpected flags (got 0x%x)", request->flags); > + rc = -EINVAL; > + goto out; > + } > > rc = 0; > > @@ -1235,6 +1242,37 @@ static void nbd_trip(void *opaque) > } > break; > > + case NBD_CMD_WRITE_ZEROES: > + TRACE("Request type is WRITE_ZEROES"); > + > + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { > + TRACE("Server is read-only, return error"); > + reply.error = EROFS; > + goto error_reply; > + } > + > + TRACE("Writing to device"); > + > + flags = 0; > + if (request.flags & NBD_CMD_FLAG_FUA) { > + flags |= BDRV_REQ_FUA; > + } > + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { > + flags |= BDRV_REQ_MAY_UNMAP;
If I'm reading the NBD proto.md correctly, this is not enough if NBD_CMD_FLAG_NO_HOLE is specified. We probably need to use a zeroed buffer with blk_pwrite, or pass a new flag (BDRV_REQ_NO_HOLE) to blk_pwrite_zeroes to force the bdrv_driver_pwritev() branch in bdrv_co_do_pwrite_zeroes(). > + } > + ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, > + request.len, flags); > + if (ret < 0) { > + LOG("writing to file failed"); > + reply.error = -ret; > + goto error_reply; > + } > + > + if (nbd_co_send_reply(req, &reply, 0) < 0) { > + goto out; > + } > + break; > + > case NBD_CMD_DISC: > /* unreachable, thanks to special case in nbd_co_receive_request() */ > abort(); > -- > 2.5.5 > >