Unfortunately Linux kernel could send non-aligned requests to qemu-nbd if the caller is using O_DIRECT and does not align in-memory data to page. Thus qemu-nbd will call block layer with non-aligned requests.
qcow2_co_write_zeroes forcibly asks the caller to supply block-aligned data. In the other case it rejects with ENOTSUP which is properly handled on the upper level. The problem is that this grows the image. This could be optimized a bit: - particular request could be split to block aligned part and head/tail, which could be handled separately - writes could be omitted when we do know that the image already contains zeroes at the offsets being written Signed-off-by: Denis V. Lunev <d...@openvz.org> CC: Kevin Wolf <kw...@redhat.com> CC: Max Reitz <mre...@redhat.com> --- block/qcow2.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 470734b..9bdaa15 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2411,21 +2411,69 @@ finish: return ret; } +static int write_zeroes_chunk(BlockDriverState *bs, int64_t sector_num, int nr) +{ + int ret, count; + BlockDriverState *file; + uint8_t *buf; + struct iovec iov; + QEMUIOVector local_qiov; + + ret = bdrv_get_block_status_above(bs, NULL, sector_num, nr, &count, &file); + if (ret > 0 && (ret & BDRV_BLOCK_ZERO) && count == nr) { + /* Nothing to do. The area is zeroed already. + Worth to check to avoid image expansion for non-aligned reqs. */ + return 0; + } + + buf = qemu_blockalign0(bs, nr << BDRV_SECTOR_BITS); + iov = (struct iovec) { + .iov_base = buf, + .iov_len = nr << BDRV_SECTOR_BITS, + }; + qemu_iovec_init_external(&local_qiov, &iov, 1); + + return qcow2_co_writev(bs, sector_num, nr, &local_qiov); +} + static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; BDRVQcow2State *s = bs->opaque; - /* Emulate misaligned zero writes */ - if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { - return -ENOTSUP; + int nr = sector_num % s->cluster_sectors; + if (nr != 0) { + nr = MIN(s->cluster_sectors - nr, nb_sectors); + + ret = write_zeroes_chunk(bs, sector_num, nr); + if (ret < 0) { + return ret; + } + + sector_num += nr; + nb_sectors -= nr; + if (nb_sectors == 0) { + return 0; + } + } + + nr = nb_sectors % s->cluster_sectors; + if (nr != 0) { + ret = write_zeroes_chunk(bs, sector_num + nb_sectors - nr, nr); + if (ret < 0) { + return ret; + } + + nb_sectors -= nr; + if (nb_sectors == 0) { + return 0; + } } /* Whatever is left can use real zero clusters */ qemu_co_mutex_lock(&s->lock); - ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors); + ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors); qemu_co_mutex_unlock(&s->lock); return ret; -- 2.5.0