Plain image expansion spends a lot of time updating the image file size, which seriously affects performance. The following simple test
    qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G
    qemu-io -n -c "write -P 0x11 0 1024M" ./1.hds
could be improved if the format driver pre-allocated space in the image file in reasonably sized chunks.
This patch preallocates 128 MB using bdrv_write_zeroes(), which should normally use the fallocate() call internally. The older truncate() approach can still be selected as a fallback through image open options, thanks to the previous patch. The benefit is around 15%. This is the final patch in this series. The block driver now has near-native performance. Signed-off-by: Denis V. Lunev <d...@openvz.org> CC: Kevin Wolf <kw...@redhat.com> CC: Stefan Hajnoczi <stefa...@redhat.com> --- block/parallels.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 12a9cea..5ec4a0d 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -82,6 +82,7 @@ typedef struct BDRVParallelsState { int bat_cache_off; int data_off; + int64_t prealloc_off; uint64_t prealloc_size; ParallelsPreallocMode prealloc_mode; @@ -216,9 +217,19 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail_options; } - for (i = 0; i < s->bat_size; i++) + for (i = 0; i < s->bat_size; i++) { + int64_t off; le32_to_cpus(&s->bat[i]); + if (s->bat[i] == 0) { + continue; + } + off = s->bat[i] * s->off_multiplier; + if (off >= s->prealloc_off) { + s->prealloc_off = off + s->tracks; + } + } + qemu_co_mutex_init(&s->lock); s->bat_cache_off = -1; @@ -230,6 +241,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, if (s->data_off == 0) { s->data_off = ROUND_UP(bat_offset(s->bat_size), BDRV_SECTOR_SIZE); } + if (s->prealloc_off == 0) { + s->prealloc_off = s->data_off >> BDRV_SECTOR_BITS; + } return 0; @@ -338,7 +352,19 @@ static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num) } pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; - bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); + if (s->prealloc_off + s->tracks > pos) { + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) + ret = bdrv_write_zeroes(bs->file, s->prealloc_off, + s->prealloc_size, 0); + else + ret = 
bdrv_truncate(bs->file, + (s->prealloc_off + s->prealloc_size) << BDRV_SECTOR_BITS); + if (ret < 0) { + return ret; + } + } + pos = s->prealloc_off; + s->prealloc_off += s->tracks; ret = cache_bat(bs, idx, pos / s->off_multiplier); if (ret < 0) { @@ -546,6 +572,11 @@ exit: static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; + + if (bs->open_flags & BDRV_O_RDWR) { + bdrv_truncate(bs->file, s->prealloc_off << BDRV_SECTOR_BITS); + } + qemu_vfree(s->bat_cache); g_free(s->bat); } -- 1.9.1