On Tue, Dec 30, 2014 at 01:07:10PM +0300, Denis V. Lunev wrote: > The idea is that we do not need to immediately sync BAT to the image as > from the guest point of view there is a possibility that IO is lost > even in the physical controller until flush command was finished. > bdrv_co_flush_to_os is exactly the right place for this purpose. > > Technically the patch aligns writes into BAT to MAX(bdrv_align, 4096), > which elliminates read-modify-write transactions on BAT update and > cache ready-to-write content in a special buffer in BDRVParallelsState. > > This buffer possibly contains ParallelsHeader if the first page of the > image should be modified. The header occupies first 64 bytes of the image > and the BAT starts immediately after it. > > It is also possible that BAT end is not aligned to the cluster size. > ParallelsHeader->data_off is not specified for this case. We should write > only part of the cache in that case. > > This patch speed ups > qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G > qemu-io -f parallels -c "write -P 0x11 0 1024k" 1.hds > writing from 50-60 Mb/sec to 80-90 Mb/sec on rotational media and > from 160 Mb/sec to 190 Mb/sec on SSD disk. > > Signed-off-by: Denis V. Lunev <d...@openvz.org> > CC: Kevin Wolf <kw...@redhat.com> > CC: Stefan Hajnoczi <stefa...@redhat.com> > --- > block/parallels.c | 99 > ++++++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 94 insertions(+), 5 deletions(-) > > diff --git a/block/parallels.c b/block/parallels.c > index 46cf031..18b9267 100644 > --- a/block/parallels.c > +++ b/block/parallels.c > @@ -62,6 +62,11 @@ typedef struct BDRVParallelsState { > uint32_t *bat; > unsigned int bat_size; > > + uint32_t *bat_cache; > + unsigned bat_cache_size; > + int bat_cache_off; > + int data_off; > + > unsigned int tracks; > > unsigned int off_multiplier; > @@ -148,6 +153,17 @@ static int parallels_open(BlockDriverState *bs, QDict > *options, int flags, > le32_to_cpus(&s->bat[i]); > > qemu_co_mutex_init(&s->lock); > + > + s->bat_cache_off = -1; > + if (bs->open_flags & BDRV_O_RDWR) { > + s->bat_cache_size = MAX(bdrv_opt_mem_align(bs->file), 4096); > + s->bat_cache = qemu_blockalign(bs->file, s->bat_cache_size); > + } > + s->data_off = le32_to_cpu(s->ph.data_off) * BDRV_SECTOR_SIZE; > + if (s->data_off == 0) { > + s->data_off = ROUND_UP(bat_offset(s->bat_size), BDRV_SECTOR_SIZE); > + } > + > return 0; > > fail_format: > @@ -171,10 +187,71 @@ static int64_t seek_to_sector(BDRVParallelsState *s, > int64_t sector_num) > return (uint64_t)s->bat[index] * s->off_multiplier + offset; > } > > +static int write_bat_cache(BlockDriverState *bs) > +{ > + BDRVParallelsState *s = bs->opaque; > + int size, off; > + > + if (s->bat_cache_off == -1) { > + /* no cached data */ > + return 0; > + } > + > + size = s->bat_cache_size; > + if (size + s->bat_cache_off > s->data_off) { > + /* avoid writing to the first data block */ > + size = s->data_off - s->bat_cache_off; > + } > + > + off = s->bat_cache_off; > + s->bat_cache_off = -1; > + return bdrv_pwrite(bs->file, off, s->bat_cache, size); > +} > + > +static int cache_bat(BlockDriverState *bs, uint32_t idx, uint32_t > new_data_off) > +{ > + int ret, i, off, cache_off; > + int64_t first_idx, last_idx; > + BDRVParallelsState *s = bs->opaque; > + uint32_t *cache = s->bat_cache; > + > + off = bat_offset(idx); > + cache_off = (off / s->bat_cache_size) * s->bat_cache_size; > + > + if (s->bat_cache_off != -1 && s->bat_cache_off != cache_off) { > + ret = write_bat_cache(bs); > + if (ret < 0) { > + return ret; > + } > + } > + > + first_idx = idx - (off - cache_off) / sizeof(uint32_t); > + last_idx = first_idx + s->bat_cache_size / sizeof(uint32_t); > + if (first_idx < 0) { > + memcpy(s->bat_cache, &s->ph, sizeof(s->ph)); > + first_idx = 0; > + cache = s->bat_cache + sizeof(s->ph) / sizeof(uint32_t); > + } > + > + if (last_idx > s->bat_size) { > + memset(cache + s->bat_size - first_idx, 0, > + sizeof(uint32_t) * (last_idx - s->bat_size)); > + } > + > + for (i = 0; i < last_idx - first_idx; i++) { > + cache[i] = cpu_to_le32(s->bat[first_idx + i]); > + }
You're re-populating the bat_cache on every bat update. Why? Shouldn't this be done only with empty cache, i.e. under if (s->bat_cache_off == -1)? > + cache[idx - first_idx] = cpu_to_le32(new_data_off); > + s->bat[idx] = new_data_off; > + > + s->bat_cache_off = cache_off; > + return 0; > +} > + > static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num) > { > BDRVParallelsState *s = bs->opaque; > - uint32_t idx, offset, tmp; > + uint32_t idx, offset; > int64_t pos; > int ret; > > @@ -190,17 +267,27 @@ static int64_t allocate_sector(BlockDriverState *bs, > int64_t sector_num) > > pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; > bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); > - s->bat[idx] = pos / s->off_multiplier; > - > - tmp = cpu_to_le32(s->bat[idx]); > > - ret = bdrv_pwrite(bs->file, bat_offset(idx), &tmp, sizeof(tmp)); > + ret = cache_bat(bs, idx, pos / s->off_multiplier); > if (ret < 0) { > return ret; > } > return (uint64_t)s->bat[idx] * s->off_multiplier + offset; > } > > +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) > +{ > + BDRVParallelsState *s = bs->opaque; > + int ret; > + > + qemu_co_mutex_lock(&s->lock); > + ret = write_bat_cache(bs); > + qemu_co_mutex_unlock(&s->lock); > + > + return ret; > +} > + > + > static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, > int nb_sectors) > { > @@ -387,6 +474,7 @@ exit: > static void parallels_close(BlockDriverState *bs) > { > BDRVParallelsState *s = bs->opaque; > + qemu_vfree(s->bat_cache); Don't you need to flush the bat cache here first? > g_free(s->bat); > } > > @@ -416,6 +504,7 @@ static BlockDriver bdrv_parallels = { > .bdrv_open = parallels_open, > .bdrv_close = parallels_close, > .bdrv_co_get_block_status = parallels_co_get_block_status, > + .bdrv_co_flush_to_os = parallels_co_flush_to_os, > .bdrv_co_readv = parallels_co_readv, > .bdrv_co_writev = parallels_co_writev, Roman.