On Sat, Jun 20, 2020 at 05:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote: > It may be used for file-systems with slow allocation. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> > --- > qapi/block-core.json | 3 +- > block/preallocate.c | 255 +++++++++++++++++++++++++++++++++++++++++++ > block/Makefile.objs | 1 + > 3 files changed, 258 insertions(+), 1 deletion(-) > create mode 100644 block/preallocate.c
Please add documentation to docs/system/qemu-block-drivers.rst.inc describing the purpose of this block driver and how to use it. Since this filter grows the file I guess it's intended to be below an image format? > diff --git a/qapi/block-core.json b/qapi/block-core.json > index 0e1c6a59f2..a0bda399d6 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -2805,7 +2805,7 @@ > 'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp', > 'ftps', > 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', > 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', > - 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd', > + 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd', > { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' }, > 'sheepdog', > 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' > ] } > @@ -3995,6 +3995,7 @@ > 'null-co': 'BlockdevOptionsNull', > 'nvme': 'BlockdevOptionsNVMe', > 'parallels': 'BlockdevOptionsGenericFormat', > + 'preallocate':'BlockdevOptionsGenericFormat', > 'qcow2': 'BlockdevOptionsQcow2', > 'qcow': 'BlockdevOptionsQcow', > 'qed': 'BlockdevOptionsGenericCOWFormat', > diff --git a/block/preallocate.c b/block/preallocate.c > new file mode 100644 > index 0000000000..c272a6e41d > --- /dev/null > +++ b/block/preallocate.c > @@ -0,0 +1,255 @@ > +/* > + * preallocate filter driver > + * > + * The driver performs preallocate operation: it is injected above > + * some node, and before each write over EOF it does additional preallocating > + * write-zeroes request. > + * > + * Copyright (c) 2020 Virtuozzo International GmbH. > + * > + * Author: > + * Sementsov-Ogievskiy Vladimir <vsement...@virtuozzo.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "qemu/osdep.h" > + > +#include "qemu/module.h" > +#include "qemu/units.h" > +#include "block/block_int.h" > + > + > +typedef struct BDRVPreallocateState { > + int64_t prealloc_size; > + int64_t prealloc_align; > + > + /* > + * Track real data end, to crop preallocation on close data_end may be > + * negative, which means that actual status is unknown (nothing cropped > in > + * this case) > + */ > + int64_t data_end; > +} BDRVPreallocateState; > + > + > +static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, > + Error **errp) > +{ > + BDRVPreallocateState *s = bs->opaque; > + > + /* > + * Parameters are hardcoded now. May need to add corresponding options in > + * future. > + */ The code for .bdrv_open() options is quick to write. If you add the options right away then it will be much easier for users who need to tweak them in the future. > + s->prealloc_align = 1 * MiB; > + s->prealloc_size = 128 * MiB; > + > + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, > + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, > + false, errp); > + if (!bs->file) { > + return -EINVAL; > + } > + > + s->data_end = bdrv_getlength(bs->file->bs); > + if (s->data_end < 0) { > + return s->data_end; > + } > + > + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | > + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); > + > + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | > + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & > + bs->file->bs->supported_zero_flags); > + > + return 0; > +} > + > +static void preallocate_close(BlockDriverState *bs) > +{ > + BDRVPreallocateState *s = bs->opaque; > + > + if (s->data_end >= 0 && bdrv_getlength(bs->file->bs) > s->data_end) { > + bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0, > NULL); > + } > +} > + > +static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c, > + BdrvChildRole role, > + BlockReopenQueue *reopen_queue, > + uint64_t perm, uint64_t shared, > + uint64_t *nperm, uint64_t *nshared) > +{ > + bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, > nshared); > + > + /* Force RESIZE permission, to be able to crop file on close() */ > + *nperm |= BLK_PERM_RESIZE; > +} > + > +static coroutine_fn int preallocate_co_preadv_part( > + BlockDriverState *bs, uint64_t offset, uint64_t bytes, > + QEMUIOVector *qiov, size_t qiov_offset, int flags) > +{ > + return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, > + flags); > +} > + > +static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs, > + int64_t offset, int bytes) > +{ > + return bdrv_co_pdiscard(bs->file, offset, bytes); > +} > + > +static bool coroutine_fn do_preallocate(BlockDriverState *bs, int64_t offset, > + int64_t bytes, bool write_zero) > +{ > + BDRVPreallocateState *s = bs->opaque; > + int64_t len, start, end; > + BdrvTrackedRequest *lock; > + int ret; > + > + if (s->data_end >= 0) { > + s->data_end = MAX(s->data_end, > + QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE)); > + } > + > + len = bdrv_getlength(bs->file->bs); > + if (len < 0) { > + return false; > + } > + > + if (s->data_end < 0) { > + s->data_end = MAX(len, > + QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE)); > + } > + > + if (offset + bytes <= len) { > + return false; > + } > + > + lock = bdrv_co_range_try_lock(bs->file->bs, len, INT64_MAX - len); > + if (!lock) { > + /* There are already preallocating requests in-fligth */ s/fligth/flight/ > + return false; > + } > + > + /* Length should not have changed */ > + assert(len == bdrv_getlength(bs->file->bs)); > + > + start = write_zero ? MIN(offset, len) : len; > + end = QEMU_ALIGN_UP(offset + bytes + s->prealloc_size, > s->prealloc_align); > + > + ret = bdrv_co_pwrite_zeroes_locked(bs->file, start, end - start, > + BDRV_REQ_NO_FALLBACK, lock); > + > + bdrv_co_range_unlock(lock); Hmm...if this piece of code is the only user of bdrv_co_range_try_lock() then a BDRV_REQ_NO_WAIT flag might be a simpler API. I thought the lock request would be used to perform multiple operations, but if it's just for a single operation then I think it's less code and easier to understand without the lock request. > + > + return !ret; > +} > + > +static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs, > + int64_t offset, int bytes, BdrvRequestFlags flags) > +{ > + if (do_preallocate(bs, offset, bytes, true)) { > + return 0; > + } > + > + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); > +} > + > +static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs, > + uint64_t offset, > + uint64_t bytes, > + QEMUIOVector *qiov, > + size_t qiov_offset, > + int flags) > +{ > + do_preallocate(bs, offset, bytes, false); > + > + return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, > + flags); > +} > + > +static int coroutine_fn > +preallocate_co_truncate(BlockDriverState *bs, int64_t offset, > + bool exact, PreallocMode prealloc, > + BdrvRequestFlags flags, Error **errp) > +{ > + BDRVPreallocateState *s = bs->opaque; > + int ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, > errp); > + > + /* s->data_end may become negative here, which means unknown data end */ > + s->data_end = bdrv_getlength(bs->file->bs); > + > + return ret; > +} > + > +static int coroutine_fn preallocate_co_flush(BlockDriverState *bs) > +{ > + if (!bs->file) { > + return 0; > + } When does this happen? It's surprising to see the !bs->file check here but not in other functions. > + > + return bdrv_co_flush(bs->file->bs); > +} > + > +static int64_t preallocate_getlength(BlockDriverState *bs) > +{ > + /* > + * We probably can return s->data_end here, but seems safer to return > real > + * file length, not trying to hide the preallocation. > + * > + * Still, don't miss the chance to restore s->data_end if it is broken. > + */ > + BDRVPreallocateState *s = bs->opaque; > + int64_t ret = bdrv_getlength(bs->file->bs); > + > + if (s->data_end < 0) { > + s->data_end = ret; > + } > + > + return ret; > +} > + > +BlockDriver bdrv_preallocate_filter = { > + .format_name = "preallocate", > + .instance_size = sizeof(BDRVPreallocateState), > + > + .bdrv_getlength = preallocate_getlength, > + .bdrv_open = preallocate_open, > + .bdrv_close = preallocate_close, > + > + .bdrv_co_preadv_part = preallocate_co_preadv_part, > + .bdrv_co_pwritev_part = preallocate_co_pwritev_part, > + .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes, > + .bdrv_co_pdiscard = preallocate_co_pdiscard, > + .bdrv_co_flush = preallocate_co_flush, > + .bdrv_co_truncate = preallocate_co_truncate, > + > + .bdrv_co_block_status = bdrv_co_block_status_from_file, > + > + .bdrv_child_perm = preallocate_child_perm, > + > + .has_variable_length = true, > + .is_filter = true, > +}; > + > +static void bdrv_preallocate_init(void) > +{ > + bdrv_register(&bdrv_preallocate_filter); > +} > + > +block_init(bdrv_preallocate_init); > diff --git a/block/Makefile.objs b/block/Makefile.objs > index 3635b6b4c1..f46a353a35 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -45,6 +45,7 @@ block-obj-y += crypto.o > block-obj-y += aio_task.o > block-obj-y += backup-top.o > block-obj-y += filter-compress.o > +block-obj-y += preallocate.o > common-obj-y += monitor/ > > block-obj-y += stream.o > -- > 2.18.0 >
signature.asc
Description: PGP signature