Am 02.10.2016 um 21:13 hat Tomáš Golembiovský geschrieben: > Added two new options 'offset' and 'size'. This makes it possible to use > only part of the file as a device. This can be used e.g. to limit the > access only to single partition in a disk image or use a disk inside a > tar archive (like OVA). > > For now this is only possible for files in read-only mode. It should be > possible to extend it later to allow read-write mode, but would probably > require that the size of the device is kept constant (i.e. no resizing). > > Signed-off-by: Tomáš Golembiovský <tgole...@redhat.com> > --- > block/raw-posix.c | 97 > +++++++++++++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 91 insertions(+), 6 deletions(-)
Sounds like a very useful feature. I agree with comments made by others that we should include write support from the start, and that the code should be moved to raw_bsd.c. You also need to update the options for the blockdev-add QMP command in qapi/block-core.json. The relevant type is BlockdevOptionsFile. > diff --git a/block/raw-posix.c b/block/raw-posix.c > index 6ed7547..36f2712 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -136,6 +136,8 @@ typedef struct BDRVRawState { > int type; > int open_flags; > size_t buf_align; > + uint64_t offset; > + uint64_t assumed_size; > > #ifdef CONFIG_XFS > bool is_xfs:1; > @@ -154,6 +156,7 @@ typedef struct BDRVRawReopenState { > > static int fd_open(BlockDriverState *bs); > static int64_t raw_getlength(BlockDriverState *bs); > +static int64_t raw_getlength_real(BlockDriverState *bs); > > typedef struct RawPosixAIOData { > BlockDriverState *bs; > @@ -399,6 +402,16 @@ static QemuOptsList raw_runtime_opts = { > .type = QEMU_OPT_STRING, > .help = "File name of the image", > }, > + { > + .name = "offset", > + .type = QEMU_OPT_SIZE, > + .help = "Offset into the file" > + }, > + { > + .name = BLOCK_OPT_SIZE, > + .type = QEMU_OPT_SIZE, > + .help = "Virtual disk size" > + }, BLOCK_OPT_SIZE is an option for bdrv_create(). I'd either create a BDRV_OPT_SIZE to be consistent with other bdrv_open() option name macros, or just use a string literal here. > { /* end of list */ } > }, > }; > @@ -412,6 +425,7 @@ static int raw_open_common(BlockDriverState *bs, QDict > *options, > const char *filename = NULL; > int fd, ret; > struct stat st; > + int64_t real_size; > > opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); > qemu_opts_absorb_qdict(opts, options, &local_err); > @@ -421,6 +435,17 @@ static int raw_open_common(BlockDriverState *bs, QDict > *options, > goto fail; > } > > + s->offset = qemu_opt_get_size(opts, "offset", 0); > + s->assumed_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); > + > + if (((bs->drv != &bdrv_file) || !bs->read_only) && > + ((s->offset > 0) || (s->assumed_size > 0))) { > + error_setg(errp, "offset and size options are allowed only for " > + "files in read-only mode"); > + ret = -EINVAL; > + goto fail; > + } > + > filename = qemu_opt_get(opts, "filename"); > > ret = raw_normalize_devicepath(&filename); > @@ -443,6 +468,23 @@ static int raw_open_common(BlockDriverState *bs, QDict > *options, > } > s->fd = fd; > > + /* Check size and offset */ > + real_size = raw_getlength_real(bs); raw_getlength() can fail, so real_size < 0 should be handled separately (otherwise we end up with an error message that doesn't make a lot of sense). > + if (real_size < (s->offset + s->assumed_size)) { > + if (s->assumed_size == 0) { > + error_setg(errp, "The offset has to be smaller than actual size " > + "of the containing file (%ld) ", > + real_size); > + } else { > + error_setg(errp, "The sum of offset (%lu) and size (%lu) has to " > + "be smaller than actual size of the containing " > + "file (%ld) ", > + s->offset, s->assumed_size, real_size); I think the need for PRId64/PRIu64 has been mentioned. Also s/has to be smaller/must not be larger/. Equality is allowed. > + } > + ret = -EINVAL; > + goto fail; > + } > + > #ifdef CONFIG_LINUX_AIO > if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) { > error_setg(errp, "aio=native was specified, but it requires " > @@ -1271,6 +1313,19 @@ static int coroutine_fn raw_co_preadv(BlockDriverState > *bs, uint64_t offset, > uint64_t bytes, QEMUIOVector *qiov, > int flags) > { > + BDRVRawState *s = bs->opaque; > + if (s->assumed_size > 0) { > + if (offset > s->assumed_size) { > + /* Attempt to read beyond EOF */ > + return 0; > + } else if ((offset + bytes) > s->assumed_size) { > + /* Trying to read more than is available */ > + bytes = s->assumed_size - offset; > + } > + } As long as we make sure to return the right values in .bdrv_getlength(), we can assert(!s->assumed_size || offset + bytes <= s->assumed_size) instead of doing all of this. The block layer already handles reads across and beyond EOF (by returning a zeroed area, not ignoring half of the request as this code would do). > + offset += s->offset; > + > return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); > } > > @@ -1348,7 +1403,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t > offset) > } > > #ifdef __OpenBSD__ > -static int64_t raw_getlength(BlockDriverState *bs) > +static int64_t raw_getlength_real(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > int fd = s->fd; > @@ -1367,7 +1422,7 @@ static int64_t raw_getlength(BlockDriverState *bs) > return st.st_size; > } > #elif defined(__NetBSD__) > -static int64_t raw_getlength(BlockDriverState *bs) > +static int64_t raw_getlength_real(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > int fd = s->fd; > @@ -1392,7 +1447,7 @@ static int64_t raw_getlength(BlockDriverState *bs) > return st.st_size; > } > #elif defined(__sun__) > -static int64_t raw_getlength(BlockDriverState *bs) > +static int64_t raw_getlength_real(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > struct dk_minfo minfo; > @@ -1423,7 +1478,7 @@ static int64_t raw_getlength(BlockDriverState *bs) > return size; > } > #elif defined(CONFIG_BSD) > -static int64_t raw_getlength(BlockDriverState *bs) > +static int64_t raw_getlength_real(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > int fd = s->fd; > @@ -1497,7 +1552,7 @@ again: > return size; > } > #else > -static int64_t raw_getlength(BlockDriverState *bs) > +static int64_t raw_getlength_real(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > int ret; > @@ -1516,6 +1571,28 @@ static int64_t raw_getlength(BlockDriverState *bs) > } > #endif > > +static int64_t raw_getlength(BlockDriverState *bs) > +{ > + BDRVRawState *s = bs->opaque; > + > + if (s->assumed_size > 0) { > + return (int64_t)s->assumed_size; > + } > + > + int64_t size = raw_getlength_real(bs); > + if (s->offset > 0) { > + if (s->offset > size) { > + /* The size has changed! We didn't expect that. */ > + return -EIO; You're overwriting any error return code with -EIO here. I think the correct way to phrase it would be: if (size >= 0 && s->offset > size) { return -EIO; } return size - s->offset; You can subtract s->offset unconditionally as subtracting 0 doesn't hurt. > > + } > + size -= s->offset; > + } > + > + return size; > +} > + > + > + Why three newlines? > static int64_t raw_get_allocated_file_size(BlockDriverState *bs) > { > struct stat st; > @@ -1524,7 +1601,15 @@ static int64_t > raw_get_allocated_file_size(BlockDriverState *bs) > if (fstat(s->fd, &st) < 0) { > return -errno; > } > - return (int64_t)st.st_blocks * 512; > + uint64_t size = st.st_blocks * 512; > + /* If the file is sparse we have no idea which part of the file is > + * allocated and which is not. So we just make sure the returned value is > + * not greater than what we're working with. > + */ > + if (s->assumed_size > 0 && s->assumed_size < size) { > + size = s->assumed_size; > + } > + return (int64_t)size; > } This function isn't on a hot path, so maybe even using a bdrv_get_block_status() loop to determine the actual allocation status could be defendable here. I won't insist on it, though. Kevin