Re: [Qemu-devel] [PATCH] file-posix: Cache lseek result for data regions

Vladimir Sementsov-Ogievskiy Thu, 24 Jan 2019 08:25:49 -0800

24.01.2019 17:17, Kevin Wolf wrote:
> Depending on the exact image layout and the storage backend (tmpfs is
> konwn to have very slow SEEK_HOLE/SEEK_DATA), caching lseek results can
> save us a lot of time e.g. during a mirror block job or qemu-img convert
> with a fragmented source image (.bdrv_co_block_status on the protocol
> layer can be called for every single cluster in the extreme case).
> 
> We may only cache data regions because of possible concurrent writers.
> This means that we can later treat a recently punched hole as data, but
> this is safe. We can't cache holes because then we might treat recently
> written data as holes, which can cause corruption.
> 
> Signed-off-by: Kevin Wolf <kw...@redhat.com>
> ---
>   block/file-posix.c | 51 ++++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 8aee7a3fb8..7272c7c99d 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -168,6 +168,12 @@ typedef struct BDRVRawState {
>       bool needs_alignment;
>       bool check_cache_dropped;
>   
> +    struct seek_data_cache {
> +        bool        valid;
> +        uint64_t    start;
> +        uint64_t    end;
> +    } seek_data_cache;
> +
>       PRManager *pr_mgr;
>   } BDRVRawState;
>   
> @@ -1555,8 +1561,17 @@ static int handle_aiocb_write_zeroes_unmap(void 
> *opaque)
>   {
>       RawPosixAIOData *aiocb = opaque;
>       BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
> +    struct seek_data_cache *sdc;
>       int ret;
>   
> +    /* Invalidate seek_data_cache if it overlaps */
> +    sdc = &s->seek_data_cache;
> +    if (sdc->valid && !(sdc->end < aiocb->aio_offset ||
> +                        sdc->start > aiocb->aio_offset + aiocb->aio_nbytes))


to be presize: <= and >=

> +    {
> +        sdc->valid = false;
> +    }
> +
>       /* First try to write zeros and unmap at the same time */
>   


Why not to drop cache on handle_aiocb_write_zeroes()? Otherwise, we'll return 
DATA
for these regions which may unallocated read-as-zero, if I'm not mistaken.


>   #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> @@ -1634,11 +1649,20 @@ static int handle_aiocb_discard(void *opaque)
>       RawPosixAIOData *aiocb = opaque;
>       int ret = -EOPNOTSUPP;
>       BDRVRawState *s = aiocb->bs->opaque;
> +    struct seek_data_cache *sdc;
>   
>       if (!s->has_discard) {
>           return -ENOTSUP;
>       }
>   
> +    /* Invalidate seek_data_cache if it overlaps */
> +    sdc = &s->seek_data_cache;
> +    if (sdc->valid && !(sdc->end < aiocb->aio_offset ||
> +                        sdc->start > aiocb->aio_offset + aiocb->aio_nbytes))

and <= and >=

and if add same to handle_aiocb_write_zeroes(), then it worth to create helper 
function
to invalidate cache.

> +    {
> +        sdc->valid = false;
> +    }
> +
>       if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
>   #ifdef BLKDISCARD
>           do {
> @@ -2424,6 +2448,8 @@ static int coroutine_fn 
> raw_co_block_status(BlockDriverState *bs,
>                                               int64_t *map,
>                                               BlockDriverState **file)
>   {
> +    BDRVRawState *s = bs->opaque;
> +    struct seek_data_cache *sdc;
>       off_t data = 0, hole = 0;
>       int ret;
>   
> @@ -2439,6 +2465,14 @@ static int coroutine_fn 
> raw_co_block_status(BlockDriverState *bs,
>           return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
>       }
>   
> +    sdc = &s->seek_data_cache;
> +    if (sdc->valid && sdc->start <= offset && sdc->end > offset) {
> +        *pnum = MIN(bytes, sdc->end - offset);
> +        *map = offset;
> +        *file = bs;
> +        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
> +    }
> +
>       ret = find_allocation(bs, offset, &data, &hole);
>       if (ret == -ENXIO) {
>           /* Trailing hole */
> @@ -2451,14 +2485,27 @@ static int coroutine_fn 
> raw_co_block_status(BlockDriverState *bs,
>       } else if (data == offset) {
>           /* On a data extent, compute bytes to the end of the extent,
>            * possibly including a partial sector at EOF. */
> -        *pnum = MIN(bytes, hole - offset);
> +        *pnum = hole - offset;

hmm, why? At least you didn't mention it in commit-message..

>           ret = BDRV_BLOCK_DATA;
>       } else {
>           /* On a hole, compute bytes to the beginning of the next extent.  */
>           assert(hole == offset);
> -        *pnum = MIN(bytes, data - offset);
> +        *pnum = data - offset;
>           ret = BDRV_BLOCK_ZERO;
>       }
> +
> +    /* Caching allocated ranges is okay even if another process writes to the
> +     * same file because we allow declaring things allocated even if there 
> is a
> +     * hole. However, we cannot cache holes without risking corruption. */
> +    if (ret == BDRV_BLOCK_DATA) {
> +        *sdc = (struct seek_data_cache) {
> +            .valid  = true,
> +            .start  = offset,
> +            .end    = offset + *pnum,
> +        };
> +    }
> +
> +    *pnum = MIN(*pnum, bytes);
>       *map = offset;
>       *file = bs;
>       return ret | BDRV_BLOCK_OFFSET_VALID;
> 


-- 
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH] file-posix: Cache lseek result for data regions

Reply via email to