Implement find_hole() for the dm-qcow2 target. Iterate over ranges with cluster granularity until a hole or data is found. To reduce code duplication, we should use the already existing parse_metadata(). We can pretend that the seek request is a read request for metadata purposes and then interpret the parsing result in our favor. Since parse_metadata() supports request postponing (for example, when the requested L2 cluster is absent in RAM), we should create a separate qio list for our queries.
https://jira.vzint.dev/browse/PSBM-145746
Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
---
 drivers/md/dm-qcow2-map.c    | 185 +++++++++++++++++++++++++++++++++++
 drivers/md/dm-qcow2-target.c |   1 +
 drivers/md/dm-qcow2.h        |   2 +
 3 files changed, 188 insertions(+)

diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
index a779889c6970..f728a52ab5e4 100644
--- a/drivers/md/dm-qcow2-map.c
+++ b/drivers/md/dm-qcow2-map.c
@@ -3980,6 +3980,18 @@ static void process_resubmit_qios(struct qcow2 *qcow2, struct list_head *qios)
 	}
 }
 
+/*
+ * Wake up qcow2_find_hole() callers whose seek qio was postponed:
+ * qio->data holds the completion such a caller is sleeping on.
+ */
+static void process_seek_qios(struct qcow2 *qcow2, struct list_head *qios)
+{
+	struct qio *qio;
+
+	while ((qio = qio_list_pop(qios)) != NULL)
+		complete(qio->data);
+}
+
 void do_qcow2_work(struct work_struct *ws)
 {
 	struct qcow2 *qcow2 = container_of(ws, struct qcow2, worker);
@@ -3991,6 +4003,7 @@ void do_qcow2_work(struct work_struct *ws)
 	LIST_HEAD(cow_indexes_qios);
 	LIST_HEAD(cow_end_qios);
 	LIST_HEAD(resubmit_qios);
+	LIST_HEAD(seek_qios);
 	unsigned int pflags = current->flags;
 
 	current->flags |= PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
@@ -4003,6 +4016,7 @@ void do_qcow2_work(struct work_struct *ws)
 	list_splice_init(&qcow2->qios[QLIST_COW_INDEXES], &cow_indexes_qios);
 	list_splice_init(&qcow2->qios[QLIST_COW_END], &cow_end_qios);
 	list_splice_init(&qcow2->resubmit_qios, &resubmit_qios);
+	list_splice_init(&qcow2->qios[QLIST_SEEK], &seek_qios);
 	spin_unlock_irq(&qcow2->deferred_lock);
 
 	process_embedded_qios(qcow2, &embedded_qios, &deferred_qios);
@@ -4013,6 +4027,7 @@ void do_qcow2_work(struct work_struct *ws)
 	process_cow_indexes_write(qcow2, &cow_indexes_qios);
 	process_cow_end(qcow2, &cow_end_qios);
 	process_resubmit_qios(qcow2, &resubmit_qios);
+	process_seek_qios(qcow2, &seek_qios);
 
 	/* This actually submits batch of md writeback, initiated above */
 	submit_metadata_writeback(qcow2);
@@ -4235,3 +4250,173 @@ static void handle_cleanup_mask(struct qio *qio)
 		ext->cleanup_mask &= ~FREE_ALLOCATED_CLU;
 	}
 }
+
+/*
+ * Return the first sector of the range served by the next L2 table, i.e.
+ * round the cluster index of qio's start up to a multiple of l2_entries.
+ * The result is cluster aligned even when qio starts in the middle of
+ * a cluster.
+ */
+static sector_t get_next_l2(struct qio *qio)
+{
+	struct qcow2 *qcow2 = qio->qcow2;
+	loff_t clu;
+
+	clu = to_bytes(qio->bi_iter.bi_sector) / qcow2->clu_size;
+	clu += qcow2->l2_entries - clu % qcow2->l2_entries;
+
+	return to_sector(clu * qcow2->clu_size);
+}
+
+/* Return the first sector of the cluster following the one qio starts in */
+static sector_t get_next_clu(struct qio *qio)
+{
+	struct qcow2 *qcow2 = qio->qcow2;
+	loff_t offset;
+
+	offset = to_bytes(qio->bi_iter.bi_sector);
+	offset = offset / qcow2->clu_size;
+	offset = (offset + 1) * qcow2->clu_size;
+
+	return to_sector(offset);
+}
+
+/*
+ * Implement SEEK_HOLE/SEEK_DATA for the target: walk the image from @offset
+ * in steps of at most one cluster until a hole or data is found.
+ *
+ * Every step is described by an on-stack read qio handed to parse_metadata().
+ * When parse_metadata() postpones the qio, do_qcow2_work() completes it via
+ * the QLIST_SEEK list and the lookup is retried.
+ *
+ * Returns the offset found (raised to @offset if the match starts before it).
+ * When the end of the image is reached, returns hdr.size for SEEK_HOLE and
+ * -ENXIO for SEEK_DATA. Returns -EINVAL for any other @whence, or the error
+ * of parse_metadata() with -ENXIO remapped to -EINVAL.
+ */
+loff_t qcow2_find_hole(struct dm_target *ti, loff_t offset, int whence)
+{
+	struct qcow2 *qcow2 = to_qcow2_target(ti)->top;
+	DECLARE_COMPLETION_ONSTACK(compl);
+	bool unmapped, zeroes, try_lower;
+	struct qio qio = {0}, *qptr;
+	loff_t result = -EINVAL;
+	struct qcow2_map map;
+	sector_t next, cand;
+	u32 size;
+	int ret;
+
+	/* SEEK_DATA and SEEK_HOLE are plain values, not bit flags */
+	if (whence != SEEK_HOLE && whence != SEEK_DATA)
+		return -EINVAL;
+
+	qio.bi_iter.bi_sector = to_sector(offset);
+	qio.bi_iter.bi_size = qcow2->clu_size - offset % qcow2->clu_size;
+
+	qcow2_init_qio(&qio, REQ_OP_READ, qcow2);
+	qio.queue_list_id = QLIST_SEEK;
+	qio.data = &compl;
+
+	while (qio.bi_iter.bi_sector < to_sector(qcow2->hdr.size)) {
+		/* Every range is looked up starting from the top delta */
+		qio.qcow2 = qcow2;
+		next = ~(sector_t)0;
+retry:
+		memset(&map, 0, sizeof(map));
+		map.qcow2 = qio.qcow2;
+		qptr = &qio;
+		ret = parse_metadata(qio.qcow2, &qptr, &map);
+		/* ENXIO has a special meaning for llseek so remap it to EINVAL */
+		if (ret < 0)
+			return (ret == -ENXIO) ? -EINVAL : ret;
+		if (qptr == NULL) {
+			/* qio was postponed: sleep until the worker completes it */
+			wait_for_completion(&compl);
+			reinit_completion(&compl);
+			goto retry;
+		}
+
+		/*
+		 * Bound the next step by what this delta allows to skip. The
+		 * range may be looked up in several deltas, and a lower delta
+		 * without an L2 table says nothing about the neighbouring
+		 * clusters of the upper ones, so keep the nearest candidate.
+		 */
+		if (map.level & L2_LEVEL)
+			cand = get_next_clu(&qio);
+		else
+			cand = get_next_l2(&qio);
+		if (cand < next)
+			next = cand;
+
+calc_subclu:
+		zeroes = unmapped = try_lower = false;
+		size = qio_all_zeroes_size(qio.qcow2, &qio, &map);
+		if (size) {
+			zeroes = true;
+		} else {
+			size = qio_unmapped_size(qio.qcow2, &qio, &map);
+			if (size)
+				unmapped = true;
+			else
+				size = qio_mapped_not_zeroes_size(qio.qcow2, &qio, &map);
+		}
+		if (unmapped)
+			try_lower = maybe_mapped_in_lower_delta(qio.qcow2, &qio);
+
+		if (unmapped && try_lower) {
+			loff_t end = to_bytes(qio.bi_iter.bi_sector) + qio.bi_iter.bi_size;
+
+			if (end < qio.qcow2->hdr.size) {
+				/*
+				 * Only the unmapped head of the range is handed to
+				 * the lower delta. The tail is described by the
+				 * current map, so come back to it afterwards.
+				 */
+				if (size < qio.bi_iter.bi_size) {
+					cand = qio.bi_iter.bi_sector + to_sector(size);
+					if (cand < next)
+						next = cand;
+					qio.bi_iter.bi_size = size;
+				}
+				qio.qcow2 = qio.qcow2->lower;
+				goto retry;
+			}
+		}
+
+		/* Does the head of the range match what the caller seeks? */
+		if ((zeroes || unmapped) == (whence == SEEK_HOLE)) {
+			result = to_bytes(qio.bi_iter.bi_sector);
+			break;
+		}
+
+		if (size && size < qio.bi_iter.bi_size) {
+			/*
+			 * The head does not match, but the tail of the range is
+			 * of another kind. It is covered by the map parsed
+			 * above, so step over the head and classify the tail:
+			 * an unmapped tail may still hold data in a lower delta.
+			 */
+			qio.bi_iter.bi_sector += to_sector(size);
+			qio.bi_iter.bi_size -= size;
+			goto calc_subclu;
+		}
+
+		/* Nothing found in this range, move on to the next one */
+		qio.bi_iter.bi_sector = next;
+		qio.bi_iter.bi_size = qcow2->clu_size -
+				      to_bytes(next) % qcow2->clu_size;
+	}
+
+	if (result >= 0 && result < offset)
+		result = offset;
+
+	if (qio.bi_iter.bi_sector >= to_sector(qcow2->hdr.size)) {
+		if (whence == SEEK_HOLE)
+			result = qcow2->hdr.size;
+		else
+			result = -ENXIO;
+	}
+
+	return result;
+}
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index ffcab9b574ab..88a7af661829 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -1020,6 +1020,7 @@ static struct target_type qcow2_target = {
 	.resume = qcow2_resume,
 	.clone_and_map_rq = qcow2_clone_and_map,
 	.message = qcow2_message,
+	.find_hole = qcow2_find_hole,
 };
 
 static int __init dm_qcow2_init(void)
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index aec64c23dbae..704e369c126f 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -151,6 +151,7 @@ enum {
 	QLIST_COW_DATA,
 	QLIST_COW_INDEXES,
 	QLIST_COW_END,
+	QLIST_SEEK,
 
 	QLIST_COUNT,
 	QLIST_INVALID = QLIST_COUNT,
@@ -299,6 +300,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
 		  char *result, unsigned int maxlen);
 int qcow2_clone_and_map(struct dm_target *ti, struct request *rq,
 		   union map_info *info, struct request **clone);
+loff_t qcow2_find_hole(struct dm_target *ti, loff_t offset, int whence);
 
 void do_qcow2_work(struct work_struct *ws);
 void do_qcow2_fsync_work(struct work_struct *ws);
-- 
2.39.3

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel