In block.c, we combine bdrv_co_do_readv(), bdrv_co_do_writev() and bdrv_co_do_write_zeroes() into a single routine, bdrv_co_do_rw_vector(), and remove lots of now-unneeded helper functions.
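To give a quick picture of the resulting interface (a simplified sketch only; the full definitions, including the tracing calls, are in the diff below): the request direction is now carried in BdrvRequestFlags, and the public entry points become thin wrappers around the combined routine.

  /* Sketch only -- see the diff for the real code.  The direction of a
   * request is encoded in BdrvRequestFlags instead of being implied by
   * which of bdrv_co_do_readv()/bdrv_co_do_writev() was called: */
  typedef enum {
      BDRV_REQ_READ         = 0x0,  /* the same as false */
      BDRV_REQ_WRITE        = 0x1,  /* the same as true */
      BDRV_REQ_COPY_ON_READ = 0x2,
      BDRV_REQ_ZERO_WRITE   = 0x4,
  } BdrvRequestFlags;

  /* Public wrappers reduce to one call each, e.g.: */
  int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
                                  int nb_sectors, QEMUIOVector *qiov)
  {
      return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, qiov,
                                  BDRV_REQ_WRITE);
  }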
The sheepdog driver already had sd_co_rw_vector(), which half-combined the read and write paths. The qcow and qcow2 block drivers now contain multiplexing functions (rw_vector() -> readv()+writev()), since for these drivers the read and write routines actually have very little in common.

Signed-off-by: Michael Tokarev <m...@tls.msk.ru>
---
 block.c               |  206 ++++++++++++++++++-------------------------------
 block.h               |    6 ++
 block/nbd.c           |   95 ++++++-----------------
 block/qcow.c          |   10 ++-
 block/qcow2-cluster.c |    4 +-
 block/qcow2.c         |   13 +++-
 block/raw.c           |   15 +---
 block/sheepdog.c      |  125 +++++++++++++-----------------
 block_int.h           |    6 +-
 9 files changed, 186 insertions(+), 294 deletions(-)

diff --git a/block.c b/block.c
index 25ce28b..7a0eed7 100644
--- a/block.c
+++ b/block.c
@@ -49,21 +49,17 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ typedef enum { - BDRV_REQ_COPY_ON_READ = 0x1, - BDRV_REQ_ZERO_WRITE = 0x2, + BDRV_REQ_READ = 0x0 /* the same as false */, + BDRV_REQ_WRITE = 0x1 /* the same as true */, + BDRV_REQ_COPY_ON_READ = 0x2, + BDRV_REQ_ZERO_WRITE = 0x4, } BdrvRequestFlags; static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); -static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov); -static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov); -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags); -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *iov, + bool is_write); +static int coroutine_fn bdrv_co_do_rw_vector(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, @@ -267,9 +263,8 @@ void path_combine(char *dest, int dest_size, void bdrv_register(BlockDriver *bdrv) { /* Block drivers without coroutine functions need emulation */ - if (!bdrv->bdrv_co_readv) { - bdrv->bdrv_co_readv = bdrv_co_readv_em; - bdrv->bdrv_co_writev = bdrv_co_writev_em; + if (!bdrv->bdrv_co_rw_vector) { + bdrv->bdrv_co_rw_vector = bdrv_co_io_em; /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if * the block driver lacks aio we need to emulate that too. @@ -1338,14 +1333,9 @@ typedef struct RwCo { static void coroutine_fn bdrv_rw_co_entry(void *opaque) { RwCo *rwco = opaque; - - if (!rwco->is_write) { - rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, 0); - } else { - rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, 0); - } + rwco->ret = bdrv_co_do_rw_vector(rwco->bs, rwco->sector_num, + rwco->nb_sectors, rwco->qiov, + rwco->is_write ?
BDRV_REQ_WRITE : BDRV_REQ_READ); } /* @@ -1580,8 +1570,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len); qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, - &bounce_qiov); + ret = drv->bdrv_co_rw_vector(bs, cluster_sector_num, cluster_nb_sectors, + &bounce_qiov, BDRV_READ); if (ret < 0) { goto err; } @@ -1591,8 +1581,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num, cluster_nb_sectors); } else { - ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, - &bounce_qiov); + ret = drv->bdrv_co_rw_vector(bs, cluster_sector_num, cluster_nb_sectors, + &bounce_qiov, BDRV_WRITE); } if (ret < 0) { @@ -1613,29 +1603,36 @@ err: } /* - * Handle a read request in coroutine context + * Handle a read or write request in coroutine context + * This is internal function which is called with flags being + * one of READ,WRITE,COPY_ON_READ,ZERO_WRITE, but not any combination. + * For ZERO_WRITE, the data (qiov) is not provided. */ -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_rw_vector(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; BdrvTrackedRequest req; + bool is_write = flags & (BDRV_REQ_WRITE|BDRV_REQ_ZERO_WRITE); int ret; if (!drv) { return -ENOMEDIUM; } + if (is_write && bs->read_only) { + return -EACCES; + } if (bdrv_check_request(bs, sector_num, nb_sectors)) { return -EIO; } /* throttling disk read I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, false, nb_sectors); + bdrv_io_limits_intercept(bs, is_write, nb_sectors); } - if (bs->copy_on_read) { + if (!is_write && bs->copy_on_read) { flags |= BDRV_REQ_COPY_ON_READ; } if (flags & BDRV_REQ_COPY_ON_READ) { @@ -1646,7 +1643,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, wait_for_overlapping_requests(bs, sector_num, nb_sectors); } - tracked_request_begin(&req, bs, sector_num, nb_sectors, false); + tracked_request_begin(&req, bs, sector_num, nb_sectors, is_write); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; @@ -1662,7 +1659,39 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } } - ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + if (flags & BDRV_REQ_ZERO_WRITE) { + + /* First try the efficient write zeroes operation */ + if (drv->bdrv_co_write_zeroes) { + ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + } else { + /* Fall back to bounce buffer if write zeroes is unsupported */ + QEMUIOVector qiov; + struct iovec iov; + /*XXX may do better by allocating small buffer and repeating it several times */ + iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_base = qemu_blockalign(bs, iov.iov_len); + memset(iov.iov_base, 0, iov.iov_len); + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = drv->bdrv_co_rw_vector(bs, sector_num, nb_sectors, &qiov, BDRV_WRITE); + + qemu_vfree(iov.iov_base); + } + + } else { + ret = drv->bdrv_co_rw_vector(bs, sector_num, nb_sectors, qiov, is_write); + } + + if (is_write) { + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + } + + if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { + bs->wr_highest_sector = sector_num + nb_sectors - 1; + } + } out: tracked_request_end(&req); @@ -1679,7 +1708,7 @@ int coroutine_fn 
bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, { trace_bdrv_co_readv(bs, sector_num, nb_sectors); - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); + return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, qiov, BDRV_REQ_READ); } int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, @@ -1687,92 +1716,27 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, { trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, - BDRV_REQ_COPY_ON_READ); + return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, qiov, + BDRV_REQ_COPY_ON_READ); } -static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +int coroutine_fn bdrv_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write) { - BlockDriver *drv = bs->drv; - QEMUIOVector qiov; - struct iovec iov; - int ret; - - /* First try the efficient write zeroes operation */ - if (drv->bdrv_co_write_zeroes) { - return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); - } - - /* Fall back to bounce buffer if write zeroes is unsupported */ - iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; - iov.iov_base = qemu_blockalign(bs, iov.iov_len); - memset(iov.iov_base, 0, iov.iov_len); - qemu_iovec_init_external(&qiov, &iov, 1); - - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); - - qemu_vfree(iov.iov_base); - return ret; + return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, qiov, + is_write ? BDRV_REQ_WRITE : BDRV_REQ_READ); } /* * Handle a write request in coroutine context */ -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; - int ret; - - if (!bs->drv) { - return -ENOMEDIUM; - } - if (bs->read_only) { - return -EACCES; - } - if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; - } - - /* throttling disk write I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, true, nb_sectors); - } - - if (bs->copy_on_read_in_flight) { - wait_for_overlapping_requests(bs, sector_num, nb_sectors); - } - - tracked_request_begin(&req, bs, sector_num, nb_sectors, true); - - if (flags & BDRV_REQ_ZERO_WRITE) { - ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); - } else { - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); - } - - if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 1); - } - - if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { - bs->wr_highest_sector = sector_num + nb_sectors - 1; - } - - tracked_request_end(&req); - - return ret; -} - int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { trace_bdrv_co_writev(bs, sector_num, nb_sectors); - return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); + return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, qiov, + BDRV_REQ_WRITE); } int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, @@ -1780,8 +1744,8 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); - return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE); + return bdrv_co_do_rw_vector(bs, sector_num, nb_sectors, NULL, + BDRV_REQ_ZERO_WRITE); } /** @@ -3232,13 +3196,9 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) BlockDriverAIOCBCoroutine *acb = 
opaque; BlockDriverState *bs = acb->common.bs; - if (!acb->is_write) { - acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, 0); - } else { - acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, 0); - } + acb->req.error = bdrv_co_do_rw_vector(bs, acb->req.sector, + acb->req.nb_sectors, acb->req.qiov, + acb->is_write ? BDRV_REQ_WRITE : BDRV_REQ_READ); acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); @@ -3394,20 +3354,6 @@ static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, return co.ret; } -static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) -{ - return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); -} - -static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) -{ - return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); -} - static void coroutine_fn bdrv_flush_co_entry(void *opaque) { RwCo *rwco = opaque; diff --git a/block.h b/block.h index f0f68e0..f15d3ce 100644 --- a/block.h +++ b/block.h @@ -78,6 +78,10 @@ typedef struct BlockDevOps { #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) +/* defines for is_write for bdrv_*_rw_vector */ +#define BDRV_READ false +#define BDRV_WRITE true + typedef enum { BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, BLOCK_ERR_STOP_ANY @@ -140,6 +144,8 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset, const void *buf, int count); int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); +int coroutine_fn bdrv_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write); int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, diff --git a/block/nbd.c b/block/nbd.c index 161b299..7b757bb 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -320,91 +320,45 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) return result; } -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) -{ - BDRVNBDState *s = bs->opaque; - struct nbd_request request; - struct nbd_reply reply; - - request.type = NBD_CMD_READ; - request.from = sector_num * 512; - request.len = nb_sectors * 512; - - nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, NULL, 0) == -1) { - reply.error = errno; - } else { - nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); - } - nbd_coroutine_end(s, &request); - return -reply.error; - -} +/* qemu-nbd has a limit of slightly less than 1M per request. Try to + * remain aligned to 4K. */ +#define NBD_MAX_SECTORS 2040 -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) +static int nbd_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write) { BDRVNBDState *s = bs->opaque; + int offset = 0; struct nbd_request request; struct nbd_reply reply; - request.type = NBD_CMD_WRITE; - if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) { + request.type = is_write ? 
NBD_CMD_WRITE : NBD_CMD_READ; + if (is_write && !bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } + reply.error = 0; - request.from = sector_num * 512; - request.len = nb_sectors * 512; + do { - nbd_coroutine_start(s, &request); - if (nbd_co_send_request(s, &request, qiov->iov, offset) == -1) { - reply.error = errno; - } else { - nbd_co_receive_reply(s, &request, &reply, NULL, 0); - } - nbd_coroutine_end(s, &request); - return -reply.error; -} + request.from = sector_num * 512; + request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; -/* qemu-nbd has a limit of slightly less than 1M per request. Try to - * remain aligned to 4K. */ -#define NBD_MAX_SECTORS 2040 + nbd_coroutine_start(s, &request); + /* send_request() and receive_reply() ignores offset if iov is NULL */ + if (nbd_co_send_request(s, &request, is_write ? qiov->iov : NULL, offset) == -1) { + reply.error = errno; + } else { + nbd_co_receive_reply(s, &request, &reply, is_write ? NULL : qiov->iov, offset); + } + nbd_coroutine_end(s, &request); -static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - int offset = 0; - int ret; - while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); - if (ret < 0) { - return ret; - } offset += NBD_MAX_SECTORS * 512; sector_num += NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS; - } - return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); -} -static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - int offset = 0; - int ret; - while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); - if (ret < 0) { - return ret; - } - offset += NBD_MAX_SECTORS * 512; - sector_num += NBD_MAX_SECTORS; - nb_sectors -= NBD_MAX_SECTORS; - } - return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); + } while (reply.error == 0 && nb_sectors > 0); + + return -reply.error; } static int nbd_co_flush(BlockDriverState *bs) @@ -479,8 +433,7 @@ static BlockDriver bdrv_nbd = { .format_name = "nbd", .instance_size = sizeof(BDRVNBDState), .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev = nbd_co_writev, + .bdrv_co_rw_vector = nbd_co_rw_vector, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, diff --git a/block/qcow.c b/block/qcow.c index b1cfe1f..1c4191a 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -629,6 +629,13 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, return ret; } +static coroutine_fn int qcow_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write) +{ + return is_write ? 
qcow_co_writev(bs, sector_num, nb_sectors, qiov) + : qcow_co_readv(bs, sector_num, nb_sectors, qiov); +} + static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -875,8 +882,7 @@ static BlockDriver bdrv_qcow = { .bdrv_close = qcow_close, .bdrv_create = qcow_create, - .bdrv_co_readv = qcow_co_readv, - .bdrv_co_writev = qcow_co_writev, + .bdrv_co_rw_vector = qcow_co_rw_vector, .bdrv_co_flush_to_disk = qcow_co_flush, .bdrv_co_is_allocated = qcow_co_is_allocated, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 07a2e93..b5011e0 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -320,11 +320,11 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); - /* Call .bdrv_co_readv() directly instead of using the public block-layer + /* Call .bdrv_co_rw_vector() directly instead of using the public block-layer * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. */ - ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov); + ret = bs->drv->bdrv_co_rw_vector(bs, start_sect + n_start, n, &qiov, false); if (ret < 0) { goto out; } diff --git a/block/qcow2.c b/block/qcow2.c index 3692b45..517c443 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -648,6 +648,16 @@ fail: return ret; } +static coroutine_fn int qcow2_co_rw_vector(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov, bool is_write) +{ + return is_write ? qcow2_co_writev(bs, sector_num, nb_sectors, qiov) + : qcow2_co_readv(bs, sector_num, nb_sectors, qiov); +} + + static void qcow2_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -1359,8 +1369,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, - .bdrv_co_readv = qcow2_co_readv, - .bdrv_co_writev = qcow2_co_writev, + .bdrv_co_rw_vector = qcow2_co_rw_vector, .bdrv_co_flush_to_os = qcow2_co_flush_to_os, .bdrv_co_flush_to_disk = qcow2_co_flush_to_disk, diff --git a/block/raw.c b/block/raw.c index 1cdac0c..5348553 100644 --- a/block/raw.c +++ b/block/raw.c @@ -9,16 +9,10 @@ static int raw_open(BlockDriverState *bs, int flags) return 0; } -static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +static int coroutine_fn raw_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write) { - return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov); -} - -static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov); + return bdrv_co_rw_vector(bs->file, sector_num, nb_sectors, qiov, is_write); } static void raw_close(BlockDriverState *bs) @@ -111,8 +105,7 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, - .bdrv_co_readv = raw_co_readv, - .bdrv_co_writev = raw_co_writev, + .bdrv_co_rw_vector = raw_co_rw_vector, .bdrv_co_flush_to_disk = raw_co_flush, .bdrv_co_discard = raw_co_discard, diff --git a/block/sheepdog.c b/block/sheepdog.c index 00276f6f..d768dbf 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1419,28 +1419,60 @@ out: * Returns 1 when we need to wait a response, 0 when there is no sent * request and -errno in error cases. 
*/ -static int coroutine_fn sd_co_rw_vector(void *p) +static coroutine_fn int sd_co_rw_vector(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, bool is_write) { - SheepdogAIOCB *acb = p; + SheepdogAIOCB *acb; int ret = 0; - unsigned long len, done = 0, total = acb->nb_sectors * SECTOR_SIZE; - unsigned long idx = acb->sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE; + unsigned long len, done = 0, total = nb_sectors * SECTOR_SIZE; + unsigned long idx = sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE; uint64_t oid; - uint64_t offset = (acb->sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE; - BDRVSheepdogState *s = acb->common.bs->opaque; + uint64_t offset = (sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE; + BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; AIOReq *aio_req; - if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { + if (is_write && bs->growable && sector_num + nb_sectors > bs->total_sectors) { + /* TODO: shouldn't block here */ + if (sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE) < 0) { + return -EIO; + } + bs->total_sectors = sector_num + nb_sectors; + } + + acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL); + + if (is_write) { + + acb->aio_done_func = sd_write_done; + acb->aiocb_type = AIOCB_WRITE_UDATA; + + if (s->is_snapshot) { + /* + * In the case we open the snapshot VDI, Sheepdog creates the + * writable VDI when we do a write operation first. + */ + ret = sd_create_branch(s); + if (ret) { + acb->ret = -EIO; + goto out; + } + } + + } else { /* read */ + + int i; + acb->aiocb_type = AIOCB_READ_UDATA; + acb->aio_done_func = sd_finish_aiocb; + /* - * In the case we open the snapshot VDI, Sheepdog creates the - * writable VDI when we do a write operation first. + * TODO: we can do better; we don't need to initialize + * blindly. 
*/ - ret = sd_create_branch(s); - if (ret) { - acb->ret = -EIO; - goto out; + for (i = 0; i < qiov->niov; i++) { + memset(qiov->iov[i].iov_base, 0, qiov->iov[i].iov_len); } + } while (done != total) { @@ -1453,12 +1485,12 @@ static int coroutine_fn sd_co_rw_vector(void *p) len = MIN(total - done, SD_DATA_OBJ_SIZE - offset); if (!inode->data_vdi_id[idx]) { - if (acb->aiocb_type == AIOCB_READ_UDATA) { + if (!is_write) { goto done; } create = 1; - } else if (acb->aiocb_type == AIOCB_WRITE_UDATA + } else if (is_write && !is_data_obj_writable(inode, idx)) { /* Copy-On-Write */ create = 1; @@ -1511,63 +1543,13 @@ static int coroutine_fn sd_co_rw_vector(void *p) done += len; } out: - if (QLIST_EMPTY(&acb->aioreq_head)) { - return acb->ret; - } - return 1; -} - -static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - SheepdogAIOCB *acb; - int ret; - if (bs->growable && sector_num + nb_sectors > bs->total_sectors) { - /* TODO: shouldn't block here */ - if (sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE) < 0) { - return -EIO; - } - bs->total_sectors = sector_num + nb_sectors; - } - - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL); - acb->aio_done_func = sd_write_done; - acb->aiocb_type = AIOCB_WRITE_UDATA; - - ret = sd_co_rw_vector(acb); - if (ret <= 0) { - qemu_aio_release(acb); - return ret; - } - - qemu_coroutine_yield(); - return acb->ret; -} - -static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - SheepdogAIOCB *acb; - int i, ret; - - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL); - acb->aiocb_type = AIOCB_READ_UDATA; - acb->aio_done_func = sd_finish_aiocb; - - /* - * TODO: we can do better; we don't need to initialize - * blindly. - */ - for (i = 0; i < qiov->niov; i++) { - memset(qiov->iov[i].iov_base, 0, qiov->iov[i].iov_len); - } - - ret = sd_co_rw_vector(acb); - if (ret <= 0) { - qemu_aio_release(acb); - return ret; + if (QLIST_EMPTY(&acb->aioreq_head)) { + if (acb->ret <= 0) { + qemu_aio_release(acb); + return acb->ret; + } } qemu_coroutine_yield(); @@ -1902,8 +1884,7 @@ BlockDriver bdrv_sheepdog = { .bdrv_getlength = sd_getlength, .bdrv_truncate = sd_truncate, - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, + .bdrv_co_rw_vector = sd_co_rw_vector, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, diff --git a/block_int.h b/block_int.h index aeafa2e..146d9b7 100644 --- a/block_int.h +++ b/block_int.h @@ -122,10 +122,8 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); - int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); - int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_rw_vector)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, bool is_write); /* * Efficiently zero a region of the disk image. Typically an image format * would use a compact metadata representation to implement this. This -- 1.7.9