Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben: > Previously, qemu block driver of sheepdog used hard-coded VDI object size. > This patch enables users to handle "block_size_shift" value for > calculating VDI object size. > > When you start qemu, you don't need to specify additional command option. > > But when you create the VDI which doesn't have default object size > with qemu-img command, you specify block_size_shift option. > > If you want to create a VDI of 8MB(1 << 23) object size, > you need to specify following command option. > > # qemu-img create -o block_size_shift=23 sheepdog:test1 100M > > In addition, when you don't specify qemu-img command option, > a default value of sheepdog cluster is used for creating VDI. > > # qemu-img create sheepdog:test2 100M > > Signed-off-by: Teruaki Ishizaki <ishizaki.teru...@lab.ntt.co.jp> > --- > V3: > - Delete the needless operation of buffer. > - Delete the needless operations of request header > for SD_OP_GET_CLUSTER_DEFAULT. > - Fix coding style problems. > > V2: > - Fix coding style problem (white space). > - Add members, store_policy and block_size_shift to struct SheepdogVdiReq > - Initialize request header to use block_size_shift specified by user. > --- > block/sheepdog.c | 140 > ++++++++++++++++++++++++++++++++++++++------- > include/block/block_int.h | 1 + > 2 files changed, 119 insertions(+), 22 deletions(-) > > diff --git a/block/sheepdog.c b/block/sheepdog.c > index be3176f..c9f06db 100644 > --- a/block/sheepdog.c > +++ b/block/sheepdog.c > @@ -37,6 +37,7 @@ > #define SD_OP_READ_VDIS 0x15 > #define SD_OP_FLUSH_VDI 0x16 > #define SD_OP_DEL_VDI 0x17 > +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 > > #define SD_FLAG_CMD_WRITE 0x01 > #define SD_FLAG_CMD_COW 0x02 > @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { > uint32_t base_vdi_id; > uint8_t copies; > uint8_t copy_policy; > - uint8_t reserved[2]; > + uint8_t store_policy; > + uint8_t block_size_shift; > uint32_t snapid; > uint32_t type; > uint32_t pad[2]; > @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { > uint32_t pad[5]; > } SheepdogVdiRsp; > > +typedef struct SheepdogClusterRsp { > + uint8_t proto_ver; > + uint8_t opcode; > + uint16_t flags; > + uint32_t epoch; > + uint32_t id; > + uint32_t data_length; > + uint32_t result; > + uint8_t nr_copies; > + uint8_t copy_policy; > + uint8_t block_size_shift; > + uint8_t __pad1; > + uint32_t __pad2[6]; > +} SheepdogClusterRsp; > + > typedef struct SheepdogInode { > char name[SD_MAX_VDI_LEN]; > char tag[SD_MAX_VDI_TAG_LEN]; > @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t > *vdi_id, int snapshot, > hdr.vdi_size = s->inode.vdi_size; > hdr.copy_policy = s->inode.copy_policy; > hdr.copies = s->inode.nr_copies; > + hdr.block_size_shift = s->inode.block_size_shift; > > ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); > > @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t > *vdi_id, int snapshot, > static int sd_prealloc(const char *filename, Error **errp) > { > BlockDriverState *bs = NULL; > + BDRVSheepdogState *base = NULL; > uint32_t idx, max_idx; > + uint32_t object_size; > int64_t vdi_size; > - void *buf = g_malloc0(SD_DATA_OBJ_SIZE); > + void *buf = NULL; > int ret; > > ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, > @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error > **errp) > ret = vdi_size; > goto out; > } > - max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); > + > + base = bs->opaque; > + object_size = (UINT32_C(1) << base->inode.block_size_shift); > + buf = g_malloc0(object_size);
If I understand correctly, block_size_shift can be up to 31, i.e. this is a 2 GB allocation. Do you really think this is a good idea? At least use g_try_malloc0() here, so that a memory allocation failure doesn't crash qemu. (Same goes for all potentially huge allocations that you make in the whole codebase.) > + max_idx = DIV_ROUND_UP(vdi_size, object_size); > > for (idx = 0; idx < max_idx; idx++) { > /* > * The created image can be a cloned image, so we need to read > * a data from the source image. > */ > - ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); > + ret = bdrv_pread(bs, idx * object_size, buf, object_size); > if (ret < 0) { > goto out; > } > - ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); > + ret = bdrv_pwrite(bs, idx * object_size, buf, object_size); > if (ret < 0) { > goto out; > } > @@ -1610,7 +1635,9 @@ out_with_err_set: > if (bs) { > bdrv_unref(bs); > } > - g_free(buf); > + if (buf) { > + g_free(buf); > + } This is unnecessary. g_free(NULL) is valid, it does nothing. > return ret; > } > @@ -1669,6 +1696,17 @@ static int parse_redundancy(BDRVSheepdogState *s, > const char *opt) > return 0; > } > > +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt) > +{ > + struct SheepdogInode *inode = &s->inode; > + inode->block_size_shift = (uint8_t)atoi(opt); atoi() is best avoided, it has poor error handling. But I think you don't need this parse function at all, see below. > + if (inode->block_size_shift < 20 || inode->block_size_shift > 31) { > + return -EINVAL; > + } > + > + return 0; > +} > + > static int sd_create(const char *filename, QemuOpts *opts, > Error **errp) > { > @@ -1679,6 +1717,7 @@ static int sd_create(const char *filename, QemuOpts > *opts, > BDRVSheepdogState *s; > char tag[SD_MAX_VDI_TAG_LEN]; > uint32_t snapid; > + uint64_t max_vdi_size; > bool prealloc = false; > > s = g_new0(BDRVSheepdogState, 1); > @@ -1718,10 +1757,15 @@ static int sd_create(const char *filename, QemuOpts > *opts, > } > } > > - if (s->inode.vdi_size > SD_MAX_VDI_SIZE) { > - error_setg(errp, "too big image size"); > - ret = -EINVAL; > - goto out; > + g_free(buf); > + buf = qemu_opt_get_del(opts, BLOCK_OPT_BLOCK_SIZE_SHIFT); > + if (buf) { > + ret = parse_block_size_shift(s, buf); > + if (ret < 0) { > + error_setg(errp, "Invalid block_size_shift:" > + " '%s'. please use 20-31", buf); > + goto out; > + } > } You could use qemu_opt_get_number_del() here and get a number directly from QemuOpts that you don't have to parse any more, if you defined the option as QEMU_OPT_NUMBER instead of QEMU_OPT_STRING. > if (backing_file) { > @@ -1757,6 +1801,45 @@ static int sd_create(const char *filename, QemuOpts > *opts, > } > > s->aio_context = qemu_get_aio_context(); > + > + /* if block_size_shift is not specified, get cluster default value */ > + if (s->inode.block_size_shift == 0) { > + SheepdogVdiReq hdr; > + SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; > + Error *local_err = NULL; > + int fd; > + unsigned int wlen = 0, rlen = 0; > + > + fd = connect_to_sdog(s, &local_err); > + if (fd < 0) { > + error_report("%s", error_get_pretty(local_err)); > + error_free(local_err); > + ret = -EIO; > + goto out; > + } > + > + memset(&hdr, 0, sizeof(hdr)); > + hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; > + hdr.proto_ver = SD_PROTO_VER; > + > + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, > + NULL, &wlen, &rlen); > + closesocket(fd); > + if (ret) { > + error_setg_errno(errp, -ret, "failed to get cluster default"); > + goto out; > + } > + s->inode.block_size_shift = rsp->block_size_shift; > + } > + > + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * > MAX_DATA_OBJS; > + > + if (s->inode.vdi_size > max_vdi_size) { > + error_setg(errp, "too big image size"); Please capitalise the first word in an error messages, i.e. "Too big image size". > + ret = -EINVAL; > + goto out; > + } > + > ret = do_sd_create(s, &vid, 0, errp); > if (ret) { > goto out; Kevin