Wenchao Xia <xiaw...@linux.vnet.ibm.com> wrote: > This patch is the implemention of transaction based snapshot > internal API. Internal snapshot for specified block device > is added, which can be used as part of functionality in one way > to live full back up vm seperating internal block snapshot and > vmstate(vmstate goes to another file, not implemented in this > patch). > Every request's handler need to have three function: > execution, updating, cancelling. Also another check function
canceling > could be provided to check if request is valid before execition. > internal snapshot implemention was based on the code from > diet...@proxmox.com. > > Signed-off-by: Wenchao Xia <xiaw...@linux.vnet.ibm.com> > Signed-off-by: Dietmar Maurer <diet...@proxmox.com> > --- > blockdev.c | 515 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 515 insertions(+), 0 deletions(-) > > diff --git a/blockdev.c b/blockdev.c > index 9a05e57..1c38c67 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -660,6 +660,521 @@ void do_commit(Monitor *mon, const QDict *qdict) > } > } > > +/* snapshot functions. > + * following are implemention around core struct BlkTransactionStates. Normally qemu commonts dont' start with one space. Same for all comments in the series > + * to start, invalidate, cancel the action. > + */ > + > +/* Block internal snapshot(qcow2, sheepdog image...) support. > + checking and execution was splitted to enable a check for every device > +before execution in group case. */ > + > +SNTime get_sn_time(void) > +{ > + SNTime time; > + /* is uint32_t big enough to get time_t value on other machine ? */ > +#ifdef _WIN32 > + struct _timeb tb; > + _ftime(&tb); > + time.date_sec = tb.time; > + time.date_nsec = tb.millitm * 1000000; > +#else > + struct timeval tv; > + gettimeofday(&tv, NULL); > + time.date_sec = tv.tv_sec; > + time.date_nsec = tv.tv_usec * 1000; > +#endif Can we move this bit of code os-lib-win32.c? (yes, the mess already existed before this patch) > + time.vm_clock_nsec = qemu_get_clock_ns(vm_clock); > + return time; > +} > + > +void generate_sn_name_from_time(SNTime *time, char *time_str, int size) > +{ > +#ifdef _WIN32 > + time_t t = time->date_sec; > + struct tm *ptm = localtime(&t); > + strftime(time_str, size, "vm-%Y%m%d%H%M%S", ptm); > +#else > + /* cast below needed for OpenBSD where tv_sec is still 'long' */ > + time_t second = time->date_sec; > + struct tm tm; > + localtime_r((const time_t *)&second, &tm); > + strftime(time_str, size, "vm-%Y%m%d%H%M%S", &tm); > +#endif can we use localtime_r() also for windows? We have one implementation on os-lib-win32.c? It says that it miss locking, should we look at improving it instead? > +} > + > +static int blk_sn_check_create_internal_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlockDriverState *bs = st_sync->internal.bs; > + const char *device = bdrv_get_device_name(bs); > + QEMUSnapshotInfo *old_sn = &st_sync->internal.old_sn; > + const char *name = st_sync->internal.sn_name; > + bool use_existing = st_sync->use_existing; > + int ret; > + > + if (!bdrv_is_inserted(bs)) { > + error_set_replace(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); > + return -1; > + } > + > + if (bdrv_is_read_only(bs)) { > + error_set_replace(errp, QERR_DEVICE_IS_READ_ONLY, device); > + return -1; > + } > + > + if (!bdrv_can_snapshot(bs)) { > + error_set_replace(errp, QERR_NOT_SUPPORTED); > + return -1; > + } > + > + ret = bdrv_snapshot_find(bs, old_sn, name); > + if (ret >= 0) { > + st_sync->internal.old_sn_exist = TRUE; > + } else { > + if (use_existing) { > + /* caller require use existing one */ > + error_set_replace(errp, ERROR_CLASS_GENERIC_ERROR, > + "snapshot '%s' not exists on '%s' but caller " > + "want to use it.", > + name, device); > + return -1; > + } > + } > + > + st_sync->internal.step = BLK_SNAPSHOT_INT_START; > + return 0; > +} > + > +static int blk_sn_create_internal_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlockDriverState *bs = st_sync->internal.bs; > + const char *name = st_sync->internal.sn_name; > + QEMUSnapshotInfo *sn = &st_sync->internal.sn; > + int ret = 0; > + const char *device = bdrv_get_device_name(bs); > + QEMUSnapshotInfo *old_sn = &st_sync->internal.old_sn; > + > + SNTime *time = &st_sync->internal.time; > + if (time->vm_clock_nsec == 0) { > + /* not preset, it need to be set */ > + error_setg_replace(errp, > + "Invalid timestamp was set on for '%s' on '%s'\n", > + name, device); > + return -1; > + } > + > + sn->date_sec = time->date_sec; > + sn->date_nsec = time->date_nsec; > + sn->vm_clock_nsec = time->vm_clock_nsec; > + sn->vm_state_size = st_sync->internal.vm_state_size; > + > + if (st_sync->internal.old_sn_exist) { > + ret = bdrv_snapshot_delete(bs, old_sn->id_str); > + if (ret < 0) { > + error_setg_replace(errp, > + "Error in deleting existing snapshot %s on '%s'\n", > + name, device); > + return -1; > + } > + pstrcpy(sn->name, sizeof(sn->name), old_sn->name); > + pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); > + } else { > + pstrcpy(sn->name, sizeof(sn->name), name); > + } > + > + ret = bdrv_snapshot_create(bs, sn); > + if (ret < 0) { > + error_set_replace(errp, ERROR_CLASS_GENERIC_ERROR, > + "Error while creating snapshot on '%s'\n", device); > + return -1; > + } > + > + st_sync->internal.step = BLK_SNAPSHOT_INT_CREATED; > + return 0; > +} > + > +static int blk_sn_cancel_internal_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + const char *name = st_sync->internal.sn_name; > + BlockDriverState *bs = st_sync->internal.bs; > + const char *device = bdrv_get_device_name(bs); > + int ret; > + > + if (st_sync->internal.step == BLK_SNAPSHOT_INT_CREATED) { > + if (st_sync->internal.old_sn_exist) { > + error_setg_replace(errp, > + "sn '%s' on '%s' exist originally at it have been " > + "overwritten, can't cancel the action.\n", > + name, device); > + return -1; > + } > + ret = bdrv_snapshot_delete(bs, name); > + if (ret < 0) { > + error_setg_replace(errp, > + "Error while deleting snapshot '%s' on '%s' to " > + "cancel the operation.\n", name, device); > + return -1; > + } > + st_sync->internal.step = BLK_SNAPSHOT_INT_CANCELED; > + } > + return 0; > +} > + > +static int blk_sn_check_delete_internal_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlockDriverState *bs = st_sync->internal.bs; > + QEMUSnapshotInfo *old_sn = &st_sync->internal.old_sn; > + const char *device = bdrv_get_device_name(bs); > + const char *name = st_sync->internal.sn_name; > + int ret; > + > + if (bdrv_is_read_only(bs)) { > + error_set_replace(errp, QERR_DEVICE_IS_READ_ONLY, device); > + return -1; > + } > + > + if (!bdrv_can_snapshot(bs)) { > + error_set_replace(errp, QERR_NOT_SUPPORTED); > + return -1; > + } > + > + ret = bdrv_snapshot_find(bs, old_sn, name); > + if (ret < 0) { > + /* caller require use existing one */ > + error_set_replace(errp, ERROR_CLASS_GENERIC_ERROR, > + "snapshot '%s' not exists on '%s'.", > + name, device); > + return -1; > + } > + return 0; > +} > + > +static int blk_sn_delete_internal_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + int ret = 0; > + const char *name = st_sync->internal.sn_name; > + BlockDriverState *bs = st_sync->internal.bs; > + const char *device = bdrv_get_device_name(bs); > + > + /* no need to check snapshot existence? */ > + ret = bdrv_snapshot_delete(bs, name); > + if (ret < 0) { > + error_setg_replace(errp, > + "Error while deleting snapshot on '%s'\n", device); > + return -1; > + } > + return 0; > +} > + > + > +/* Block external snapshot(backing file chain) support. */ > + > +static int blk_sn_check_create_external_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlockDriverState *old_bs = st_sync->external.old_bs; > + const char *device = bdrv_get_device_name(old_bs); > + const char *format = st_sync->external.format; > + const char *new_image_file = st_sync->external.new_image_file; > + BlockDriver *proto_drv; > + BlockDriver *drv; > + > + if (!bdrv_is_inserted(old_bs)) { > + error_set_replace(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); > + return -1; > + } > + > + if (bdrv_in_use(old_bs)) { > + error_set_replace(errp, QERR_DEVICE_IN_USE, device); > + return -1; > + } > + > + if (!bdrv_is_read_only(old_bs)) { > + if (bdrv_flush(old_bs)) { > + error_set_replace(errp, QERR_IO_ERROR); > + return -1; > + } > + } > + > + drv = bdrv_find_format(format); > + if (!drv) { > + error_set_replace(errp, QERR_INVALID_BLOCK_FORMAT, format); > + return -1; > + } > + st_sync->external.format_drv = drv; > + > + proto_drv = bdrv_find_protocol(new_image_file); > + if (!proto_drv) { > + error_set_replace(errp, QERR_INVALID_BLOCK_FORMAT, format); > + return -1; > + } > + st_sync->external.step = BLK_SNAPSHOT_EXT_START; > + return 0; > +} > + > +/* We don't do anything that commits us to the snapshot here, do it later. */ > +static int blk_sn_create_external_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlockDriverState *old_bs = st_sync->external.old_bs; > + const char *format = st_sync->external.format; > + BlockDriver *drv = st_sync->external.format_drv; > + const char *new_image_file = st_sync->external.new_image_file; > + bool use_existing = st_sync->use_existing; > + BlockDriverState *new_bs; > + Error *local_err = NULL; > + > + int flags = old_bs->open_flags, ret; > + /* create new image w/backing file */ > + if (!use_existing) { > + bdrv_img_create(new_image_file, format, > + old_bs->filename, > + old_bs->drv->format_name, > + NULL, -1, flags, &local_err); > + if (error_is_set(&local_err)) { > + error_propagate(errp, local_err); > + return -1; > + } > + } > + > + /* We will manually add the backing_hd field to the bs later */ > + new_bs = bdrv_new(""); > + ret = bdrv_open(new_bs, new_image_file, > + flags | BDRV_O_NO_BACKING, drv); > + if (ret != 0) { > + error_set_replace(errp, QERR_OPEN_FILE_FAILED, new_image_file); > + return -1; > + } > + > + st_sync->external.new_bs = new_bs; > + st_sync->external.step = BLK_SNAPSHOT_EXT_CREATED; > + return 0; > +} > + > +/* refresh the blk device's states to emulator, always succeed now. */ > +static int blk_sn_invalid_external_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlkSnapshotExternal *ext = &st_sync->external; > + /* This removes our old bs from the bdrv_states, and adds the new > + * bs */ > + bdrv_append(ext->new_bs, ext->old_bs); > + /* We don't need (or want) to use the transactional > + * bdrv_reopen_multiple() across all the entries at once, because > + * we don't want to abort all of them if one of them fails the > + * reopen */ > + bdrv_reopen(ext->new_bs, > + ext->new_bs->open_flags & ~BDRV_O_RDWR, > + NULL); > + > + st_sync->external.step = BLK_SNAPSHOT_EXT_INVALIDATED; > + return 0; > +} > + > +/* cancel the job if it have been done before */ > +static int blk_sn_cancel_external_sync(BlkTransactionStates *states, > + Error **errp) > +{ > + BlkTransactionStatesSync *st_sync = &states->st_sync; > + BlkSnapshotExternal *ext = &st_sync->external; > + if (ext->step == BLK_SNAPSHOT_EXT_INVALIDATED) { > + /* revert the bind */ > + bdrv_deappend(ext->new_bs, ext->old_bs); > + bdrv_reopen(ext->old_bs, > + ext->old_bs->open_flags & ~BDRV_O_RDWR, > + NULL); > + ext->step = BLK_SNAPSHOT_EXT_CREATED; > + } > + if (ext->step == BLK_SNAPSHOT_EXT_CREATED) { > + /* abandon new bs, and keep using the original bs. > + * no need to delete the image? */ > + bdrv_delete(ext->new_bs); > + ext->step = BLK_SNAPSHOT_EXT_CANCELED; > + } > + return 0; > +} > + > +/* > + * caller's API to add, submit request, support mixed request. > + */ > + > +BlkTransactionStates *blk_trans_st_new(void) > +{ > + BlkTransactionStates *states = g_malloc0(sizeof(BlkTransactionStates)); > + return states; > +} > + > +void blk_trans_st_delete(BlkTransactionStates **p_st) > +{ > + if ((*p_st)->err != NULL) { > + error_free((*p_st)->err); > + } > + g_free(*p_st); > + *p_st = NULL; > + return; > +} > + > +BlkTransactionStatesList *blk_trans_st_list_new(void) > +{ > + BlkTransactionStatesList *list = > + g_malloc0(sizeof(BlkTransactionStatesList)); > + QSIMPLEQ_INIT(list); > + return list; > +} > + > +void blk_trans_st_list_delete(BlkTransactionStatesList **p_list) > +{ > + BlkTransactionStates *states, *next; > + QSIMPLEQ_FOREACH_SAFE(states, (*p_list), entry, next) { > + blk_trans_st_delete(&states); > + } > + g_free(*p_list); > + *p_list = NULL; > + return; > +} > + > +int add_transaction(BlkTransactionStatesList *list, > + BlkTransactionStates *states, > + Error **errp) > +{ > + int ret; > + > + if (states->async) { > + abort(); > + } > + > + switch (states->st_sync.op) { > + case BLK_SN_SYNC_CREATE: > + if (states->st_sync.type == BLK_SNAPSHOT_INTERNAL) { This is spelled: switch(states-s>st_sync.type) { case BLK_SNAPSHOT_INTERNAL: ..... } > + ret = blk_sn_check_create_internal_sync(states, errp); > + if (ret < 0) { > + return -1; > + } > + states->blk_trans_do = blk_sn_create_internal_sync; > + states->blk_trans_cancel = blk_sn_cancel_internal_sync; > + } else if (states->st_sync.type == BLK_SNAPSHOT_EXTERNAL) { > + ret = blk_sn_check_create_external_sync(states, errp); > + if (ret < 0) { > + return -1; > + } > + states->blk_trans_do = blk_sn_create_external_sync; > + states->blk_trans_invalid = blk_sn_invalid_external_sync; > + states->blk_trans_cancel = blk_sn_cancel_external_sync; > + } else { > + error_setg_replace(errp, "Operation %d for type %d not > supprted.", > + states->st_sync.op, states->st_sync.type); > + return -1; > + } > + break; > + case BLK_SN_SYNC_DELETE: > + if (states->st_sync.type == BLK_SNAPSHOT_INTERNAL) { > + ret = blk_sn_check_delete_internal_sync(states, errp); > + if (ret < 0) { > + return -1; > + } > + states->blk_trans_do = blk_sn_delete_internal_sync; > + /* this operation can't be canceled. */ > + } else if (states->st_sync.type == BLK_SNAPSHOT_EXTERNAL) { > + /* sync delete an external snapshot, not support now, > + use blk commit instead. */ > + error_setg_replace(errp, "Operation %d for type %d not > supprted.", > + states->st_sync.op, states->st_sync.type); > + return -1; > + } else { > + error_setg_replace(errp, "Operation %d for type %d not > supprted.", > + states->st_sync.op, states->st_sync.type); > + return -1; > + } > + break; > + default: > + return -1; > + } > + > + /* Todo, in async case const char* member should be duplicated, but > + we do not support it now so direct enlist the request. */ > + QSIMPLEQ_INSERT_TAIL(list, states, entry); > + return 0; > +} > + > +int submit_transaction(BlkTransactionStatesList *list, Error **errp) > +{ > + BlkTransactionStates *states = NULL; > + int ret = 0; > + bool error_set = false; > + > + /* drain all i/o before any snapshots */ > + bdrv_drain_all(); > + > + /* step 1, do the action, that is create/delete snapshots in sync case */ > + QSIMPLEQ_FOREACH(states, list, entry) { > + if (states->async) { > + abort(); > + } else { > + ret = states->blk_trans_do(states, &states->err); > + if (ret < 0) { > + if ((!error_set) && (states->err)) { Parens are not needed here if (!error_set && states->err) { > + *errp = error_copy(states->err); > + error_set = TRUE; TRUE is a constant, here you want "true" lowercase. > + } > + goto delete_and_fail; > + } > + } > + } > + > + /* step 2, update emulator */ > + QSIMPLEQ_FOREACH(states, list, entry) { > + if (states->async) { > + abort(); > + } else { > + if (states->blk_trans_invalid) { > + ret = states->blk_trans_invalid(states, &states->err); > + if (ret < 0) { > + if ((!error_set) && (states->err)) { > + *errp = error_copy(states->err); > + error_set = TRUE; This snip is repeated in all the loops, can't we factor it out? > + } > + goto delete_and_fail; > + } > + } > + } > + } > + > + /* success */ > + return 0; > + > +delete_and_fail: > + /* > + * failure, and it is all-or-none, cancel all if error got. > + */ > + QSIMPLEQ_FOREACH(states, list, entry) { > + if (states->async) { > + abort(); > + } else { > + if (states->blk_trans_cancel) { > + ret = states->blk_trans_cancel(states, &states->err); > + if ((ret < 0) && (!error_set) && (states->err)) { > + *errp = error_copy(states->err); > + error_set = TRUE; > + } > + } > + } > + } > + return -1; > +} > + > static void blockdev_do_action(int kind, void *data, Error **errp) > { > BlockdevAction action;