On 5/20/25 1:29 PM, Fiona Ebner wrote: > This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. > > More granular draining is not trivially possible, because > bdrv_snapshot_delete() can recursively call itself. > > The return value of bdrv_all_delete_snapshot() changes from -1 to > -errno propagated from failed sub-calls. This is fine for the existing > callers of bdrv_all_delete_snapshot(). > > Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> > --- > > Changes in v2: > * Use 'must be drained' instead of 'needs to be drained'. > * Use goto for cleanup/error handling in bdrv_all_delete_snapshot(). > * Don't use atomics to access bs->quiesce_counter. > > block/snapshot.c | 26 +++++++++++++++----------- > blockdev.c | 25 +++++++++++++++++-------- > qemu-img.c | 2 ++ > 3 files changed, 34 insertions(+), 19 deletions(-) > > diff --git a/block/snapshot.c b/block/snapshot.c > index 22567f1fb9..9f300a78bd 100644 > --- a/block/snapshot.c > +++ b/block/snapshot.c > @@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, > > /** > * Delete an internal snapshot by @snapshot_id and @name. > - * @bs: block device used in the operation > + * @bs: block device used in the operation, must be drained > * @snapshot_id: unique snapshot ID, or NULL > * @name: snapshot name, or NULL > * @errp: location to store error > @@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, > > GLOBAL_STATE_CODE(); > > + assert(bs->quiesce_counter > 0); > + > if (!drv) { > error_setg(errp, "Device '%s' has no medium", > bdrv_get_device_name(bs)); > @@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, > return -EINVAL; > } > > - /* drain all pending i/o before deleting snapshot */ > - bdrv_drained_begin(bs); > - > if (drv->bdrv_snapshot_delete) { > ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); > } else if (fallback_bs) { > @@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, > ret = -ENOTSUP; > } > > - bdrv_drained_end(bs); > return ret; > } > > @@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name, > ERRP_GUARD(); > g_autoptr(GList) bdrvs = NULL; > GList *iterbdrvs; > + int ret = 0; > > GLOBAL_STATE_CODE(); > - GRAPH_RDLOCK_GUARD_MAINLOOP(); > > - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < > 0) { > - return -1; > + bdrv_drain_all_begin(); > + bdrv_graph_rdlock_main_loop(); > + > + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp); > + if (ret < 0) { > + goto out; > } > > iterbdrvs = bdrvs; > while (iterbdrvs) { > BlockDriverState *bs = iterbdrvs->data; > QEMUSnapshotInfo sn1, *snapshot = &sn1; > - int ret = 0; > > if ((devices || bdrv_all_snapshots_includes_bs(bs)) && > bdrv_snapshot_find(bs, snapshot, name) >= 0) > @@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name, > if (ret < 0) { > error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", > name, bdrv_get_device_or_node_name(bs)); > - return -1; > + goto out; > } > > iterbdrvs = iterbdrvs->next; > } > > - return 0; > +out: > + bdrv_graph_rdunlock_main_loop(); > + bdrv_drain_all_end(); > + return ret; > } > > > diff --git a/blockdev.c b/blockdev.c > index 21443b4514..3982f9776b 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -1132,39 +1132,41 @@ SnapshotInfo > *qmp_blockdev_snapshot_delete_internal_sync(const char *device, > int ret; > > GLOBAL_STATE_CODE(); > - GRAPH_RDLOCK_GUARD_MAINLOOP(); > + > + bdrv_drain_all_begin(); > + bdrv_graph_rdlock_main_loop(); > > bs = qmp_get_root_bs(device, errp); > if (!bs) { > - return NULL; > + goto error; > } > > if (!id && !name) { > error_setg(errp, "Name or id must be provided"); > - return NULL; > + goto error; > } > > if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, > errp)) { > - return NULL; > + goto error; > } > > ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); > if (local_err) { > error_propagate(errp, local_err); > - return NULL; > + goto error; > } > if (!ret) { > error_setg(errp, > "Snapshot with id '%s' and name '%s' does not exist on " > "device '%s'", > STR_OR_NULL(id), STR_OR_NULL(name), device); > - return NULL; > + goto error; > } > > bdrv_snapshot_delete(bs, id, name, &local_err); > if (local_err) { > error_propagate(errp, local_err); > - return NULL; > + goto error; > } > > info = g_new0(SnapshotInfo, 1); > @@ -1180,6 +1182,9 @@ SnapshotInfo > *qmp_blockdev_snapshot_delete_internal_sync(const char *device, > info->has_icount = true; > } > > +error: > + bdrv_graph_rdunlock_main_loop(); > + bdrv_drain_all_end(); > return info; > } > > @@ -1295,12 +1300,14 @@ static void internal_snapshot_abort(void *opaque) > Error *local_error = NULL; > > GLOBAL_STATE_CODE(); > - GRAPH_RDLOCK_GUARD_MAINLOOP(); > > if (!state->created) { > return; > } > > + bdrv_drain_all_begin(); > + bdrv_graph_rdlock_main_loop(); > + > if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { > error_reportf_err(local_error, > "Failed to delete snapshot with id '%s' and " > @@ -1308,6 +1315,8 @@ static void internal_snapshot_abort(void *opaque) > sn->id_str, sn->name, > bdrv_get_device_name(bs)); > } > + bdrv_graph_rdunlock_main_loop(); > + bdrv_drain_all_end(); > } >
Okay, I've got a very simple and naive question to ask. We've got this pattern recurring throughout the series: > GLOBAL_STATE_CODE(); > bdrv_drain_all_begin(); > bdrv_graph_rdlock_main_loop(); > > ... > > bdrv_graph_rdunlock_main_loop(); > bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop() doesn't actually take any locks, it asserts that we're running in the main thread and not in a coroutine. bdrv_graph_rdunlock_main_loop() does the same. GRAPH_RDLOCK_GUARD_MAINLOOP() does both those calls, in the beginning of a function and when leaving its scope, so essentially it also just does assert(qemu_in_main_thread() && !qemu_in_coroutine()). Therefore: 1. Is there any real benefit from using those {rdlock/rdunlock}_main_loop() constructions, or they're here due to historical reasons only? 2. Would it hurt if we only leave GRAPH_RDLOCK_GUARD_MAINLOOP() in all such occurrences? At least when it's obvious we can't get out of the main thread. That would simply deliver us from performing same checks several times, similar to what's done in commit 22/24 ("block/io: remove duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end()"). > static void internal_snapshot_clean(void *opaque) > diff --git a/qemu-img.c b/qemu-img.c > index 76ac5d3028..e81b0fbb6c 100644 > --- a/qemu-img.c > +++ b/qemu-img.c > @@ -3505,6 +3505,7 @@ static int img_snapshot(int argc, char **argv) > break; > > case SNAPSHOT_DELETE: > + bdrv_drain_all_begin(); > bdrv_graph_rdlock_main_loop(); > ret = bdrv_snapshot_find(bs, &sn, snapshot_name); > if (ret < 0) { > @@ -3520,6 +3521,7 @@ static int img_snapshot(int argc, char **argv) > } > } > bdrv_graph_rdunlock_main_loop(); > + bdrv_drain_all_end(); > break; > } >