> 2025年3月19日 22:50,Stefano Garzarella <sgarz...@redhat.com> 写道:
> 
> On Fri, Mar 14, 2025 at 06:15:32AM -0400, Haoqian He wrote:
>> This patch contains two changes:
>> 
>> 1. Add VM state change cb type VMChangeStateHandlerExt which has return
>> value for virtio devices VMChangeStateEntry. When VM state changes,
>> virtio device will call the _Ext version.
>> 
>> 2. Add return value for vm_state_notify().
> 
> Can you explain why these changes are needed?

[1] The first and second patches are both pre-patch of the third patch, and the 
reason
for these changes is in the email cover and the third patch commit message.
I will briefly explain it in the commit message of the pre-patch.

> 
>> 
>> Signed-off-by: Haoqian He <haoqian...@smartx.com>
>> ---
>> hw/block/virtio-blk.c             |  2 +-
>> hw/core/vm-change-state-handler.c | 14 ++++++++------
>> hw/scsi/scsi-bus.c                |  2 +-
>> hw/vfio/migration.c               |  2 +-
>> hw/virtio/virtio.c                |  5 +++--
>> include/system/runstate.h         | 11 ++++++++---
>> system/cpus.c                     |  4 ++--
>> system/runstate.c                 | 25 ++++++++++++++++++++-----
>> 8 files changed, 44 insertions(+), 21 deletions(-)
>> 
>> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
>> index 5135b4d8f1..4a48a16790 100644
>> --- a/hw/block/virtio-blk.c
>> +++ b/hw/block/virtio-blk.c
>> @@ -1928,7 +1928,7 @@ static void virtio_blk_device_realize(DeviceState 
>> *dev, Error **errp)
>>     * called after ->start_ioeventfd() has already set blk's AioContext.
>>     */
>>    s->change =
>> -        qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);
>> +        qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, 
>> NULL, s);
>> 
>>    blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
>>    blk_set_dev_ops(s->blk, &virtio_block_ops, s);
>> diff --git a/hw/core/vm-change-state-handler.c 
>> b/hw/core/vm-change-state-handler.c
>> index 7064995578..d5045b17c1 100644
>> --- a/hw/core/vm-change-state-handler.c
>> +++ b/hw/core/vm-change-state-handler.c
>> @@ -40,6 +40,7 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
>> * qdev_add_vm_change_state_handler:
>> * @dev: the device that owns this handler
>> * @cb: the callback function to be invoked
>> + * @cb_ext: the callback function with return value to be invoked
> 
> I think we need to document what happens if both `cb` and `cb_ext` are 
> specified.

Indeed, it's best if `cb` and `cb_ext` are mutually exclusive, what about 
adding an
assert to ensure this.

> Also `cb_ext` is very cryptic, I'm not good with names, but what about 
> something like `cb_ret`?

Acked.

> 
>> * @opaque: user data passed to the callback function
>> *
>> * This function works like qemu_add_vm_change_state_handler() except 
>> callbacks
>> @@ -54,21 +55,22 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
>> */
>> VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
>>                                                     VMChangeStateHandler *cb,
>> +                                                     
>> VMChangeStateHandlerExt *cb_ext,
>>                                                     void *opaque)
>> {
>> -    return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque);
>> +    return qdev_add_vm_change_state_handler_full(dev, cb, NULL, cb_ext, 
>> opaque);
>> }
>> 
>> /*
>> * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
>> - * argument too.
>> + * and the cb_ext arguments too.
>> */
>> VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
>> -    DeviceState *dev, VMChangeStateHandler *cb,
>> -    VMChangeStateHandler *prepare_cb, void *opaque)
>> +    DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler 
>> *prepare_cb,
>> +    VMChangeStateHandlerExt *cb_ext, void *opaque)
> 
>> {
>>    int depth = qdev_get_dev_tree_depth(dev);
>> 
>> -    return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, 
>> opaque,
>> -                                                      depth);
>> +    return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, 
>> cb_ext,
>> +                                                      opaque, depth);
>> }
>> diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
>> index 7d4546800f..ec098f5f0a 100644
>> --- a/hw/scsi/scsi-bus.c
>> +++ b/hw/scsi/scsi-bus.c
>> @@ -356,7 +356,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error 
>> **errp)
>>        return;
>>    }
>>    dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev),
>> -            scsi_dma_restart_cb, dev);
>> +            scsi_dma_restart_cb, NULL, dev);
>> }
>> 
>> static void scsi_qdev_unrealize(DeviceState *qdev)
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 416643ddd6..f531db83ea 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -1015,7 +1015,7 @@ static int vfio_migration_init(VFIODevice *vbasedev)
>>                     vfio_vmstate_change_prepare :
>>                     NULL;
>>    migration->vm_state = qdev_add_vm_change_state_handler_full(
>> -        vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev);
>> +        vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev);
>>    migration_add_notifier(&migration->migration_state,
>>                           vfio_migration_state_notifier);
>> 
>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> index 85110bce37..5e8d4cab53 100644
>> --- a/hw/virtio/virtio.c
>> +++ b/hw/virtio/virtio.c
>> @@ -3419,7 +3419,7 @@ void virtio_cleanup(VirtIODevice *vdev)
>>    qemu_del_vm_change_state_handler(vdev->vmstate);
>> }
>> 
>> -static void virtio_vmstate_change(void *opaque, bool running, RunState 
>> state)
>> +static int virtio_vmstate_change(void *opaque, bool running, RunState state)
>> {
>>    VirtIODevice *vdev = opaque;
>>    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
>> @@ -3438,6 +3438,7 @@ static void virtio_vmstate_change(void *opaque, bool 
>> running, RunState state)
>>    if (!backend_run) {
>>        virtio_set_status(vdev, vdev->status);
>>    }
>> +    return 0;
>> }
>> 
>> void virtio_instance_init_common(Object *proxy_obj, void *data,
>> @@ -3489,7 +3490,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t 
>> device_id, size_t config_size)
>>        vdev->config = NULL;
>>    }
>>    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
>> -            virtio_vmstate_change, vdev);
>> +            NULL, virtio_vmstate_change, vdev);
> 
> IIUC virtio_vmstate_change now returns always 0, so what is the point of
> this change?
> 
> I'd also do this change in a separate commit if it's really needed.

According to [1], the pre-patch just adds a return value for vm_state_notify,
and the next patches returns an error in the virtio and vhost-user code.

> 
>>    vdev->device_endian = virtio_default_endian();
>>    vdev->use_guest_notifier_mask = true;
>> }
>> diff --git a/include/system/runstate.h b/include/system/runstate.h
>> index bffc3719d4..af33ea92b6 100644
>> --- a/include/system/runstate.h
>> +++ b/include/system/runstate.h
>> @@ -12,6 +12,7 @@ bool runstate_needs_reset(void);
>> void runstate_replay_enable(void);
>> 
>> typedef void VMChangeStateHandler(void *opaque, bool running, RunState 
>> state);
>> +typedef int VMChangeStateHandlerExt(void *opaque, bool running, RunState 
>> state);
> 
> Ditto about "Ext"
> 
>> 
>> VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler 
>> *cb,
>>                                                     void *opaque);
>> @@ -20,21 +21,25 @@ VMChangeStateEntry 
>> *qemu_add_vm_change_state_handler_prio(
>> VMChangeStateEntry *
>> qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
>>                                           VMChangeStateHandler *prepare_cb,
>> +                                           VMChangeStateHandlerExt *cb_ext,
>>                                           void *opaque, int priority);
>> VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
>>                                                     VMChangeStateHandler *cb,
>> +                                                     
>> VMChangeStateHandlerExt *cb_ext,
>>                                                     void *opaque);
>> VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
>> -    DeviceState *dev, VMChangeStateHandler *cb,
>> -    VMChangeStateHandler *prepare_cb, void *opaque);
>> +    DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler 
>> *prepare_cb,
>> +    VMChangeStateHandlerExt *cb_ext, void *opaque);
>> void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
>> /**
>> * vm_state_notify: Notify the state of the VM
>> *
>> * @running: whether the VM is running or not.
>> * @state: the #RunState of the VM.
>> + *
>> + * Return the result of the callback which has return value.
> 
> What if the callback has no return value?

vm_state_notify must have a return value. If all cb have no return value, it
will return 0 and the upper layer will do no additional processing as before.

> 
>> */
>> -void vm_state_notify(bool running, RunState state);
>> +int vm_state_notify(bool running, RunState state);
>> 
>> static inline bool shutdown_caused_by_guest(ShutdownCause cause)
>> {
>> diff --git a/system/cpus.c b/system/cpus.c
>> index 2cc5f887ab..6e1cf5720f 100644
>> --- a/system/cpus.c
>> +++ b/system/cpus.c
>> @@ -299,14 +299,14 @@ static int do_vm_stop(RunState state, bool send_stop)
>>        if (oldstate == RUN_STATE_RUNNING) {
>>            pause_all_vcpus();
>>        }
>> -        vm_state_notify(0, state);
>> +        ret = vm_state_notify(0, state);
>>        if (send_stop) {
>>            qapi_event_send_stop();
>>        }
>>    }
>> 
>>    bdrv_drain_all();
>> -    ret = bdrv_flush_all();
>> +    ret |= bdrv_flush_all();
> 
> Are we sure this is the right thing to do?
> If vm_state_notify() failed, shouldn't we go out first, and then why put them 
> in or?
> 
> I think it should be explained in the commit or in a comment here.

Even if vm_state_notify() failed, it might be better to flush as before.

> 
>>    trace_vm_stop_flush_all(ret);
>> 
>>    return ret;
>> diff --git a/system/runstate.c b/system/runstate.c
>> index 272801d307..2219cec409 100644
>> --- a/system/runstate.c
>> +++ b/system/runstate.c
>> @@ -297,6 +297,7 @@ void qemu_system_vmstop_request(RunState state)
>> struct VMChangeStateEntry {
>>    VMChangeStateHandler *cb;
>>    VMChangeStateHandler *prepare_cb;
>> +    VMChangeStateHandlerExt *cb_ext;
>>    void *opaque;
>>    QTAILQ_ENTRY(VMChangeStateEntry) entries;
>>    int priority;
>> @@ -320,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) 
>> vm_change_state_head =
>> VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
>>        VMChangeStateHandler *cb, void *opaque, int priority)
>> {
>> -    return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
>> -                                                      priority);
>> +    return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL,
>> +                                                      opaque, priority);
>> }
>> 
>> /**
>> * qemu_add_vm_change_state_handler_prio_full:
>> * @cb: the main callback to invoke
>> * @prepare_cb: a callback to invoke before the main callback
>> + * @cb_ext: the main callback to invoke with return value
>> * @opaque: user data passed to the callbacks
>> * @priority: low priorities execute first when the vm runs and the reverse is
>> *            true when the vm stops
>> @@ -344,6 +346,7 @@ VMChangeStateEntry 
>> *qemu_add_vm_change_state_handler_prio(
>> VMChangeStateEntry *
>> qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
>>                                           VMChangeStateHandler *prepare_cb,
>> +                                           VMChangeStateHandlerExt *cb_ext,
>>                                           void *opaque, int priority)
>> {
>>    VMChangeStateEntry *e;
>> @@ -352,6 +355,7 @@ 
>> qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
>>    e = g_malloc0(sizeof(*e));
>>    e->cb = cb;
>>    e->prepare_cb = prepare_cb;
>> +    e->cb_ext = cb_ext;
>>    e->opaque = opaque;
>>    e->priority = priority;
>> 
>> @@ -379,9 +383,10 @@ void 
>> qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
>>    g_free(e);
>> }
>> 
>> -void vm_state_notify(bool running, RunState state)
>> +int vm_state_notify(bool running, RunState state)
>> {
>>    VMChangeStateEntry *e, *next;
>> +    int ret = 0;
>> 
>>    trace_vm_state_notify(running, state, RunState_str(state));
>> 
>> @@ -393,7 +398,12 @@ void vm_state_notify(bool running, RunState state)
>>        }
>> 
>>        QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
>> -            e->cb(e->opaque, running, state);
>> +            if (e->cb) {
>> +                e->cb(e->opaque, running, state);
>> +            } else if (e->cb_ext) {
>> +                // no need to process the result when starting VM
> 
> Why?
> 
> (a good comment should explain more why than what we're doing which is pretty 
> clear)

Acked.
Since we only want to know the return value of callback when the stopping device
live migration.

> 
>> +                e->cb_ext(e->opaque, running, state);
>> +            }
>>        }
>>    } else {
>>        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
>> @@ -403,9 +413,14 @@ void vm_state_notify(bool running, RunState state)
>>        }
>> 
>>        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
>> -            e->cb(e->opaque, running, state);
>> +            if (e->cb) {
>> +                e->cb(e->opaque, running, state);
>> +            } else if (e->cb_ext) {
>> +                ret |= e->cb_ext(e->opaque, running, state);
> 
> I think putting them in or should be documented or at least explained 
> somewhere. It's not clear to me, but it's true that I don't know this code.

Let's add some comment here.

> 
>> +            }
>>        }
>>    }
>> +    return ret;
>> }
>> 
>> static ShutdownCause reset_requested;
>> -- 
>> 2.48.1



Reply via email to