On Tue, 12 Nov 2019 22:35:17 +0530 Kirti Wankhede <kwankh...@nvidia.com> wrote:
> VM state change handler gets called on change in VM's state. This is used to > set > VFIO device state to _RUNNING. > > Signed-off-by: Kirti Wankhede <kwankh...@nvidia.com> > Reviewed-by: Neo Jia <c...@nvidia.com> > --- > hw/vfio/migration.c | 69 > +++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > include/hw/vfio/vfio-common.h | 4 +++ > 3 files changed, 75 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index c17bd1b0b934..28981a759e6c 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -10,6 +10,7 @@ > #include "qemu/osdep.h" > #include <linux/vfio.h> > > +#include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > @@ -74,6 +75,67 @@ err: > return ret; > } > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t set_flags, > + uint32_t clear_flags) > +{ Perhaps a mask and value interface like we have elsewhere? > + VFIOMigration *migration = vbasedev->migration; > + VFIORegion *region = &migration->region; > + uint32_t device_state; > + int ret = 0; > + > + /* same flags should not be set or clear */ > + assert(!(set_flags & clear_flags)); mask/value avoids this sort of thing. > + device_state = (vbasedev->device_state | set_flags) & ~clear_flags; Don't we need to re-read device_state from the region? We can't predict what those reserved bits will be used for, they could be volatile. If we adopt the convention that a reset returns the device to the running state, our cached state may be stale. > + > + switch (device_state & VFIO_DEVICE_STATE_MASK) { > + case VFIO_DEVICE_STATE_INVALID_CASE1: > + case VFIO_DEVICE_STATE_INVALID_CASE2: > + return -EINVAL; > + } I like the VALID macro better. 
> + > + ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state), > + region->fd_offset + offsetof(struct > vfio_device_migration_info, > + device_state)); > + if (ret < 0) { > + error_report("%s: Failed to set device state %d %s", > + vbasedev->name, ret, strerror(errno)); > + return ret; > + } > + > + vbasedev->device_state = device_state; Are we opposed to re-reading device_state, here and in the error case above? > + trace_vfio_migration_set_state(vbasedev->name, device_state); > + return 0; > +} > + > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > +{ > + VFIODevice *vbasedev = opaque; > + > + if ((vbasedev->vm_running != running)) { > + int ret; > + uint32_t set_flags = 0, clear_flags = 0; > + > + if (running) { > + set_flags = VFIO_DEVICE_STATE_RUNNING; > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > + clear_flags = VFIO_DEVICE_STATE_RESUMING; > + } > + } else { > + clear_flags = VFIO_DEVICE_STATE_RUNNING; > + } > + > + ret = vfio_migration_set_state(vbasedev, set_flags, clear_flags); > + if (ret) { > + error_report("%s: Failed to set device state 0x%x", > + vbasedev->name, set_flags & ~clear_flags); > + } > + vbasedev->vm_running = running; We're effectively storing running both in vbasedev->device_state and vbasedev->vm_running, why? Seems like this could trivially know the initial state of the device is running. 
> + trace_vfio_vmstate_change(vbasedev->name, running, > RunState_str(state), > + set_flags & ~clear_flags); > + } > +} > + > static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { > @@ -89,6 +151,9 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + vbasedev->vm_state = > qemu_add_vm_change_state_handler(vfio_vmstate_change, > + vbasedev); > + > return 0; > } > > @@ -127,6 +192,10 @@ add_blocker: > > void vfio_migration_finalize(VFIODevice *vbasedev) > { > + if (vbasedev->vm_state) { > + qemu_del_vm_change_state_handler(vbasedev->vm_state); > + } > + > if (vbasedev->migration_blocker) { > migrate_del_blocker(vbasedev->migration_blocker); > error_free(vbasedev->migration_blocker); > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 191a726a1312..3d15bacd031a 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" > > # migration.c > vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" > +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t > dev_state) " (%s) running %d reason %s device state %d" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 927511897a44..6573acd6738e 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -29,6 +29,7 @@ > #ifdef CONFIG_LINUX > #include <linux/vfio.h> > #endif > +#include "sysemu/sysemu.h" > > #define VFIO_MSG_PREFIX "vfio %s: " > > @@ -120,6 +121,9 @@ typedef struct VFIODevice { > unsigned int flags; > VFIOMigration *migration; > Error *migration_blocker; > + uint32_t device_state; > + VMChangeStateEntry *vm_state; > + int vm_running; Isn't this effectively a bool per our usage? The field ordering is also wasteful: placing the vm_state pointer between the two 32-bit fields forces padding on 64-bit hosts. > } VFIODevice; > > struct VFIODeviceOps {