On Sun, 18 Oct 2020 02:00:44 +0530 Kirti Wankhede <kwankh...@nvidia.com> wrote:
> On 9/26/2020 1:50 AM, Alex Williamson wrote: > > On Wed, 23 Sep 2020 04:54:07 +0530 > > Kirti Wankhede <kwankh...@nvidia.com> wrote: > > > >> VM state change handler gets called on change in VM's state. This is used > >> to set > >> VFIO device state to _RUNNING. > >> > >> Signed-off-by: Kirti Wankhede <kwankh...@nvidia.com> > >> Reviewed-by: Neo Jia <c...@nvidia.com> > >> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > >> --- > >> hw/vfio/migration.c | 136 > >> ++++++++++++++++++++++++++++++++++++++++++ > >> hw/vfio/trace-events | 3 +- > >> include/hw/vfio/vfio-common.h | 4 ++ > >> 3 files changed, 142 insertions(+), 1 deletion(-) > >> > >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > >> index 2f760f1f9c47..a30d628ba963 100644 > >> --- a/hw/vfio/migration.c > >> +++ b/hw/vfio/migration.c > >> @@ -10,6 +10,7 @@ > >> #include "qemu/osdep.h" > >> #include <linux/vfio.h> > >> > >> +#include "sysemu/runstate.h" > >> #include "hw/vfio/vfio-common.h" > >> #include "cpu.h" > >> #include "migration/migration.h" > >> @@ -22,6 +23,58 @@ > >> #include "exec/ram_addr.h" > >> #include "pci.h" > >> #include "trace.h" > >> +#include "hw/hw.h" > >> + > >> +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int > >> count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret; > >> + > >> + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : > >> + pread(vbasedev->fd, val, count, off); > >> + if (ret < count) { > >> + error_report("vfio_mig_%s%d %s: failed at offset 0x%lx, err: %s", > >> + iswrite ? "write" : "read", count * 8, > >> + vbasedev->name, off, strerror(errno)); > > > > This would suggest from the log that there's, for example, a > > vfio_mig_read8 function, which doesn't exist. > > > > Changing to: > error_report("vfio_mig_%s %d byte %s: failed at offset 0x%lx, err: %s", > iswrite ? "write" : "read", count, > vbasedev->name, off, strerror(errno)); > Hope this address your concern. > > >> + return (ret < 0) ? ret : -EINVAL; > >> + } > >> + return 0; > >> +} > >> + > >> +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret, done = 0; > >> + __u8 *tbuf = buf; > >> + > >> + while (count) { > >> + int bytes = 0; > >> + > >> + if (count >= 8 && !(off % 8)) { > >> + bytes = 8; > >> + } else if (count >= 4 && !(off % 4)) { > >> + bytes = 4; > >> + } else if (count >= 2 && !(off % 2)) { > >> + bytes = 2; > >> + } else { > >> + bytes = 1; > >> + } > >> + > >> + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); > >> + if (ret) { > >> + return ret; > >> + } > >> + > >> + count -= bytes; > >> + done += bytes; > >> + off += bytes; > >> + tbuf += bytes; > >> + } > >> + return done; > >> +} > >> + > >> +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, > >> false) > >> +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, > >> true) > >> > >> static void vfio_migration_region_exit(VFIODevice *vbasedev) > >> { > >> @@ -70,6 +123,82 @@ err: > >> return ret; > >> } > >> > >> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > >> + uint32_t value) > >> +{ > >> + VFIOMigration *migration = vbasedev->migration; > >> + VFIORegion *region = &migration->region; > >> + off_t dev_state_off = region->fd_offset + > >> + offsetof(struct vfio_device_migration_info, > >> device_state); > >> + uint32_t device_state; > >> + int ret; > >> + > >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + return ret; > >> + } > >> + > >> + device_state = (device_state & mask) | value; > > > > Agree with Connie that mask and value args are not immediately obvious > > how they're used. I don't have a naming convention that would be more > > clear and the names do make some sense once they're understood, but a > > comment to indicate mask bits are preserved, value bits are set, > > remaining bits are cleared would probably help the reader. > > > > Added comment. > > >> + > >> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > >> + return -EINVAL; > >> + } > >> + > >> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + return ret; > > > > Seems like we're in pretty bad shape here, should this be combined with > > below to trigger a hw_error? > > > > Ok. > > >> + } > >> + > >> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > >> + hw_error("%s: Device is in error state 0x%x", > >> + vbasedev->name, device_state); > >> + return -EFAULT; > >> + } > >> + } > >> + > >> + vbasedev->device_state = device_state; > >> + trace_vfio_migration_set_state(vbasedev->name, device_state); > >> + return 0; > > > > So we return success even if we failed to write the desired state as > > long as we were able to read back any non-error state? > > vbasedev->device_state remains correct, but it seems confusing form a > > caller perspective that a set-state can succeed but it's then necessary > > to check the state. > > > > Correcting here. If vfio_mig_write() had retured error, return error > from vfio_migration_set_state() > > >> +} > >> + > >> +static void vfio_vmstate_change(void *opaque, int running, RunState state) > >> +{ > >> + VFIODevice *vbasedev = opaque; > >> + > >> + if ((vbasedev->vm_running != running)) { > >> + int ret; > >> + uint32_t value = 0, mask = 0; > >> + > >> + if (running) { > >> + value = VFIO_DEVICE_STATE_RUNNING; > >> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > >> + mask = ~VFIO_DEVICE_STATE_RESUMING; > >> + } > >> + } else { > >> + mask = ~VFIO_DEVICE_STATE_RUNNING; > >> + } > >> + > >> + ret = vfio_migration_set_state(vbasedev, mask, value); > >> + if (ret) { > >> + /* > >> + * vm_state_notify() doesn't support reporting failure. If > >> such > >> + * error reporting support added in furure, migration should > >> be > >> + * aborted. > >> + */ > >> + error_report("%s: Failed to set device state 0x%x", > >> + vbasedev->name, value & mask); > >> + } > > > > Here for instance we assume that success means the device is now in the > > desired state, but we'd actually need to evaluate > > vbasedev->device_state to determine that. > > > > Updating. > > >> + vbasedev->vm_running = running; > >> + trace_vfio_vmstate_change(vbasedev->name, running, > >> RunState_str(state), > >> + value & mask); > >> + } > >> +} > >> + > >> static int vfio_migration_init(VFIODevice *vbasedev, > >> struct vfio_region_info *info) > >> { > >> @@ -87,8 +216,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, > >> vbasedev->name); > >> g_free(vbasedev->migration); > >> vbasedev->migration = NULL; > >> + return ret; > >> } > >> > >> + vbasedev->vm_state = > >> qemu_add_vm_change_state_handler(vfio_vmstate_change, > >> + vbasedev); > >> return ret; > >> } > >> > >> @@ -131,6 +263,10 @@ add_blocker: > >> > >> void vfio_migration_finalize(VFIODevice *vbasedev) > >> { > >> + if (vbasedev->vm_state) { > >> + qemu_del_vm_change_state_handler(vbasedev->vm_state); > >> + } > >> + > >> if (vbasedev->migration_blocker) { > >> migrate_del_blocker(vbasedev->migration_blocker); > >> error_free(vbasedev->migration_blocker); > >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > >> index 8fe913175d85..6524734bf7b4 100644 > >> --- a/hw/vfio/trace-events > >> +++ b/hw/vfio/trace-events > >> @@ -149,4 +149,5 @@ vfio_display_edid_write_error(void) "" > >> > >> # migration.c > >> vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > >> - > >> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > >> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t > >> dev_state) " (%s) running %d reason %s device state %d" > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 8275c4c68f45..25e3b1a3b90a 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -29,6 +29,7 @@ > >> #ifdef CONFIG_LINUX > >> #include <linux/vfio.h> > >> #endif > >> +#include "sysemu/sysemu.h" > >> > >> #define VFIO_MSG_PREFIX "vfio %s: " > >> > >> @@ -119,6 +120,9 @@ typedef struct VFIODevice { > >> unsigned int flags; > >> VFIOMigration *migration; > >> Error *migration_blocker; > >> + VMChangeStateEntry *vm_state; > >> + uint32_t device_state; > >> + int vm_running; > > > > Could these be placed in VFIOMigration? Thanks, > > > > I think device_state should be part of VFIODevice since its about device > rather than only related to migration, others can be moved to VFIOMigration. But these are only valid when migration is supported and thus when VFIOMigration exists. Thanks, Alex