On 2011-02-07 14:40, Glauber Costa wrote: > On Mon, 2011-02-07 at 13:36 +0100, Jan Kiszka wrote: >> On 2011-02-07 13:27, Glauber Costa wrote: >>> On Mon, 2011-02-07 at 12:19 +0100, Jan Kiszka wrote: >>>> If kvmclock is used, which implies the kernel supports it, register a >>>> kvmclock device with the sysbus. Its main purpose is to save and restore >>>> the kernel state on migration, but this will also allow to visualize it >>>> one day. >>>> >>>> Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> >>>> CC: Glauber Costa <glom...@redhat.com> >>>> --- >>>> Makefile.target | 4 +- >>>> hw/kvmclock.c | 125 >>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++ >>>> hw/kvmclock.h | 14 ++++++ >>>> hw/pc_piix.c | 31 +++++++++++--- >>>> 4 files changed, 165 insertions(+), 9 deletions(-) >>>> create mode 100644 hw/kvmclock.c >>>> create mode 100644 hw/kvmclock.h >>>> >>>> diff --git a/Makefile.target b/Makefile.target >>>> index b0ba95f..30232fa 100644 >>>> --- a/Makefile.target >>>> +++ b/Makefile.target >>>> @@ -37,7 +37,7 @@ ifndef CONFIG_HAIKU >>>> LIBS+=-lm >>>> endif >>>> >>>> -kvm.o kvm-all.o vhost.o vhost_net.o: QEMU_CFLAGS+=$(KVM_CFLAGS) >>>> +kvm.o kvm-all.o vhost.o vhost_net.o kvmclock.o: QEMU_CFLAGS+=$(KVM_CFLAGS) >>>> >>>> config-target.h: config-target.h-timestamp >>>> config-target.h-timestamp: config-target.mak >>>> @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o >>>> obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o >>>> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >>>> obj-i386-y += debugcon.o multiboot.o >>>> -obj-i386-y += pc_piix.o >>>> +obj-i386-y += pc_piix.o kvmclock.o >>>> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >>>> >>>> # shared objects >>>> diff --git a/hw/kvmclock.c b/hw/kvmclock.c >>>> new file mode 100644 >>>> index 0000000..b6ceddf >>>> --- /dev/null >>>> +++ b/hw/kvmclock.c >>>> @@ -0,0 +1,125 @@ >>>> +/* >>>> + * QEMU KVM support, paravirtual clock device >>>> + * 
>>>> + * Copyright (C) 2011 Siemens AG >>>> + * >>>> + * Authors: >>>> + * Jan Kiszka <jan.kis...@siemens.com> >>>> + * >>>> + * This work is licensed under the terms of the GNU GPL version 2. >>>> + * See the COPYING file in the top-level directory. >>>> + * >>>> + */ >>>> + >>>> +#include "qemu-common.h" >>>> +#include "sysemu.h" >>>> +#include "sysbus.h" >>>> +#include "kvm.h" >>>> +#include "kvmclock.h" >>>> + >>>> +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ADJUST_CLOCK) >>>> + >>>> +#include <linux/kvm.h> >>>> +#include <linux/kvm_para.h> >>>> + >>>> +typedef struct KVMClockState { >>>> + SysBusDevice busdev; >>>> + uint64_t clock; >>>> + bool clock_valid; >>>> +} KVMClockState; >>>> + >>>> +static void kvmclock_pre_save(void *opaque) >>>> +{ >>>> + KVMClockState *s = opaque; >>>> + struct kvm_clock_data data; >>>> + int ret; >>>> + >>>> + if (s->clock_valid) { >>>> + return; >>>> + } >>>> + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); >>>> + if (ret < 0) { >>>> + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); >>>> + data.clock = 0; >>>> + } >>>> + s->clock = data.clock; >>>> + /* >>>> + * If the VM is stopped, declare the clock state valid to avoid >>>> re-reading >>>> + * it on next vmsave (which would return a different value). Will be >>>> reset >>>> + * when the VM is continued. 
>>>> + */ >>>> + s->clock_valid = !vm_running; >>>> +} >>>> + >>>> +static int kvmclock_post_load(void *opaque, int version_id) >>>> +{ >>>> + KVMClockState *s = opaque; >>>> + struct kvm_clock_data data; >>>> + >>>> + data.clock = s->clock; >>>> + data.flags = 0; >>>> + return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); >>>> +} >>>> + >>>> +static void kvmclock_vm_state_change(void *opaque, int running, int >>>> reason) >>>> +{ >>>> + KVMClockState *s = opaque; >>>> + >>>> + if (running) { >>>> + s->clock_valid = false; >>>> + } >>>> +} >>>> + >>>> +static int kvmclock_init(SysBusDevice *dev) >>>> +{ >>>> + KVMClockState *s = FROM_SYSBUS(KVMClockState, dev); >>>> + >>>> + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); >>>> + return 0; >>>> +} >>>> + >>>> +static const VMStateDescription kvmclock_vmsd = { >>>> + .name = "kvmclock", >>>> + .version_id = 1, >>>> + .minimum_version_id = 1, >>>> + .minimum_version_id_old = 1, >>>> + .pre_save = kvmclock_pre_save, >>>> + .post_load = kvmclock_post_load, >>>> + .fields = (VMStateField[]) { >>>> + VMSTATE_UINT64(clock, KVMClockState), >>>> + VMSTATE_END_OF_LIST() >>>> + } >>>> +}; >>>> + >>>> +static SysBusDeviceInfo kvmclock_info = { >>>> + .qdev.name = "kvmclock", >>>> + .qdev.size = sizeof(KVMClockState), >>>> + .qdev.vmsd = &kvmclock_vmsd, >>>> + .qdev.no_user = 1, >>>> + .init = kvmclock_init, >>>> +}; >>>> + >>>> +/* Note: Must be called after VCPU initialization. 
*/ >>>> +void kvmclock_create(void) >>>> +{ >>>> + if (kvm_enabled() && >>>> + first_cpu->cpuid_kvm_features & (1ULL << >>>> KVM_FEATURE_CLOCKSOURCE)) { >>>> + sysbus_create_simple("kvmclock", -1, NULL); >>>> + } >>>> +} >>>> + >>>> +static void kvmclock_register_device(void) >>>> +{ >>>> + if (kvm_enabled()) { >>>> + sysbus_register_withprop(&kvmclock_info); >>>> + } >>>> +} >>>> + >>>> +device_init(kvmclock_register_device); >>>> + >>>> +#else /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */ >>>> + >>>> +void kvmclock_create(void) >>>> +{ >>>> +} >>>> +#endif /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */ >>>> diff --git a/hw/kvmclock.h b/hw/kvmclock.h >>>> new file mode 100644 >>>> index 0000000..7a83cbe >>>> --- /dev/null >>>> +++ b/hw/kvmclock.h >>>> @@ -0,0 +1,14 @@ >>>> +/* >>>> + * QEMU KVM support, paravirtual clock device >>>> + * >>>> + * Copyright (C) 2011 Siemens AG >>>> + * >>>> + * Authors: >>>> + * Jan Kiszka <jan.kis...@siemens.com> >>>> + * >>>> + * This work is licensed under the terms of the GNU GPL version 2. >>>> + * See the COPYING file in the top-level directory. >>>> + * >>>> + */ >>>> + >>>> +void kvmclock_create(void); >>>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c >>>> index 7b74473..9bc4659 100644 >>>> --- a/hw/pc_piix.c >>>> +++ b/hw/pc_piix.c >>>> @@ -32,6 +32,7 @@ >>>> #include "boards.h" >>>> #include "ide.h" >>>> #include "kvm.h" >>>> +#include "kvmclock.h" >>>> #include "sysemu.h" >>>> #include "sysbus.h" >>>> #include "arch_init.h" >>>> @@ -66,7 +67,8 @@ static void pc_init1(ram_addr_t ram_size, >>>> const char *kernel_cmdline, >>>> const char *initrd_filename, >>>> const char *cpu_model, >>>> - int pci_enabled) >>>> + int pci_enabled, >>>> + int kvmclock_enabled) >>>> >>> What exactly is your motivation to that ? I think mid/long-term >>> we should be making machine initialization more common among >>> architectures, not introducing more arch specific, or even worse, kvm >>> specific parameters here. 
>>> >>> I'd like to understand what we gain from that, since opting kvmclock >>> in or out is done by cpuid anyway - no need for a specific machine. >> >> Is that really the case? I thought we were already shipping versions >> where that CPU feature was enabled by default. If not, I'll happily drop >> that admittedly clumsy approach above. > > Yes, AFAIK, kvmclock is enabled by default, disabled by cpuid-leaf, as > in > -cpu kvm64,-kvmclock > > So your test for cpuid bit before starting kvmclock should already cover > it. >
No, quite the contrary: as kvmclock was always enabled in older versions and the compat machines also expose it, we cannot rely on the flag to enable this new (and therefore 0.15-only) vmstate. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux