On 09/05/2013 07:16 PM, Alexander Graf wrote: > > On 05.09.2013, at 06:54, Alexey Kardashevskiy wrote: > >> On 09/05/2013 02:30 PM, David Gibson wrote: >>> On Tue, Sep 03, 2013 at 05:31:42PM +1000, Alexey Kardashevskiy wrote: >>>> This allows guests to have a different timebase origin from the host. >>>> >>>> This is needed for migration, where a guest can migrate from one host >>>> to another and the two hosts might have a different timebase origin. >>>> However, the timebase seen by the guest must not go backwards, and >>>> should go forwards only by a small amount corresponding to the time >>>> taken for the migration. >>>> >>>> This is only supported for recent POWER hardware which has the TBU40 >>>> (timebase upper 40 bits) register. That includes POWER6, 7, 8 but not >>>> 970. >>>> >>>> This adds kvm_access_one_reg() to access a special register which is not >>>> in env->spr. >>>> >>>> The feature must be present in the host kernel. >>>> >>>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >>>> --- >>>> >>>> This is an RFC but not a final patch. Can break something but I just do >>>> not see what. >>>> >>>> --- >>>> hw/ppc/ppc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ >>>> include/hw/ppc/ppc.h | 4 ++++ >>>> target-ppc/kvm.c | 23 +++++++++++++++++++++++ >>>> target-ppc/machine.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ >>>> trace-events | 3 +++ >>>> 5 files changed, 123 insertions(+) >>>> >>>> diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c >>>> index 1e3cab3..7d08c9a 100644 >>>> --- a/hw/ppc/ppc.c >>>> +++ b/hw/ppc/ppc.c >>>> @@ -31,6 +31,7 @@ >>>> #include "hw/loader.h" >>>> #include "sysemu/kvm.h" >>>> #include "kvm_ppc.h" >>>> +#include "trace.h" >>>> >>>> //#define PPC_DEBUG_IRQ >>>> #define PPC_DEBUG_TB >>>> @@ -796,6 +797,54 @@ static void cpu_ppc_set_tb_clk (void *opaque, >>>> uint32_t freq) >>>> cpu_ppc_store_purr(cpu, 0x0000000000000000ULL); >>>> } >>>> >>>> +/* >>>> + * Calculate timebase on the destination side of migration >>> >>>> + * We calculate new timebase offset as shown below: >>>> + * 1) Gtb2 = Gtb1 + max(tod2 - tod1, 0) >>>> + * Gtb2 = tb2 + off2 >>>> + * Gtb1 = tb1 + off1 >>>> + * 2) tb2 + off2 = tb1 + off1 + max(tod2 - tod1, 0) >>>> + * 3) off2 = tb1 - tb2 + off1 + max(tod2 - tod1, 0) >>>> + * >>>> + * where: >>>> + * Gtb2 - destination guest timebase >>>> + * tb2 - destination host timebase >>>> + * off2 - destination timebase offset >>>> + * tod2 - destination time of the day >>>> + * Gtb1 - source guest timebase >>>> + * tb1 - source host timebase >>>> + * off1 - source timebase offset >>>> + * tod1 - source time of the day >>>> + * >>>> + * The result we want is in @off2 >>>> + * >>>> + * Two conditions must be met for @off2: >>>> + * 1) off2 must be multiple of 2^24 ticks as it will be set via TBU40 SPR >>>> + * 2) Gtb2 >= Gtb1 >>> >>> What about the TCG case, where there is not host timebase, only a a >>> host system clock? >> >> >> cpu_get_real_ticks() returns ticks, this is what the patch cares about. >> What is the difference between KVM and TCG here? >> >> >>>> + */ >>>> +void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env) >>>> +{ >>>> + uint64_t tb2, tod2, off2; >>>> + int ratio = tb_env->tb_freq / 1000000; >>>> + struct timeval tv; >>>> + >>>> + tb2 = cpu_get_real_ticks(); >>>> + gettimeofday(&tv, NULL); >>>> + tod2 = tv.tv_sec * 1000000 + tv.tv_usec; >>>> + >>>> + off2 = tb_env->timebase - tb2 + tb_env->tb_offset; >>>> + if (tod2 > tb_env->time_of_the_day) { >>>> + off2 += (tod2 - tb_env->time_of_the_day) * ratio; >>>> + } >>>> + off2 = ROUND_UP(off2, 1 << 24); >>>> + >>>> + trace_ppc_tb_adjust(tb_env->tb_offset, off2, >>>> + (int64_t)off2 - tb_env->tb_offset); >>>> + >>>> + tb_env->tb_offset = off2; >>>> +} >>>> + >>>> /* Set up (once) timebase frequency (in Hz) */ >>>> clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) >>>> { >>>> diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h >>>> index 132ab97..235871c 100644 >>>> --- a/include/hw/ppc/ppc.h >>>> +++ b/include/hw/ppc/ppc.h >>>> @@ -32,6 +32,9 @@ struct ppc_tb_t { >>>> uint64_t purr_start; >>>> void *opaque; >>>> uint32_t flags; >>>> + /* Cached values for live migration purposes */ >>>> + uint64_t timebase; >>>> + uint64_t time_of_the_day; >>> >>> How is the time of day encoded here? >> >> >> Microseconds. I'll put a comment here, I just thought it is quite obvious >> as gettimeofday() returns microseconds. >> >> >>>> }; >>>> >>>> /* PPC Timers flags */ >>>> @@ -46,6 +49,7 @@ struct ppc_tb_t { >>>> */ >>>> >>>> uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t >>>> tb_offset); >>>> +void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env); >>>> clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq); >>>> /* Embedded PowerPC DCR management */ >>>> typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn); >>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >>>> index 7af9e3d..93df955 100644 >>>> --- a/target-ppc/kvm.c >>>> +++ b/target-ppc/kvm.c >>>> @@ -35,6 +35,7 @@ >>>> #include "hw/sysbus.h" >>>> #include "hw/ppc/spapr.h" >>>> #include "hw/ppc/spapr_vio.h" >>>> +#include "hw/ppc/ppc.h" >>>> #include "sysemu/watchdog.h" >>>> >>>> //#define DEBUG_KVM >>>> @@ -761,6 +762,22 @@ static int kvm_put_vpa(CPUState *cs) >>>> } >>>> #endif /* TARGET_PPC64 */ >>>> >>>> +static int kvm_access_one_reg(CPUState *cs, bool set, __u64 id, >>>> void *addr) >>> >>> I think it would be nicer to have seperate set_one_reg and get_one_reg >>> functions, rather than using a direction parameter. >> >> >> I am regularly getting requests from Alex to merge functions which look >> almost the same. Please do not make it worse :) > > The reason I ask you to merge function is to reduce code duplication. The API > should still look sane and I think what David suggests makes a lot of sense. > In normal QEMU fashion, that translates to: > > static int kvm_access_one_reg(CPUState *cs, uint64_t id, void *addr, bool set) > { > .... > } > > int kvm_set_one_reg(CPUState *cs, uint64_t id, void *addr) > { > return kvm_access_one_reg(cs, id, addr, true); > } > > int kvm_get_one_reg(CPUState *cs, uint64_t id, void *addr) > { > return kvm_access_one_reg(cs, id, addr, false); > } > > Also, this code should live in generic KVM code that can be used by more than > just the PPC target. > > >> >> >>>> +{ >>>> + struct kvm_one_reg reg = { >>>> + .id = id, >>>> + .addr = (uintptr_t)addr, >>>> + }; >>>> + int ret = kvm_vcpu_ioctl(cs, set ? KVM_SET_ONE_REG : KVM_GET_ONE_REG, >>>> ®); >>>> + >>>> + if (ret) { >>>> + DPRINTF("Unable to %s time base offset to KVM: %s\n", >>>> + set ? "set" : "get", strerror(errno)); >>>> + } >>>> + >>>> + return ret; >>>> +} >>>> + >>>> int kvm_arch_put_registers(CPUState *cs, int level) >>>> { >>>> PowerPCCPU *cpu = POWERPC_CPU(cs); >>>> @@ -873,6 +890,9 @@ int kvm_arch_put_registers(CPUState *cs, int level) >>>> DPRINTF("Warning: Unable to set VPA information to KVM\n"); >>>> } >>>> } >>>> + >>>> + kvm_access_one_reg(cs, 1, KVM_REG_PPC_TB_OFFSET, >>>> + &env->tb_env->tb_offset); >>> >>> Hrm. It may be too late, but would it make more sense for qemu to >>> just calculate the "ideal" offset - not 24-bit aligned, and have the >>> kernel round that up to a value suitable for TBU40. I'm thinking that >>> might be more robust if someone decides that POWER10 should have a >>> TBU50 or a TBU30 instead of TBU40. >> >> >> No, not late, the kernel patchset has not been noticed yet by anyone :) >> Just a little remark here - QEMU sets one value to the kernel as an offset >> but when it will be about to migrate again and read this offset from the >> kernel, it will be different (rounded up) from what QEMU set. Not a problem >> though. >> >> >>>> #endif /* TARGET_PPC64 */ >>>> } >>>> >>>> @@ -1082,6 +1102,9 @@ int kvm_arch_get_registers(CPUState *cs) >>>> DPRINTF("Warning: Unable to get VPA information from >>>> KVM\n"); >>>> } >>>> } >>>> + >>>> + kvm_access_one_reg(cs, 0, KVM_REG_PPC_TB_OFFSET, >>>> + &env->tb_env->tb_offset); >>>> #endif >>>> } >>>> >>>> diff --git a/target-ppc/machine.c b/target-ppc/machine.c >>>> index 12e1512..d1ffc7f 100644 >>>> --- a/target-ppc/machine.c >>>> +++ b/target-ppc/machine.c >>>> @@ -1,5 +1,6 @@ >>>> #include "hw/hw.h" >>>> #include "hw/boards.h" >>>> +#include "hw/ppc/ppc.h" >>>> #include "sysemu/kvm.h" >>>> #include "helper_regs.h" >>>> >>>> @@ -459,6 +460,45 @@ static const VMStateDescription vmstate_tlbmas = { >>>> } >>>> }; >>>> >>>> +static void timebase_pre_save(void *opaque) >>>> +{ >>>> + ppc_tb_t *tb_env = opaque; >>>> + struct timeval tv; >>>> + >>>> + gettimeofday(&tv, NULL); >>>> + tb_env->time_of_the_day = tv.tv_sec * 1000000 + tv.tv_usec; >>>> + tb_env->timebase = cpu_get_real_ticks(); >>>> +} >>>> + >>>> +static int timebase_post_load(void *opaque, int version_id) >>>> +{ >>>> + ppc_tb_t *tb_env = opaque; >>>> + >>>> + if (!tb_env) { >>>> + printf("NO TB!\n"); >>>> + return -1; >>>> + } >>>> + cpu_ppc_adjust_tb_offset(tb_env); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static const VMStateDescription vmstate_timebase = { >>>> + .name = "cpu/timebase", >>>> + .version_id = 1, >>>> + .minimum_version_id = 1, >>>> + .minimum_version_id_old = 1, >>>> + .pre_save = timebase_pre_save, >>>> + .post_load = timebase_post_load, >>>> + .fields = (VMStateField []) { >>>> + VMSTATE_UINT64(timebase, ppc_tb_t), >>>> + VMSTATE_INT64(tb_offset, ppc_tb_t), >>> >>> tb_offset is inherently a local concept, since it depends on the host >>> timebase. So how can it belong in the migration stream? >> >> >> I do not have pure guest timebase in QEMU and I need it on the destination. >> But I have host timebase + offset to calculate it. And tb_offset is already >> in ppc_tb_t. It looked logical to me to send the existing field and add >> only the missing part. > > I still don't understand. You want the guest visible timebase in the > migration stream, no?
Yes. I do not really understand the problem here (and I am not playing dump). Do you suggest sending just the guest timebase and do not send the host timebase and the offset (one number instead of two)? I can do that, makes sense, no problem, thanks for the idea. -- Alexey