On Wed, Dec 02, 2020 at 08:04:06PM +0100, Alexander Graf wrote: > From: Peter Collingbourne <p...@google.com> > > Sleep on WFI until the VTIMER is due but allow ourselves to be woken > up on IPI. > > Signed-off-by: Peter Collingbourne <p...@google.com> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap] > Signed-off-by: Alexander Graf <ag...@csgraf.de> > --- > accel/hvf/hvf-cpus.c | 5 ++-- > include/sysemu/hvf_int.h | 1 + > target/arm/hvf/hvf.c | 55 ++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 58 insertions(+), 3 deletions(-) > > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c > index e613c22ad0..a981ccde70 100644 > --- a/accel/hvf/hvf-cpus.c > +++ b/accel/hvf/hvf-cpus.c > @@ -337,15 +337,14 @@ static int hvf_init_vcpu(CPUState *cpu) > cpu->hvf = g_malloc0(sizeof(*cpu->hvf)); > > /* init cpu signals */ > - sigset_t set; > struct sigaction sigact; > > memset(&sigact, 0, sizeof(sigact)); > sigact.sa_handler = dummy_signal; > sigaction(SIG_IPI, &sigact, NULL); > > - pthread_sigmask(SIG_BLOCK, NULL, &set); > - sigdelset(&set, SIG_IPI); > + pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask); > + sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI); > > #ifdef __aarch64__ > r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, > NULL); > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h > index 5f15119184..13adf6ea77 100644 > --- a/include/sysemu/hvf_int.h > +++ b/include/sysemu/hvf_int.h > @@ -62,6 +62,7 @@ extern HVFState *hvf_state; > struct hvf_vcpu_state { > uint64_t fd; > void *exit; > + sigset_t unblock_ipi_mask; > }; > > void assert_hvf_ok(hv_return_t ret); > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c > index 5ecce36d4a..79aeeb237b 100644 > --- a/target/arm/hvf/hvf.c > +++ b/target/arm/hvf/hvf.c > @@ -2,6 +2,7 @@ > * QEMU Hypervisor.framework support for Apple Silicon > > * Copyright 2020 Alexander Graf <ag...@csgraf.de> > + * Copyright 2020 Google LLC > * > * This work 
is licensed under the terms of the GNU GPL, version 2 or later. > * See the COPYING file in the top-level directory. > @@ -18,6 +19,7 @@ > #include "sysemu/hw_accel.h" > > #include <Hypervisor/Hypervisor.h> > +#include <mach/mach_time.h> > > #include "exec/address-spaces.h" > #include "hw/irq.h" > @@ -413,6 +415,7 @@ int hvf_arch_init_vcpu(CPUState *cpu) > > void hvf_kick_vcpu_thread(CPUState *cpu) > { > + cpus_kick_thread(cpu); > hv_vcpus_exit(&cpu->hvf->fd, 1); > } > > @@ -468,6 +471,18 @@ static int hvf_inject_interrupts(CPUState *cpu) > return 0; > } > > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts) > +{ > + /* > + * Use pselect to sleep so that other threads can IPI us while we're > + * sleeping. > + */ > + qatomic_mb_set(&cpu->thread_kicked, false); > + qemu_mutex_unlock_iothread();
I raised a concern earlier, but I don't know for sure whether a kick could be lost right here. On x86 it could be lost. > + pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask); > + qemu_mutex_lock_iothread(); > +} > + > int hvf_vcpu_exec(CPUState *cpu) > { > ARMCPU *arm_cpu = ARM_CPU(cpu); > @@ -579,6 +594,46 @@ int hvf_vcpu_exec(CPUState *cpu) > } > case EC_WFX_TRAP: > advance_pc = true; > + if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request & > + (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) { > + > + uint64_t ctl; > + r = hv_vcpu_get_sys_reg(cpu->hvf->fd, > HV_SYS_REG_CNTV_CTL_EL0, > + &ctl); > + assert_hvf_ok(r); > + > + if (!(ctl & 1) || (ctl & 2)) { > + /* Timer disabled or masked, just wait for an IPI. */ > + hvf_wait_for_ipi(cpu, NULL); > + break; > + } > + > + uint64_t cval; > + r = hv_vcpu_get_sys_reg(cpu->hvf->fd, > HV_SYS_REG_CNTV_CVAL_EL0, > + &cval); > + assert_hvf_ok(r); > + > + int64_t ticks_to_sleep = cval - mach_absolute_time(); The Apple reference recommends using [1]: clock_gettime_nsec_np(CLOCK_UPTIME_RAW) It, internally in Libc, invokes mach_absolute_time() [2]. 1. https://developer.apple.com/documentation/kernel/1462446-mach_absolute_time 2. https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.c.auto.html Thanks, Roman > + if (ticks_to_sleep < 0) { > + break; > + } > + > + uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz; > + uint64_t nanos = > + (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) * > + 1000000000 / arm_cpu->gt_cntfrq_hz; > + > + /* > + * Don't sleep for less than 2ms. This is believed to improve > + * latency of message passing workloads. > + */ > + if (!seconds && nanos < 2000000) { > + break; > + } > + > + struct timespec ts = { seconds, nanos }; > + hvf_wait_for_ipi(cpu, &ts); > + } > break; > case EC_AA64_HVC: > cpu_synchronize_state(cpu); > -- > 2.24.3 (Apple Git-128) >