On Mon, Jun 1, 2015 at 6:17 PM, Jason J. Herne <jjhe...@linux.vnet.ibm.com> wrote: > Provide a method to throttle guest cpu execution. CPUState is augmented with > timeout controls and throttle start/stop functions. To throttle the guest cpu > the caller simply has to call the throttle start function and provide a ratio > of > sleep time to normal execution time. > > Signed-off-by: Jason J. Herne <jjhe...@linux.vnet.ibm.com> > Reviewed-by: Matthew Rosato <mjros...@linux.vnet.ibm.com> > --- > cpus.c | 62 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/qom/cpu.h | 46 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 108 insertions(+) > > diff --git a/cpus.c b/cpus.c > index de6469f..7568357 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -64,6 +64,9 @@ > > #endif /* CONFIG_LINUX */ > > +/* Number of ms between cpu throttle operations */ > +#define CPU_THROTTLE_TIMESLICE 10 > + > static CPUState *next_cpu; > int64_t max_delay; > int64_t max_advance; > @@ -919,6 +922,65 @@ static void qemu_kvm_wait_io_event(CPUState *cpu) > qemu_wait_io_event_common(cpu); > } > > +static void cpu_throttle_thread(void *opq) > +{ > + CPUState *cpu = (CPUState *)opq; > + long sleeptime_ms = (long)(cpu->throttle_ratio * CPU_THROTTLE_TIMESLICE); > + > + /* Stop the timer if needed */ > + if (cpu->throttle_timer_stop) { > + timer_del(cpu->throttle_timer); > + timer_free(cpu->throttle_timer); > + cpu->throttle_timer = NULL; > + return; > + } > + > + qemu_mutex_unlock_iothread(); > + g_usleep(sleeptime_ms * 1000); /* Convert ms to us for usleep call */ > + qemu_mutex_lock_iothread(); > + > + timer_mod(cpu->throttle_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + > + CPU_THROTTLE_TIMESLICE); > +} > + > +static void cpu_throttle_timer_pop(void *opq) > +{ > + CPUState *cpu = (CPUState *)opq; > + > + async_run_on_cpu(cpu, cpu_throttle_thread, cpu); > +} > + > +void cpu_throttle_start(CPUState *cpu, float throttle_ratio) > +{ > + assert(throttle_ratio > 0); > + 
cpu->throttle_ratio = throttle_ratio; > + > + if (!cpu_throttle_active(cpu)) { > + cpu->throttle_timer = timer_new_ms(QEMU_CLOCK_REALTIME, > + cpu_throttle_timer_pop, cpu); > + timer_mod(cpu->throttle_timer, > qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + > + CPU_THROTTLE_TIMESLICE); > + cpu->throttle_timer_stop = false; > + } > +} > + > +void cpu_throttle_stop(CPUState *cpu) > +{ > + assert(cpu_throttle_active(cpu)); > + cpu->throttle_timer_stop = true; > +} > + > +bool cpu_throttle_active(CPUState *cpu) > +{ > + return (cpu->throttle_timer != NULL); > +} > + > +float cpu_throttle_get_ratio(CPUState *cpu) > +{ > + assert(cpu_throttle_active(cpu)); > + return cpu->throttle_ratio; > +} > + > static void *qemu_kvm_cpu_thread_fn(void *arg) > { > CPUState *cpu = arg; > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index 39f0f19..9d16e6a 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -310,6 +310,11 @@ struct CPUState { > uint32_t can_do_io; > int32_t exception_index; /* used by m68k TCG */ > > + /* vcpu throttling controls */ > + QEMUTimer *throttle_timer; > + bool throttle_timer_stop; > + float throttle_ratio; > + > /* Note that this is accessed at the start of every TB via a negative > offset from AREG0. Leave this field at the end so as to make the > (absolute value) offset as small as possible. This reduces code > @@ -553,6 +558,47 @@ CPUState *qemu_get_cpu(int index); > */ > bool cpu_exists(int64_t id); > > +/** > + * cpu_throttle_start: > + * @cpu: The vcpu to throttle > + * > + * Throttles a vcpu by forcing it to sleep. The duration of the sleep is a > + * ratio of sleep time to running time. A ratio of 1.0 corresponds to a 50% > + * duty cycle (example: 10ms sleep for every 10ms awake). > + * > + * cpu_throttle_start can be called as needed to adjust the throttle ratio. > + * Once the throttling starts, it will remain in effect until > cpu_throttle_stop > + * is called. 
> + */ > +void cpu_throttle_start(CPUState *cpu, float throttle_ratio); > + > +/** > + * cpu_throttle_stop: > + * @cpu: The vcpu to stop throttling > + * > + * Stops the vcpu throttling started by cpu_throttle_start. > + */ > +void cpu_throttle_stop(CPUState *cpu); > + > +/** > + * cpu_throttle_active: > + * @cpu: The vcpu to check > + * > + * Returns %true if this vcpu is currently being throttled, %false otherwise. > + */ > +bool cpu_throttle_active(CPUState *cpu); > + > +/** > + * cpu_throttle_get_ratio: > + * @cpu: The vcpu whose throttle ratio to return. > + * > + * Returns the ratio being used to throttle this vcpu. See cpu_throttle_start > + * for details. > + * > + * Returns The ratio being used to throttle this vcpu. > + */ > +float cpu_throttle_get_ratio(CPUState *cpu); > + > #ifndef CONFIG_USER_ONLY > > typedef void (*CPUInterruptHandler)(CPUState *, int); > -- > 1.9.1 > >
Thanks Jason, this patch is quite interesting, as it eliminates the slight overhead from the scheduler when cgroups are actively used for the same task (~5% for a per-vcpu cgroup layout similar to libvirt's, for the guest perf numa bench). Are you planning to add a wakeup-frequency throttler to the same interface as well?