In the lppaca region, add a new attribute, idle_hint, which allows the guest scheduler to make a better CPU selection. The hypervisor updates the idle_hint attribute based on its prediction of whether the vCPU, should it need to be scheduled, could be scheduled instantly or not.
Signed-off-by: Parth Shah <pa...@linux.ibm.com>
---
 arch/powerpc/include/asm/idle_hint.h | 38 +++++++++++++++++++++++++++++
 arch/powerpc/include/asm/lppaca.h    |  3 ++-
 arch/powerpc/kvm/book3s.h            |  2 ++
 arch/powerpc/kvm/book3s_hv.c         | 42 ++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/idle_hint.h

diff --git a/arch/powerpc/include/asm/idle_hint.h b/arch/powerpc/include/asm/idle_hint.h
new file mode 100644
index 000000000000..165d65c0275b
--- /dev/null
+++ b/arch/powerpc/include/asm/idle_hint.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_IDLEHINT_H
+#define _ASM_POWERPC_IDLEHINT_H
+
+#include <linux/kvm_host.h>
+
+extern void kvmppc_idle_hint_set(struct kvm_vcpu *vcpu, int idle_hint);
+
+extern int idle_hint_is_active;
+
+extern void set_idle_hint(int cpu, int value);
+
+/*
+ * prev_cpu_of_kvm() - return the physical CPU the vCPU's task last ran
+ * on, or -1 when the vCPU has no associated task.
+ */
+static inline int prev_cpu_of_kvm(struct kvm_vcpu *vcpu)
+{
+	struct pid *pid;
+	struct task_struct *task = NULL;
+	int cpu;
+
+	rcu_read_lock();
+	pid = rcu_dereference(vcpu->pid);
+	if (pid)
+		task = get_pid_task(pid, PIDTYPE_PID);
+	rcu_read_unlock();
+
+	if (!task)
+		return -1;
+
+	cpu = task_cpu(task);
+	/* get_pid_task() took a task reference; drop it to avoid a leak. */
+	put_task_struct(task);
+
+	return cpu;
+}
+#endif /* _ASM_POWERPC_IDLEHINT_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index c390ec377bae..ee790a566036 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -111,7 +111,8 @@ struct lppaca {
 	__be32	page_ins;	/* CMO Hint - # page ins by OS */
 	u8	reserved11[148];
 	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
-	u8	reserved12[96];
+	volatile __be32 idle_hint;	/* Can vCPU be scheduled instantly? */
+	u8	reserved12[92];
 } ____cacheline_aligned;
 
 #define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 740e51def5a5..61b0741c139a 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -7,6 +7,8 @@
 #ifndef __POWERPC_KVM_BOOK3S_H__
 #define __POWERPC_KVM_BOOK3S_H__
 
+#include <asm/idle_hint.h>
+
 extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 					 struct kvm_memory_slot *memslot);
 extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bc0813644666..c008be20294d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -447,6 +447,7 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
 {
 	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
 	vpa->yield_count = cpu_to_be32(1);
+	vpa->idle_hint = cpu_to_be32(0);
 }
 
 static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
@@ -911,6 +912,24 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 	return kvm_vcpu_yield_to(target);
 }
 
+/*
+ * Publish the hypervisor's idle hint into the vCPU's shared lppaca so
+ * the guest scheduler can read it.
+ */
+void kvmppc_idle_hint_set(struct kvm_vcpu *vcpu, int idle_hint)
+{
+	struct lppaca *lppaca;
+
+	if (!vcpu)
+		return;
+
+	/* Serialize against concurrent VPA registration/unpinning. */
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	if (lppaca)
+		lppaca->idle_hint = cpu_to_be32(idle_hint);
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
 static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 {
 	int yield_count = 0;
@@ -2803,6 +2822,29 @@ static int on_primary_thread(void)
 	return 1;
 }
 
+/* Set the idle hint on every vCPU of @kvm whose task last ran on @cpu. */
+void set_idle_hint_for_kvm(struct kvm *kvm, int cpu, int value)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (cpu == prev_cpu_of_kvm(vcpu))
+			kvmppc_idle_hint_set(vcpu, value);
+	}
+}
+
+/* Walk all VMs; vm_list is protected by kvm_lock, so this may sleep. */
+void set_idle_hint(int cpu, int value)
+{
+	struct kvm *kvm;
+
+	mutex_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		set_idle_hint_for_kvm(kvm, cpu, value);
+	mutex_unlock(&kvm_lock);
+}
+
 /*
  * A list of virtual cores for each physical CPU.
  * These are vcores that could run but their runner VCPU tasks are
-- 
2.26.3