H_IDLE_HINT is a new hcall introduced to provide a hint to the guest OS indicating whether a given vCPU can be scheduled instantly or not.
The task scheduler generally prefers previous cpu of a task if it is available_idle. So if a prev_cpu of the corresponding vCPU task_struct is found to be available_idle or sched_idle then hint guestOS that the given vCPU can be scheduled instantly by the hypervisor. Signed-off-by: Parth Shah <pa...@linux.ibm.com> --- arch/powerpc/include/asm/hvcall.h | 3 ++- arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++++ arch/powerpc/kvm/trace_hv.h | 1 + include/linux/kvm_host.h | 1 + include/linux/sched.h | 1 + kernel/sched/core.c | 13 +++++++++++++ kernel/sched/fair.c | 12 ++++++++++++ kernel/sched/sched.h | 1 + virt/kvm/kvm_main.c | 17 +++++++++++++++++ 9 files changed, 61 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c98f5141e3fc..c91e27840c03 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -315,7 +315,8 @@ #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_RPT_INVALIDATE +#define H_IDLE_HINT 0x44C +#define MAX_HCALL_OPCODE H_IDLE_HINT /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6f612d240392..0472b8a1302f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -931,6 +931,17 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (tvcpu->arch.ceded) kvmppc_fast_vcpu_kick_hv(tvcpu); break; + case H_IDLE_HINT: + target = kvmppc_get_gpr(vcpu, 4); + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + if (!tvcpu) { + ret = H_PARAMETER; + break; + } + ret = kvm_vcpu_provide_idle_hint(tvcpu); + kvmppc_set_gpr(vcpu, 4, ret); + ret = H_SUCCESS; + break; case H_CONFER: target = kvmppc_get_gpr(vcpu, 4); if (target == -1) @@ -1145,6 +1156,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_CEDE: case H_PROD: case H_CONFER: + case H_IDLE_HINT: case H_REGISTER_VPA: case 
H_SET_MODE: case H_LOGICAL_CI_LOAD: @@ -5359,6 +5371,7 @@ static unsigned int default_hcall_list[] = { H_PROD, H_CONFER, H_REGISTER_VPA, + H_IDLE_HINT, #ifdef CONFIG_KVM_XICS H_EOI, H_CPPR, diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 830a126e095d..d0302a917eaf 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -46,6 +46,7 @@ {H_CEDE, "H_CEDE"}, \ {H_CONFER, "H_CONFER"}, \ {H_PROD, "H_PROD"}, \ + {H_IDLE_HINT, "H_IDLE_HINT"}, \ {H_GET_PPP, "H_GET_PPP"}, \ {H_SET_PPP, "H_SET_PPP"}, \ {H_PURR, "H_PURR"}, \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f3b1013fb22c..78fb0465cd65 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -843,6 +843,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); +unsigned long kvm_vcpu_provide_idle_hint(struct kvm_vcpu *target); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e3a5eeec509..3dea2a4ff58d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1688,6 +1688,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); +extern unsigned long get_idle_hint(struct task_struct *p); /** * task_nice - return the nice value of a given task. 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ff74fca39ed2..2962bf97ab13 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6812,6 +6812,19 @@ int __sched yield_to(struct task_struct *p, bool preempt) } EXPORT_SYMBOL_GPL(yield_to); +/* + * Provide hint to the VM indicating if the previous vCPU can be scheduled + * instantly or not. + */ +unsigned long __sched get_idle_hint(struct task_struct *p) +{ + unsigned long ret = 0; + if (p->sched_class->get_idle_hint) + ret = p->sched_class->get_idle_hint(p); + return ret; +} +EXPORT_SYMBOL_GPL(get_idle_hint); + int io_schedule_prepare(void) { int old_iowait = current->in_iowait; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 04a3ce20da67..16701a3da5dc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7236,6 +7236,16 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) return true; } +static unsigned long get_idle_hint_fair(struct task_struct *p) +{ + unsigned int prev_cpu = task_cpu(p); + + if (available_idle_cpu(prev_cpu) || sched_idle_cpu(prev_cpu)) + return 1; + + return 0; +} + #ifdef CONFIG_SMP /************************************************** * Fair scheduling class load-balancing methods. 
@@ -11264,6 +11274,8 @@ DEFINE_SCHED_CLASS(fair) = { .task_change_group = task_change_group_fair, #endif + .get_idle_hint = get_idle_hint_fair, + #ifdef CONFIG_UCLAMP_TASK .uclamp_enabled = 1, #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index bb09988451a0..09b1e35d8331 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1871,6 +1871,7 @@ struct sched_class { #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_change_group)(struct task_struct *p, int type); #endif + unsigned long (*get_idle_hint)(struct task_struct *p); }; static inline void put_prev_task(struct rq *rq, struct task_struct *prev) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8367d88ce39b..5d750ae2fe0a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2907,6 +2907,23 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); +unsigned long kvm_vcpu_provide_idle_hint(struct kvm_vcpu *target) +{ + struct pid *pid; + struct task_struct *task = NULL; + + rcu_read_lock(); + pid = rcu_dereference(target->pid); + if (pid) + task = get_pid_task(pid, PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + return 0; + + return get_idle_hint(task); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_provide_idle_hint); + /* * Helper that checks whether a VCPU is eligible for directed yield. * Most eligible candidate to yield is decided by following heuristics: -- 2.26.2