Add a per-thread core scheduling interface which allows a thread to tag itself and thereby enable core scheduling. Based on discussion with the maintainers at OSPM, we propose a prctl(2) interface that accepts a value of 0 or 1: 1 - enable core scheduling for the task; 0 - disable core scheduling for the task.
Special cases: (1) The core-scheduling patchset contains a CGroup interface as well. In order to respect users of that interface, we avoid overriding the tag if a task was already CGroup-tagged, because the task would otherwise become inconsistent with its CGroup's tag; prctl(2) returns -EBUSY instead. (2) If a task is prctl-tagged, the CGroup interface is allowed to override the task's tag. ChromeOS will use core-scheduling to securely enable hyperthreading. This cuts down the keypress latency in Google Docs from 150ms to 50ms while improving the camera streaming frame rate by ~3%. Signed-off-by: Joel Fernandes (Google) <j...@joelfernandes.org> --- include/linux/sched.h | 6 ++++ include/uapi/linux/prctl.h | 3 ++ kernel/sched/core.c | 57 ++++++++++++++++++++++++++++++++++++++ kernel/sys.c | 3 ++ 4 files changed, 69 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index fe6ae59fcadbe..8a40a093aa2ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1986,6 +1986,12 @@ static inline void rseq_execve(struct task_struct *t) #endif +#ifdef CONFIG_SCHED_CORE +int task_set_core_sched(int set, struct task_struct *tsk); +#else +int task_set_core_sched(int set, struct task_struct *tsk) { return -ENOTSUPP; } +#endif + void __exit_umh(struct task_struct *tsk); static inline void exit_umh(struct task_struct *tsk) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e362..dba0c70f9cce6 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,4 +238,7 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Core scheduling per-task interface */ +#define PR_SET_CORE_SCHED 59 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 684359ff357e7..780514d03da47 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3320,6 +3320,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) #endif #ifdef CONFIG_SCHED_CORE 
RB_CLEAR_NODE(&p->core_node); + + /* + * If task is using prctl(2) for tagging, do the prctl(2)-style tagging + * for the child as well. + */ + if (current->core_cookie && ((unsigned long)current == current->core_cookie)) + task_set_core_sched(1, p); #endif return 0; } @@ -7857,6 +7864,56 @@ void __cant_sleep(const char *file, int line, int preempt_offset) EXPORT_SYMBOL_GPL(__cant_sleep); #endif +#ifdef CONFIG_SCHED_CORE + +/* Ensure that all siblings have rescheduled once */ +static int task_set_core_sched_stopper(void *data) +{ + return 0; +} + +int task_set_core_sched(int set, struct task_struct *tsk) +{ + if (!tsk) + tsk = current; + + if (set > 1) + return -ERANGE; + + if (!static_branch_likely(&sched_smt_present)) + return -EINVAL; + + /* + * If cookie was set previously, return -EBUSY if either of the + * following are true: + * 1. Task was previously tagged by CGroup method. + * 2. Task or its parent were tagged by prctl(). + * + * Note that, if CGroup tagging is done after prctl(), then that would + * override the cookie. However, if prctl() is done after task was + * added to tagged CGroup, then the prctl() returns -EBUSY. + */ + if (!!tsk->core_cookie == set) { + if ((tsk->core_cookie == (unsigned long)tsk) || + (tsk->core_cookie == (unsigned long)tsk->sched_task_group)) { + return -EBUSY; + } + } + + if (set) + sched_core_get(); + + tsk->core_cookie = set ? 
(unsigned long)tsk : 0; + + stop_machine(task_set_core_sched_stopper, NULL, NULL); + + if (!set) + sched_core_put(); + + return 0; +} +#endif + #ifdef CONFIG_MAGIC_SYSRQ void normalize_rt_tasks(void) { diff --git a/kernel/sys.c b/kernel/sys.c index d325f3ab624a9..5c3bcf40dcb34 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2514,6 +2514,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; break; + case PR_SET_CORE_SCHED: + error = task_set_core_sched(arg2, NULL); + break; default: error = -EINVAL; break; -- 2.26.2.761.g0e0b3e54be-goog