Add a per-thread core scheduling interface which allows a thread to tag
itself and enable core scheduling. Based on discussion at OSPM with
maintainers, we propose a prctl(2) interface accepting values of 0 or 1.
 1 - enable core scheduling for the task.
 0 - disable core scheduling for the task.

Special cases:
(1)
The core-scheduling patchset also provides a CGroup tagging interface.
To respect users of that interface, prctl(2) refuses to override the
tag of a CGroup-tagged task, since doing so would leave the task
inconsistent with its CGroup tag; it returns -EBUSY instead.

(2)
If a task is prctl-tagged, allow the CGroup interface to override
the task's tag.

ChromeOS will use core-scheduling to securely enable hyperthreading.
This cuts down the keypress latency in Google docs from 150ms to 50ms
while improving the camera streaming frame rate by ~3%.

Signed-off-by: Joel Fernandes (Google) <j...@joelfernandes.org>
---
 include/linux/sched.h      |  6 ++++
 include/uapi/linux/prctl.h |  3 ++
 kernel/sched/core.c        | 57 ++++++++++++++++++++++++++++++++++++++
 kernel/sys.c               |  3 ++
 4 files changed, 69 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae59fcadbe..8a40a093aa2ca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1986,6 +1986,12 @@ static inline void rseq_execve(struct task_struct *t)
 
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+int task_set_core_sched(int set, struct task_struct *tsk);
+#else
+/*
+ * The !CONFIG_SCHED_CORE stub must be static inline: a plain function
+ * definition in a header is emitted in every translation unit that
+ * includes it and fails to link with multiple-definition errors.
+ * Return -EINVAL rather than -ENOTSUPP, which is not a valid errno to
+ * hand back to userspace via prctl(2).
+ */
+static inline int task_set_core_sched(int set, struct task_struct *tsk)
+{
+	return -EINVAL;
+}
+#endif
+
 void __exit_umh(struct task_struct *tsk);
 
 static inline void exit_umh(struct task_struct *tsk)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e362..dba0c70f9cce6 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER              57
 #define PR_GET_IO_FLUSHER              58
 
+/* Core scheduling per-task interface */
+#define PR_SET_CORE_SCHED              59
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 684359ff357e7..780514d03da47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3320,6 +3320,13 @@ int sched_fork(unsigned long clone_flags, struct 
task_struct *p)
 #endif
 #ifdef CONFIG_SCHED_CORE
        RB_CLEAR_NODE(&p->core_node);
+
+       /*
+        * If task is using prctl(2) for tagging, do the prctl(2)-style tagging
+        * for the child as well.
+        */
+       if (current->core_cookie && ((unsigned long)current == 
current->core_cookie))
+               task_set_core_sched(1, p);
 #endif
        return 0;
 }
@@ -7857,6 +7864,56 @@ void __cant_sleep(const char *file, int line, int 
preempt_offset)
 EXPORT_SYMBOL_GPL(__cant_sleep);
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+
+/*
+ * Deliberately empty stop_machine() callback: stop_machine() already
+ * forces every online CPU into its stopper thread, so by the time it
+ * returns, each SMT sibling has passed through schedule() and will
+ * observe the task's updated core_cookie.
+ */
+static int task_set_core_sched_stopper(void *data)
+{
+       return 0;
+}
+
+/*
+ * task_set_core_sched - enable/disable core scheduling for a task.
+ * @set: 1 to tag @tsk with its own cookie, 0 to clear the tag.
+ * @tsk: target task, or NULL for current.
+ *
+ * Returns 0 on success, -ERANGE for values other than 0/1, -EINVAL when
+ * SMT is not active, and -EBUSY when the existing tag may not be changed.
+ */
+int task_set_core_sched(int set, struct task_struct *tsk)
+{
+       if (!tsk)
+               tsk = current;
+
+       /*
+        * Reject negative values too: prctl()'s arg2 is an unsigned long
+        * that is truncated to int here, so only 0 and 1 are accepted.
+        */
+       if (set < 0 || set > 1)
+               return -ERANGE;
+
+       if (!static_branch_likely(&sched_smt_present))
+               return -EINVAL;
+
+       /*
+        * Never override a CGroup-assigned tag from prctl(2), in either
+        * direction; the task would become inconsistent with its CGroup.
+        * (The reverse is allowed: CGroup tagging done after prctl()
+        * overrides the prctl() cookie.)
+        */
+       if (tsk->core_cookie &&
+           tsk->core_cookie == (unsigned long)tsk->sched_task_group)
+               return -EBUSY;
+
+       /* Re-tagging an already prctl()-tagged task is refused as well. */
+       if (set && tsk->core_cookie == (unsigned long)tsk)
+               return -EBUSY;
+
+       /*
+        * Already in the requested state: succeed without touching the
+        * sched_core refcount. Previously, disabling on a never-tagged
+        * task fell through and called sched_core_put() with no matching
+        * sched_core_get(), underflowing the refcount.
+        */
+       if (!!tsk->core_cookie == set)
+               return 0;
+
+       if (set)
+               sched_core_get();
+
+       tsk->core_cookie = set ? (unsigned long)tsk : 0UL;
+
+       /*
+        * Force all online CPUs through the (empty) stopper so every
+        * sibling reschedules and picks up the new cookie.
+        * NOTE(review): core_cookie is written without the task's rq
+        * lock held - confirm this cannot race with a concurrent
+        * enqueue/pick on a sibling CPU.
+        */
+       stop_machine(task_set_core_sched_stopper, NULL, NULL);
+
+       if (!set)
+               sched_core_put();
+
+       return 0;
+}
+#endif
+
 #ifdef CONFIG_MAGIC_SYSRQ
 void normalize_rt_tasks(void)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a9..5c3bcf40dcb34 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2514,6 +2514,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, 
unsigned long, arg3,
 
                error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
                break;
+       case PR_SET_CORE_SCHED:
+               error = task_set_core_sched(arg2, NULL);
+               break;
        default:
                error = -EINVAL;
                break;
-- 
2.26.2.761.g0e0b3e54be-goog

Reply via email to