During the KLP transition, walking the task list in
klp_try_complete_transition() takes tasklist_lock, which can cause high
latency under certain workloads. To address this, replace the lock with
RCU.
When a new task is forked, its kernel stack always starts out empty[0].
As a result, we can set such new tasks to the KLP_TRANSITION_IDLE state
immediately after forking. If a task is forked during the KLP
transition but before klp_check_and_switch_task() reaches it, it is
safe to switch it to the klp_target_state within
klp_check_and_switch_task(). Likewise, if klp_ftrace_handler() is
triggered before the task has been switched to the klp_target_state, it
is safe to perform the switch within the ftrace handler[1].

With these changes, we can safely replace the tasklist_lock with RCU.

Link: https://lore.kernel.org/all/20250213173253.ovivhuq2c5rmvkhj@jpoimboe/ [0]
Link: https://lore.kernel.org/all/20250214181206.xkvxohoc4ft26uhf@jpoimboe/ [1]
Signed-off-by: Yafang Shao <laoar.s...@gmail.com>
---
(For reviewers: a short illustrative sketch of the forked-task
lifecycle follows the patch.)

 include/linux/livepatch.h     |  4 ++--
 kernel/fork.c                 |  2 +-
 kernel/livepatch/patch.c      |  7 ++++++-
 kernel/livepatch/transition.c | 35 ++++++++++++++---------------------
 kernel/livepatch/transition.h |  1 +
 5 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 51a258c24ff5..41c424120f49 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -198,7 +198,7 @@ int klp_enable_patch(struct klp_patch *);
 int klp_module_coming(struct module *mod);
 void klp_module_going(struct module *mod);
 
-void klp_copy_process(struct task_struct *child);
+void klp_init_process(struct task_struct *child);
 void klp_update_patch_state(struct task_struct *task);
 
 static inline bool klp_patch_pending(struct task_struct *task)
@@ -241,7 +241,7 @@ static inline int klp_module_coming(struct module *mod) { return 0; }
 static inline void klp_module_going(struct module *mod) {}
 static inline bool klp_patch_pending(struct task_struct *task) { return false; }
 static inline void klp_update_patch_state(struct task_struct *task) {}
-static inline void klp_copy_process(struct task_struct *child) {}
+static inline void klp_init_process(struct task_struct *child) {}
 
 static inline
 int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
diff --git a/kernel/fork.c b/kernel/fork.c
index 735405a9c5f3..da247c4d5ec5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2544,7 +2544,7 @@ __latent_entropy struct task_struct *copy_process(
 		p->exit_signal = args->exit_signal;
 	}
 
-	klp_copy_process(p);
+	klp_init_process(p);
 
 	sched_core_fork(p);
 
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 90408500e5a3..5e523a3fbb3c 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -95,7 +95,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 		patch_state = current->patch_state;
 
-		WARN_ON_ONCE(patch_state == KLP_TRANSITION_IDLE);
+		/* If the patch_state is KLP_TRANSITION_IDLE, it indicates the
+		 * task was forked after klp_init_transition(). For this newly
+		 * forked task, it is safe to switch it to klp_target_state.
+		 */
+		if (patch_state == KLP_TRANSITION_IDLE)
+			current->patch_state = klp_target_state;
 
 		if (patch_state == KLP_TRANSITION_UNPATCHED) {
 			/*
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index ba069459c101..ae4512e2acc9 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -23,7 +23,7 @@ static DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
 
 struct klp_patch *klp_transition_patch;
 
-static int klp_target_state = KLP_TRANSITION_IDLE;
+int klp_target_state = KLP_TRANSITION_IDLE;
 
 static unsigned int klp_signals_cnt;
 
@@ -294,6 +294,13 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
 {
 	int ret;
 
+	/* If the patch_state remains KLP_TRANSITION_IDLE at this point, it
+	 * indicates that the task was forked after klp_init_transition(). For
+	 * this newly forked task, it is now safe to perform the switch.
+	 */
+	if (task->patch_state == KLP_TRANSITION_IDLE)
+		goto out;
+
 	if (task_curr(task) && task != current)
 		return -EBUSY;
 
@@ -301,6 +308,7 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg)
 	if (ret)
 		return ret;
 
+out:
 	clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
 	task->patch_state = klp_target_state;
 	return 0;
@@ -466,11 +474,11 @@ void klp_try_complete_transition(void)
 	 * Usually this will transition most (or all) of the tasks on a system
 	 * unless the patch includes changes to a very common function.
	 */
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	for_each_process_thread(g, task)
 		if (!klp_try_switch_task(task))
 			complete = false;
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	/*
 	 * Ditto for the idle "swapper" tasks.
@@ -694,25 +702,10 @@
 }
 
 /* Called from copy_process() during fork */
-void klp_copy_process(struct task_struct *child)
+void klp_init_process(struct task_struct *child)
 {
-
-	/*
-	 * The parent process may have gone through a KLP transition since
-	 * the thread flag was copied in setup_thread_stack earlier. Bring
-	 * the task flag up to date with the parent here.
-	 *
-	 * The operation is serialized against all klp_*_transition()
-	 * operations by the tasklist_lock. The only exceptions are
-	 * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
-	 * cannot race with them because we are current.
-	 */
-	if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
-		set_tsk_thread_flag(child, TIF_PATCH_PENDING);
-	else
-		clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
-
-	child->patch_state = current->patch_state;
+	clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
+	child->patch_state = KLP_TRANSITION_IDLE;
 }
 
 /*
diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h
index 322db16233de..febcf1d50fc5 100644
--- a/kernel/livepatch/transition.h
+++ b/kernel/livepatch/transition.h
@@ -5,6 +5,7 @@
 #include <linux/livepatch.h>
 
 extern struct klp_patch *klp_transition_patch;
+extern int klp_target_state;
 
 void klp_init_transition(struct klp_patch *patch, int state);
 void klp_cancel_transition(void);
-- 
2.43.5
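
For review convenience, here is the lifecycle of a task forked while a
transition is in flight, under this patch. This is an illustrative
sketch only, stitched together from the hunks above; it is not itself
part of the patch:

	/* copy_process(): the child starts in a known state, no matter
	 * how far the parent has progressed through the transition.
	 */
	klp_init_process(p);	/* TIF_PATCH_PENDING cleared,
				 * patch_state = KLP_TRANSITION_IDLE */

	/* Whichever of these runs first then moves the child to
	 * klp_target_state:
	 *
	 * 1) klp_check_and_switch_task() sees KLP_TRANSITION_IDLE and
	 *    switches the task immediately, skipping the stack check
	 *    (the stack was empty at fork).
	 *
	 * 2) klp_ftrace_handler() sees KLP_TRANSITION_IDLE on the task's
	 *    first call into a patched function and switches it there.
	 *
	 * Either way, a task that the rcu_read_lock()-protected walk in
	 * klp_try_complete_transition() misses still ends up in
	 * klp_target_state, which is why tasklist_lock is no longer
	 * needed to serialize against fork.
	 */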