On Tue, Nov 27, 2012 at 07:33:25PM +0100, Frederic Weisbecker wrote:
> Create a new subsystem that probes on kernel boundaries
> to keep track of the transitions between level contexts
> with two basic initial contexts: user or kernel.
>
> This is an abstraction of some RCU code that uses such tracking
> to implement its userspace extended quiescent state.
>
> We need to pull this up from RCU into this new level of indirection
> because this tracking is also going to be used to implement an "on
> demand" generic virtual cputime accounting. A necessary step to
> shut down the tick while still accounting the cputime.
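As a usage illustration only (not part of the patch): an architecture that selects HAVE_CONTEXT_TRACKING is expected to call the new hooks from its slow paths roughly as sketched below. The two handler names are hypothetical; only user_enter()/user_exit(), the exception_enter()/exception_exit() wrappers and the TIF_NOHZ flag come from the series.

#include <linux/context_tracking.h>	/* user_enter(), user_exit() */
#include <asm/context_tracking.h>	/* exception_enter(), exception_exit() */

/* Hypothetical syscall slow path, taken when TIF_NOHZ is set on the task. */
static void example_syscall_slowpath(struct pt_regs *regs)
{
	user_exit();		/* note the user -> kernel transition */
	/* ... trace, audit and handle the syscall ... */
	user_enter();		/* note the kernel -> user transition */
}

/* Hypothetical trap handler, wrapped the same way as do_page_fault() below. */
static void example_trap_handler(struct pt_regs *regs)
{
	exception_enter(regs);	/* calls user_exit(); no-op if already in kernel state */
	/* ... handle the exception ... */
	exception_exit(regs);	/* calls user_enter() again only if regs show user mode */
}

Interrupts need no such wrapping because, as the Kconfig help text notes, they are already covered by rcu_irq_enter()/rcu_irq_exit().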
I have queued this, and if it passes tests and inspection will try pushing it for 3.8.

							Thanx, Paul

> Signed-off-by: Frederic Weisbecker <fweis...@gmail.com>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: H. Peter Anvin <h...@zytor.com>
> Cc: Ingo Molnar <mi...@kernel.org>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Steven Rostedt <rost...@goodmis.org>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Li Zhong <zh...@linux.vnet.ibm.com>
> Cc: Gilad Ben-Yossef gi...@benyossef.com
> ---
> Changes since last version address Gilad's comments and include ifdef fixes.
> Also CONTEXT_TRACKING_FORCE option has been moved below RCU user mode config
> as it's the only user for now.
>
>  arch/Kconfig                                       |   15 ++--
>  arch/x86/Kconfig                                   |    2 +-
>  arch/x86/include/asm/{rcu.h => context_tracking.h} |   15 ++--
>  arch/x86/kernel/entry_64.S                         |    2 +-
>  arch/x86/kernel/ptrace.c                           |    8 +-
>  arch/x86/kernel/signal.c                           |    5 +-
>  arch/x86/kernel/traps.c                            |    2 +-
>  arch/x86/mm/fault.c                                |    2 +-
>  include/linux/context_tracking.h                   |   18 ++++
>  include/linux/rcupdate.h                           |    2 -
>  init/Kconfig                                       |   28 ++++----
>  kernel/Makefile                                    |    1 +
>  kernel/context_tracking.c                          |   83 ++++++++++++++++++++
>  kernel/rcutree.c                                   |   64 +---------------
>  kernel/sched/core.c                                |   11 ++-
>  15 files changed, 150 insertions(+), 108 deletions(-)
>  rename arch/x86/include/asm/{rcu.h => context_tracking.h} (63%)
>  create mode 100644 include/linux/context_tracking.h
>  create mode 100644 kernel/context_tracking.c
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 366ec06..cc74aae 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -300,15 +300,16 @@ config SECCOMP_FILTER
>
>  	  See Documentation/prctl/seccomp_filter.txt for details.
>
> -config HAVE_RCU_USER_QS
> +config HAVE_CONTEXT_TRACKING
>  	bool
>  	help
> -	  Provide kernel entry/exit hooks necessary for userspace
> -	  RCU extended quiescent state. Syscalls need to be wrapped inside
> -	  rcu_user_exit()-rcu_user_enter() through the slow path using
> -	  TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
> -	  are already protected inside rcu_irq_enter/rcu_irq_exit() but
> -	  preemption or signal handling on irq exit still need to be protected.
> +	  Provide kernel/user boundaries probes necessary for subsystems
> +	  that need it, such as userspace RCU extended quiescent state.
> +	  Syscalls need to be wrapped inside user_exit()-user_enter() through
> +	  the slow path using TIF_NOHZ flag. Exceptions handlers must be
> +	  wrapped as well. Irqs are already protected inside
> +	  rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on
> +	  irq exit still need to be protected.
>
>  config HAVE_VIRT_CPU_ACCOUNTING
>  	bool
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 46c3bff..110cfad 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -106,7 +106,7 @@ config X86
>  	select KTIME_SCALAR if X86_32
>  	select GENERIC_STRNCPY_FROM_USER
>  	select GENERIC_STRNLEN_USER
> -	select HAVE_RCU_USER_QS if X86_64
> +	select HAVE_CONTEXT_TRACKING if X86_64
>  	select HAVE_IRQ_TIME_ACCOUNTING
>  	select GENERIC_KERNEL_THREAD
>  	select GENERIC_KERNEL_EXECVE
> diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/context_tracking.h
> similarity index 63%
> rename from arch/x86/include/asm/rcu.h
> rename to arch/x86/include/asm/context_tracking.h
> index d1ac07a..1616562 100644
> --- a/arch/x86/include/asm/rcu.h
> +++ b/arch/x86/include/asm/context_tracking.h
> @@ -1,27 +1,26 @@
> -#ifndef _ASM_X86_RCU_H
> -#define _ASM_X86_RCU_H
> +#ifndef _ASM_X86_CONTEXT_TRACKING_H
> +#define _ASM_X86_CONTEXT_TRACKING_H
>
>  #ifndef __ASSEMBLY__
> -
> -#include <linux/rcupdate.h>
> +#include <linux/context_tracking.h>
>  #include <asm/ptrace.h>
>
>  static inline void exception_enter(struct pt_regs *regs)
>  {
> -	rcu_user_exit();
> +	user_exit();
>  }
>
>  static inline void exception_exit(struct pt_regs *regs)
>  {
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>  	if (user_mode(regs))
> -		rcu_user_enter();
> +		user_enter();
>  #endif
>  }
>
>  #else /* __ASSEMBLY__ */
>
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>  # define SCHEDULE_USER call schedule_user
>  #else
>  # define SCHEDULE_USER call schedule
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 0c58952..98faeb3 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -56,7 +56,7 @@
>  #include <asm/ftrace.h>
>  #include <asm/percpu.h>
>  #include <asm/asm.h>
> -#include <asm/rcu.h>
> +#include <asm/context_tracking.h>
>  #include <asm/smap.h>
>  #include <linux/err.h>
>
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index eff5b8c..65b88a5 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -21,7 +21,7 @@
>  #include <linux/signal.h>
>  #include <linux/perf_event.h>
>  #include <linux/hw_breakpoint.h>
> -#include <linux/rcupdate.h>
> +#include <linux/context_tracking.h>
>
>  #include <asm/uaccess.h>
>  #include <asm/pgtable.h>
> @@ -1461,7 +1461,7 @@ long syscall_trace_enter(struct pt_regs *regs)
>  {
>  	long ret = 0;
>
> -	rcu_user_exit();
> +	user_exit();
>
>  	/*
>  	 * If we stepped into a sysenter/syscall insn, it trapped in
> @@ -1516,7 +1516,7 @@ void syscall_trace_leave(struct pt_regs *regs)
>  	 * or do_notify_resume(), in which case we can be in RCU
>  	 * user mode.
>  	 */
> -	rcu_user_exit();
> +	user_exit();
>
>  	audit_syscall_exit(regs);
>
> @@ -1534,5 +1534,5 @@ void syscall_trace_leave(struct pt_regs *regs)
>  	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
>  		tracehook_report_syscall_exit(regs, step);
>
> -	rcu_user_enter();
> +	user_enter();
>  }
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 29ad351..20ecac1 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -22,6 +22,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/user-return-notifier.h>
>  #include <linux/uprobes.h>
> +#include <linux/context_tracking.h>
>
>  #include <asm/processor.h>
>  #include <asm/ucontext.h>
> @@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs)
>  void
>  do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
>  {
> -	rcu_user_exit();
> +	user_exit();
>
>  #ifdef CONFIG_X86_MCE
>  	/* notify userspace of pending MCEs */
> @@ -840,7 +841,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
>  	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
>  		fire_user_return_notifiers();
>
> -	rcu_user_enter();
> +	user_enter();
>  }
>
>  void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 8276dc6..eb85866 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -55,7 +55,7 @@
>  #include <asm/i387.h>
>  #include <asm/fpu-internal.h>
>  #include <asm/mce.h>
> -#include <asm/rcu.h>
> +#include <asm/context_tracking.h>
>
>  #include <asm/mach_traps.h>
>
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 8e13ecb..b0b1f1d 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -18,7 +18,7 @@
>  #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
>  #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
>  #include <asm/fixmap.h>			/* VSYSCALL_START		*/
> -#include <asm/rcu.h>			/* exception_enter(), ...	*/
> +#include <asm/context_tracking.h>	/* exception_enter(), ...	*/
>
>  /*
>   * Page fault error code bits:
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> new file mode 100644
> index 0000000..e24339c
> --- /dev/null
> +++ b/include/linux/context_tracking.h
> @@ -0,0 +1,18 @@
> +#ifndef _LINUX_CONTEXT_TRACKING_H
> +#define _LINUX_CONTEXT_TRACKING_H
> +
> +#ifdef CONFIG_CONTEXT_TRACKING
> +#include <linux/sched.h>
> +
> +extern void user_enter(void);
> +extern void user_exit(void);
> +extern void context_tracking_task_switch(struct task_struct *prev,
> +					 struct task_struct *next);
> +#else
> +static inline void user_enter(void) { }
> +static inline void user_exit(void) { }
> +static inline void context_tracking_task_switch(struct task_struct *prev,
> +						struct task_struct *next) { }
> +#endif /* !CONFIG_CONTEXT_TRACKING */
> +
> +#endif
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 8fe7c18..275aa3f 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -222,8 +222,6 @@ extern void rcu_user_enter(void);
>  extern void rcu_user_exit(void);
>  extern void rcu_user_enter_after_irq(void);
>  extern void rcu_user_exit_after_irq(void);
> -extern void rcu_user_hooks_switch(struct task_struct *prev,
> -				  struct task_struct *next);
>  #else
>  static inline void rcu_user_enter(void) { }
>  static inline void rcu_user_exit(void) { }
> diff --git a/init/Kconfig b/init/Kconfig
> index 5ac6ee0..2054e04 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -486,9 +486,13 @@ config PREEMPT_RCU
>  	  This option enables preemptible-RCU code that is common between
>  	  the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
>
> +config CONTEXT_TRACKING
> +	bool
> +
>  config RCU_USER_QS
>  	bool "Consider userspace as in RCU extended quiescent state"
> -	depends on HAVE_RCU_USER_QS && SMP
> +	depends on HAVE_CONTEXT_TRACKING && SMP
> +	select CONTEXT_TRACKING
>  	help
>  	  This option sets hooks on kernel / userspace boundaries and
>  	  puts RCU in extended quiescent state when the CPU runs in
> @@ -497,24 +501,20 @@ config RCU_USER_QS
>  	  try to keep the timer tick on for RCU.
>
>  	  Unless you want to hack and help the development of the full
> -	  tickless feature, you shouldn't enable this option. It also
> +	  dynticks mode, you shouldn't enable this option. It also
>  	  adds unnecessary overhead.
>
>  	  If unsure say N
>
> -config RCU_USER_QS_FORCE
> -	bool "Force userspace extended QS by default"
> -	depends on RCU_USER_QS
> +config CONTEXT_TRACKING_FORCE
> +	bool "Force context tracking"
> +	depends on CONTEXT_TRACKING
>  	help
> -	  Set the hooks in user/kernel boundaries by default in order to
> -	  test this feature that treats userspace as an extended quiescent
> -	  state until we have a real user like a full adaptive nohz option.
> -
> -	  Unless you want to hack and help the development of the full
> -	  tickless feature, you shouldn't enable this option. It adds
> -	  unnecessary overhead.
> -
> -	  If unsure say N
> +	  Probe on user/kernel boundaries by default in order to
> +	  test the features that rely on it such as userspace RCU extended
> +	  quiescent states.
> +	  This test is there for debugging until we have a real user like the
> +	  full dynticks mode.
>
>  config RCU_FANOUT
>  	int "Tree-based hierarchical RCU fanout value"
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 0dfeca4..f90bbfc 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
>  obj-$(CONFIG_PADATA) += padata.o
>  obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>  obj-$(CONFIG_JUMP_LABEL) += jump_label.o
> +obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
>
>  $(obj)/configs.o: $(obj)/config_data.h
>
> diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
> new file mode 100644
> index 0000000..e0e07fd
> --- /dev/null
> +++ b/kernel/context_tracking.c
> @@ -0,0 +1,83 @@
> +#include <linux/context_tracking.h>
> +#include <linux/rcupdate.h>
> +#include <linux/sched.h>
> +#include <linux/percpu.h>
> +#include <linux/hardirq.h>
> +
> +struct context_tracking {
> +	/*
> +	 * When active is false, hooks are not set to
> +	 * minimize overhead: TIF flags are cleared
> +	 * and calls to user_enter/exit are ignored. This
> +	 * may be further optimized using static keys.
> +	 */
> +	bool active;
> +	enum {
> +		IN_KERNEL = 0,
> +		IN_USER,
> +	} state;
> +};
> +
> +static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
> +#ifdef CONFIG_CONTEXT_TRACKING_FORCE
> +	.active = true,
> +#endif
> +};
> +
> +void user_enter(void)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Some contexts may involve an exception occuring in an irq,
> +	 * leading to that nesting:
> +	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> +	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> +	 * helpers are enough to protect RCU uses inside the exception. So
> +	 * just return immediately if we detect we are in an IRQ.
> +	 */
> +	if (in_interrupt())
> +		return;
> +
> +	WARN_ON_ONCE(!current->mm);
> +
> +	local_irq_save(flags);
> +	if (__this_cpu_read(context_tracking.active) &&
> +	    __this_cpu_read(context_tracking.state) != IN_USER) {
> +		__this_cpu_write(context_tracking.state, IN_USER);
> +		rcu_user_enter();
> +	}
> +	local_irq_restore(flags);
> +}
> +
> +void user_exit(void)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Some contexts may involve an exception occuring in an irq,
> +	 * leading to that nesting:
> +	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> +	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> +	 * helpers are enough to protect RCU uses inside the exception. So
> +	 * just return immediately if we detect we are in an IRQ.
> +	 */
> +	if (in_interrupt())
> +		return;
> +
> +	local_irq_save(flags);
> +	if (__this_cpu_read(context_tracking.state) == IN_USER) {
> +		__this_cpu_write(context_tracking.state, IN_KERNEL);
> +		rcu_user_exit();
> +	}
> +	local_irq_restore(flags);
> +}
> +
> +void context_tracking_task_switch(struct task_struct *prev,
> +				  struct task_struct *next)
> +{
> +	if (__this_cpu_read(context_tracking.active)) {
> +		clear_tsk_thread_flag(prev, TIF_NOHZ);
> +		set_tsk_thread_flag(next, TIF_NOHZ);
> +	}
> +}
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 7733eb5..e441b77 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -207,9 +207,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
>  DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
>  	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
>  	.dynticks = ATOMIC_INIT(1),
> -#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
> -	.ignore_user_qs = true,
> -#endif
>  };
>
>  static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
> @@ -420,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
>   */
>  void rcu_user_enter(void)
>  {
> -	unsigned long flags;
> -	struct rcu_dynticks *rdtp;
> -
> -	/*
> -	 * Some contexts may involve an exception occuring in an irq,
> -	 * leading to that nesting:
> -	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> -	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> -	 * helpers are enough to protect RCU uses inside the exception. So
> -	 * just return immediately if we detect we are in an IRQ.
> -	 */
> -	if (in_interrupt())
> -		return;
> -
> -	WARN_ON_ONCE(!current->mm);
> -
> -	local_irq_save(flags);
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> -	if (!rdtp->ignore_user_qs && !rdtp->in_user) {
> -		rdtp->in_user = true;
> -		rcu_eqs_enter(true);
> -	}
> -	local_irq_restore(flags);
> +	rcu_eqs_enter(1);
>  }
>
>  /**
> @@ -579,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
>   */
>  void rcu_user_exit(void)
>  {
> -	unsigned long flags;
> -	struct rcu_dynticks *rdtp;
> -
> -	/*
> -	 * Some contexts may involve an exception occuring in an irq,
> -	 * leading to that nesting:
> -	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> -	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> -	 * helpers are enough to protect RCU uses inside the exception. So
> -	 * just return immediately if we detect we are in an IRQ.
> -	 */
> -	if (in_interrupt())
> -		return;
> -
> -	local_irq_save(flags);
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> -	if (rdtp->in_user) {
> -		rdtp->in_user = false;
> -		rcu_eqs_exit(true);
> -	}
> -	local_irq_restore(flags);
> +	rcu_eqs_exit(1);
>  }
>
>  /**
> @@ -722,21 +677,6 @@ int rcu_is_cpu_idle(void)
>  }
>  EXPORT_SYMBOL(rcu_is_cpu_idle);
>
> -#ifdef CONFIG_RCU_USER_QS
> -void rcu_user_hooks_switch(struct task_struct *prev,
> -			   struct task_struct *next)
> -{
> -	struct rcu_dynticks *rdtp;
> -
> -	/* Interrupts are disabled in context switch */
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> -	if (!rdtp->ignore_user_qs) {
> -		clear_tsk_thread_flag(prev, TIF_NOHZ);
> -		set_tsk_thread_flag(next, TIF_NOHZ);
> -	}
> -}
> -#endif /* #ifdef CONFIG_RCU_USER_QS */
> -
>  #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
>
>  /*
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 36f2608..80f80df 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -72,6 +72,7 @@
>  #include <linux/slab.h>
>  #include <linux/init_task.h>
>  #include <linux/binfmts.h>
> +#include <linux/context_tracking.h>
>
>  #include <asm/switch_to.h>
>  #include <asm/tlb.h>
> @@ -1886,8 +1887,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
>  	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
>  #endif
>
> +	context_tracking_task_switch(prev, next);
>  	/* Here we just switch the register state and the stack. */
> -	rcu_user_hooks_switch(prev, next);
>  	switch_to(prev, next, prev);
>
>  	barrier();
> @@ -2911,7 +2912,7 @@ asmlinkage void __sched schedule(void)
>  }
>  EXPORT_SYMBOL(schedule);
>
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>  asmlinkage void __sched schedule_user(void)
>  {
>  	/*
> @@ -2920,9 +2921,9 @@ asmlinkage void __sched schedule_user(void)
>  	 * we haven't yet exited the RCU idle mode. Do it here manually until
>  	 * we find a better solution.
>  	 */
> -	rcu_user_exit();
> +	user_exit();
>  	schedule();
> -	rcu_user_enter();
> +	user_enter();
>  }
>  #endif
>
> @@ -3027,7 +3028,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
>  	/* Catch callers which need to be fixed */
>  	BUG_ON(ti->preempt_count || !irqs_disabled());
>
> -	rcu_user_exit();
> +	user_exit();
>  	do {
>  		add_preempt_count(PREEMPT_ACTIVE);
>  		local_irq_enable();
> --
> 1.7.5.4