On Thu, Jan 24, 2019 at 03:58:59PM -0800, Alexei Starovoitov wrote: > On Thu, Jan 24, 2019 at 07:01:09PM +0100, Peter Zijlstra wrote: > > > > Thanks for having kernel/locking people on Cc... > > > > On Wed, Jan 23, 2019 at 08:13:55PM -0800, Alexei Starovoitov wrote: > > > > > Implementation details: > > > - on !SMP bpf_spin_lock() becomes nop > > > > Because no BPF program is preemptible? I don't see any assertions or > > even a comment that says this code is non-preemptible. > > > > AFAICT some of the BPF_RUN_PROG things are under rcu_read_lock() only, > > which is not sufficient. > > nope. all bpf prog types disable preemption. That is must have for all > sorts of things to work properly. > If there is a prog type that doing rcu_read_lock only it's a serious bug. > About a year or so ago we audited everything specifically to make > sure everything disables preemption before calling bpf progs. > I'm pretty sure nothing crept in in the mean time.
Do we want something like (the completely untested) below to avoid having to manually audit this over and over? --- include/linux/filter.h | 2 +- include/linux/kernel.h | 9 +++++++-- kernel/sched/core.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d531d4250bff..4ab51e78da36 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -513,7 +513,7 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) +#define BPF_PROG_RUN(filter, ctx) ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); }) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 8f0e68e250a7..f4cea3260a28 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -245,8 +245,10 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - void ___might_sleep(const char *file, int line, int preempt_offset); - void __might_sleep(const char *file, int line, int preempt_offset); +extern void ___might_sleep(const char *file, int line, int preempt_offset); +extern void __might_sleep(const char *file, int line, int preempt_offset); +extern void __cant_sleep(const char *file, int line, int preempt_offset); + /** * might_sleep - annotation for functions that can sleep * @@ -259,6 +261,8 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) +# define cant_sleep() \ + do { __cant_sleep(__FILE__, __LINE__, 0); } while (0) # define sched_annotate_sleep() (current->task_state_change = 0) #else static inline void ___might_sleep(const char *file, int line, @@ -266,6 +270,7 @@ extern int _cond_resched(void); static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define cant_sleep() do { } while (0) # define sched_annotate_sleep() do { } while (0) #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee7763641348..799c285f4e0f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6162,6 +6162,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset) add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } EXPORT_SYMBOL(___might_sleep); + +void __cant_sleep(const char *file, int line, int preempt_offset) +{ + static unsigned long prev_jiffy; + + if (irqs_disabled()) + return; + + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return; + + if (preempt_count() > preempt_offset) + return; + + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); + + debug_show_held_locks(current); + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} +EXPORT_SYMBOL_GPL(__cant_sleep); #endif #ifdef CONFIG_MAGIC_SYSRQ