On Tue, Jun 16, 2015 at 02:38:53PM +0200, Daniel Wagner wrote: > On 06/16/2015 02:27 PM, Paul E. McKenney wrote: > > On Mon, Jun 15, 2015 at 10:45:05PM -0700, Alexei Starovoitov wrote: > >> On 6/15/15 7:14 PM, Paul E. McKenney wrote: > >>> > >>> Why do you believe that it is better to fix it within call_rcu()? > >> > >> found it: > >> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c > >> index 8cf7304b2867..a3be09d482ae 100644 > >> --- a/kernel/rcu/tree.c > >> +++ b/kernel/rcu/tree.c > >> @@ -935,9 +935,9 @@ bool notrace rcu_is_watching(void) > >> { > >> bool ret; > >> > >> - preempt_disable(); > >> + preempt_disable_notrace(); > >> ret = __rcu_is_watching(); > >> - preempt_enable(); > >> + preempt_enable_notrace(); > >> return ret; > >> } > >> > >> the rcu_is_watching() and __rcu_is_watching() are already marked > >> notrace, so imo it's a good 'fix'. > >> What was happening is that the above preempt_enable was triggering > >> recursive call_rcu that was indeed messing 'rdp' that was > >> prepared by __call_rcu and before __call_rcu_core could use that. > > > >> btw, also noticed that local_irq_save done by note_gp_changes > >> is partially redundant. In __call_rcu_core path the irqs are > >> already disabled. > > > > But you said earlier that nothing happened when interrupts were > > disabled. And interrupts are disabled across the call to > > rcu_is_watching() in __call_rcu_core(). So why did those calls > > to preempt_disable() and preempt_enable() cause trouble? > > Just for the record: Using a thread for freeing the memory is curing the > problem without the need to modify rcu_is_watching.
I must confess to liking this approach better than guaranteeing full-up reentrancy in call_rcu() and kfree_rcu(). ;-) Thanx, Paul > Here is the hack: > > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c > index 83c209d..8d73be3 100644 > --- a/kernel/bpf/hashtab.c > +++ b/kernel/bpf/hashtab.c > @@ -13,6 +13,8 @@ > #include <linux/jhash.h> > #include <linux/filter.h> > #include <linux/vmalloc.h> > +#include <linux/kthread.h> > +#include <linux/spinlock.h> > > struct bpf_htab { > struct bpf_map map; > @@ -27,10 +29,14 @@ struct bpf_htab { > struct htab_elem { > struct hlist_node hash_node; > struct rcu_head rcu; > + struct list_head list; > u32 hash; > char key[0] __aligned(8); > }; > > +static LIST_HEAD(elem_freelist); > +static DEFINE_SPINLOCK(elem_freelist_lock); > + > /* Called from syscall */ > static struct bpf_map *htab_map_alloc(union bpf_attr *attr) > { > @@ -228,6 +234,7 @@ static int htab_map_update_elem(struct bpf_map *map, > void *key, void *value, > memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); > > l_new->hash = htab_map_hash(l_new->key, key_size); > + INIT_LIST_HEAD(&l_new->list); > > /* bpf_map_update_elem() can be called in_irq() */ > spin_lock_irqsave(&htab->lock, flags); > @@ -300,11 +307,17 @@ static int htab_map_delete_elem(struct bpf_map > *map, void *key) > if (l) { > hlist_del_rcu(&l->hash_node); > htab->count--; > - kfree_rcu(l, rcu); > + /* kfree_rcu(l, rcu); */ > ret = 0; > } > > spin_unlock_irqrestore(&htab->lock, flags); > + > + if (l) { > + spin_lock_irqsave(&elem_freelist_lock, flags); > + list_add(&l->list, &elem_freelist); > + spin_unlock_irqrestore(&elem_freelist_lock, flags); > + } > return ret; > } > > @@ -359,9 +372,31 @@ static struct bpf_map_type_list htab_type > __read_mostly = { > .type = BPF_MAP_TYPE_HASH, > }; > > +static int free_thread(void *arg) > +{ > + unsigned long flags; > + struct htab_elem *l; > + > + while (!kthread_should_stop()) { > + spin_lock_irqsave(&elem_freelist_lock, flags); > + while (!list_empty(&elem_freelist)) { > + l = list_entry(elem_freelist.next, > + struct htab_elem, list); > + list_del(&l->list); > + kfree(l); > + } > + spin_unlock_irqrestore(&elem_freelist_lock, flags); > + } > + > + return 0; > +} > + > static int __init register_htab_map(void) > { > bpf_register_map_type(&htab_type); > + > + kthread_run(free_thread, NULL, "free_thread"); > + > return 0; > } > late_initcall(register_htab_map); > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/