On Fri, Oct 04, 2013 at 08:46:40PM +0200, Oleg Nesterov wrote:
> Add rcu_sync_struct->exclusive boolean set by rcu_sync_init();
> it obviously controls the exclusiveness of rcu_sync_enter().
> This is what percpu_down_write() actually wants.
>
> We turn ->gp_wait into "struct completion gp_comp", which is used
> as a resource counter in "exclusive" mode. Otherwise we only use
> its completion->wait member for wait_event/wake_up_all. We never
> mix the completion/wait_queue_head_t operations.
>
> Note: it would be cleaner to do __complete_locked() under
> ->rss_lock in rcu_sync_exit() in the "else" branch, but we don't
> have this trivial helper.
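For context, rcu_sync_exit() itself is not part of the quoted hunks. Here is a
rough, hypothetical sketch of the exclusive-mode exit path the changelog
describes; the function name rcu_sync_exit_sketch, the handoff flag and the
deferred complete() are guesses for illustration, not lines from the actual
patch:

	static void rcu_sync_exit_sketch(struct rcu_sync_struct *rss)
	{
		bool handoff = false;

		spin_lock_irq(&rss->rss_lock);
		if (!--rss->gp_count) {
			/* last user; the (unchanged) cb_state machinery runs here */
		} else if (rss->exclusive) {
			/*
			 * The "else" branch from the note above: a later writer is
			 * sleeping in wait_for_completion() inside rcu_sync_enter().
			 * complete() cannot be called right here because ->rss_lock
			 * *is* ->gp_comp.wait.lock, hence the wish for a
			 * __complete_locked() helper.
			 */
			handoff = true;
		}
		spin_unlock_irq(&rss->rss_lock);

		/* lacking __complete_locked(), hand off after dropping the lock */
		if (handoff)
			complete(&rss->gp_comp);
	}

Deferring the complete() like this avoids the self-deadlock at the cost of a
small window where neither side holds the lock; doing the wakeup under the
lock needs either the missing helper or the open-coded sequence below.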
Something equivalent in available functions would be:

	rss->gp_comp.done++;
	__wake_up_locked_key(&rss->gp_comp.wait, TASK_NORMAL, NULL);

>  struct rcu_sync_struct {
>  	int			gp_state;
>  	int			gp_count;
> -	wait_queue_head_t	gp_wait;
> +	struct completion	gp_comp;
>
>  	int			cb_state;
>  	struct rcu_head		cb_head;
>
> +	bool			exclusive;
>  	struct rcu_sync_ops	*ops;
>  };

I suppose we have a hole before or after cb_state to fit exclusive in; but
now it looks like we're going to create another hole before the *ops pointer.

> @@ -4,7 +4,7 @@
>  enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
>  enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
>
> -#define	rss_lock	gp_wait.lock
> +#define	rss_lock	gp_comp.wait.lock

Should we, for convenience, also do:

	#define rss_wait	gp_comp.wait

>  #ifdef CONFIG_PROVE_RCU
>  #define __INIT_HELD(func)	.held = func,

> @@ -33,11 +33,13 @@ struct rcu_sync_ops rcu_sync_ops_array[] = {
>  	},
>  };
>
> -void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
> +void rcu_sync_init(struct rcu_sync_struct *rss,
> +		   enum rcu_sync_type type, bool excl)
>  {
>  	memset(rss, 0, sizeof(*rss));
> -	init_waitqueue_head(&rss->gp_wait);
> +	init_completion(&rss->gp_comp);
>  	rss->ops = rcu_sync_ops_array + type;
> +	rss->exclusive = excl;
>  }
>
>  void rcu_sync_enter(struct rcu_sync_struct *rss)

> @@ -56,9 +58,13 @@ void rcu_sync_enter(struct rcu_sync_struct *rss)
>  	if (need_sync) {
>  		rss->ops->sync();
>  		rss->gp_state = GP_PASSED;
> -		wake_up_all(&rss->gp_wait);
> +		if (!rss->exclusive)
> +			wake_up_all(&rss->gp_comp.wait);
>  	} else if (need_wait) {
> -		wait_event(rss->gp_wait, rss->gp_state == GP_PASSED);
> +		if (!rss->exclusive)
> +			wait_event(rss->gp_comp.wait, rss->gp_state == GP_PASSED);
> +		else
> +			wait_for_completion(&rss->gp_comp);

I'm still not entirely sure why we need the completion; we already have the
gp_count variable and a waitqueue; together those should be able to implement
the condition/semaphore variable, no?

  wait_for_completion:

	spin_lock_irq(&rss->rss_lock);
	if (rss->gp_count > 0) {
		__wait_event_locked(rss->gp_comp.wait, !rss->gp_count,
				    TASK_UNINTERRUPTIBLE, 1, 0);
	}
	spin_unlock_irq(&rss->rss_lock);

  complete:

	bool excl = rss->exclusive;

	spin_lock_irq(&rss->rss_lock);
	if (!--rss->gp_count) {
		__wake_up_locked_key(&rss->gp_comp.wait, TASK_NORMAL, NULL);
	}
	spin_unlock_irq(&rss->rss_lock);

All we would need would be something like:

--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -422,7 +422,7 @@ do { \
 })

-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __wait_event_locked(wq, condition, state, exclusive, irq)	\
 ({									\
 	int __ret = 0;							\
 	DEFINE_WAIT(__wait);						\
@@ -431,8 +431,8 @@ do { \
 	do {								\
 		if (likely(list_empty(&__wait.task_list)))		\
 			__add_wait_queue_tail(&(wq), &__wait);		\
-		set_current_state(TASK_INTERRUPTIBLE);			\
-		if (signal_pending(current)) {				\
+		set_current_state(state);				\
+		if (___wait_signal_pending(state)) {			\
 			__ret = -ERESTARTSYS;				\
 			break;						\
 		}							\
@@ -451,6 +451,8 @@ do { \
 	__ret;								\
 })

+#define __wait_event_interruptible_locked(wq, condition, exclusive, irq)\
+	__wait_event_locked(wq, condition, TASK_INTERRUPTIBLE, exclusive, irq)

 /**
  * wait_event_interruptible_locked - sleep until a condition gets true
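On the "we already have gp_count and a waitqueue" argument: this is the
classic counter-plus-condition-variable construction. A minimal userspace
analogue, purely for illustration (pthreads; the names struct gp,
gp_wait_for_zero and gp_put are invented here and are not part of the kernel
code discussed above):

	/* build with: cc -pthread gp_demo.c */
	#include <pthread.h>
	#include <stdio.h>

	/* counter + condvar standing in for ->gp_count + the waitqueue */
	struct gp {
		pthread_mutex_t	lock;	/* plays the role of ->rss_lock */
		pthread_cond_t	wait;	/* plays the role of the waitqueue */
		int		count;	/* plays the role of ->gp_count */
	};

	/* "wait_for_completion": sleep until the count drops to zero */
	static void gp_wait_for_zero(struct gp *gp)
	{
		pthread_mutex_lock(&gp->lock);
		while (gp->count > 0)
			pthread_cond_wait(&gp->wait, &gp->lock);
		pthread_mutex_unlock(&gp->lock);
	}

	/* "complete": drop a reference, wake the waiters when it hits zero */
	static void gp_put(struct gp *gp)
	{
		pthread_mutex_lock(&gp->lock);
		if (--gp->count == 0)
			pthread_cond_broadcast(&gp->wait);
		pthread_mutex_unlock(&gp->lock);
	}

	static struct gp g = {
		.lock	= PTHREAD_MUTEX_INITIALIZER,
		.wait	= PTHREAD_COND_INITIALIZER,
		.count	= 1,
	};

	static void *holder(void *arg)
	{
		puts("holder: done, releasing");
		gp_put(&g);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, holder, NULL);
		gp_wait_for_zero(&g);		/* blocks until gp_put() runs */
		puts("waiter: count reached zero");
		pthread_join(t, NULL);
		return 0;
	}

gp_wait_for_zero() plays the part of the open-coded wait_for_completion
above and gp_put() the part of complete(); the mutex/condvar pair is the
userspace equivalent of taking ->rss_lock and sleeping on the waitqueue,
which is the structure the __wait_event_locked() variant would give in-kernel.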