On Thu, Jul 27, 2017 at 02:59:43PM -0400, Mathieu Desnoyers wrote:
> diff --git a/kernel/membarrier.c b/kernel/membarrier.c
> index 9f9284f37f8d..8c6c0f96f617 100644
> --- a/kernel/membarrier.c
> +++ b/kernel/membarrier.c
> @@ -19,10 +19,81 @@
>  #include <linux/tick.h>
>  
>  /*
> + * XXX For cpu_rq(). Should we rather move
> + * membarrier_private_expedited() to sched/core.c or create
> + * sched/membarrier.c ?

The latter, perhaps.
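
I.e., move the file and hook it up in kernel/sched/Makefile, something
like:

	obj-$(CONFIG_MEMBARRIER) += membarrier.o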

> +static void membarrier_private_expedited(void)
> +{
> +     int cpu, this_cpu;
> +     cpumask_var_t tmpmask;
> +
> +     if (num_online_cpus() == 1)
> +             return;
> +
> +     /*
> +      * Matches memory barriers around rq->curr modification in
> +      * scheduler.
> +      */
> +     smp_mb();       /* system call entry is not a mb. */
> +
> +     if (!alloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {

Why GFP_NOWAIT? And why the fallback? There seems to be a desire to make
this a nonblocking syscall. Should we document that somewhere?
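
A sketch of the kind of comment I mean, assuming we commit to the
nonblocking behaviour (code otherwise as in your patch):

	/*
	 * This system call must not block: hence GFP_NOWAIT for the
	 * cpumask allocation, with a fallback to an IPI to each
	 * online CPU rather than failing the call.
	 */
	if (!alloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
		/* Fallback for OOM. */
		membarrier_private_expedited_ipi_each();
		goto end;
	}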

> +             /* Fallback for OOM. */
> +             membarrier_private_expedited_ipi_each();
> +             goto end;
> +     }
> +
> +     this_cpu = raw_smp_processor_id();

This is a tad dodgy; you might want to put in a comment on why migrating
this thread is OK.
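
Something along these lines, perhaps (wording is mine, just a sketch):

	/*
	 * Skipping the current CPU is OK even though this thread can
	 * be migrated at any point after reading it: the caller
	 * itself issues smp_mb() on syscall entry and exit, so it
	 * never needs an IPI, and any migration goes through the
	 * scheduler, whose rq->curr update provides the barriers we
	 * pair with.
	 */
	this_cpu = raw_smp_processor_id();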

> +     for_each_online_cpu(cpu) {

You would also need cpus_read_lock() if you rely on the online mask.
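
That is, a sketch with the loop body as in your patch:

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct task_struct *p;

		if (cpu == this_cpu)
			continue;
		rcu_read_lock();
		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
		if (p && p->mm == current->mm)
			__cpumask_set_cpu(cpu, tmpmask);
		rcu_read_unlock();
	}
	smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
	cpus_read_unlock();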

> +             struct task_struct *p;
> +
> +             if (cpu == this_cpu)
> +                     continue;
> +             rcu_read_lock();
> +             p = task_rcu_dereference(&cpu_rq(cpu)->curr);
> +             if (p && p->mm == current->mm)
> +                     __cpumask_set_cpu(cpu, tmpmask);
> +             rcu_read_unlock();
> +     }
> +     smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
> +     free_cpumask_var(tmpmask);
> +end:
> +     /*
> +      * Memory barrier on the caller thread _after_ we finished
> +      * waiting for the last IPI. Matches memory barriers around
> +      * rq->curr modification in scheduler.
> +      */
> +     smp_mb();       /* exit from system call is not a mb */
> +}

> @@ -2737,6 +2757,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
>  
>       mm = next->mm;
>       oldmm = prev->active_mm;
> +     membarrier_expedited_mb_after_set_current(mm, oldmm);
>       /*
>        * For paravirt, this is coupled with an exit in switch_to to
>        * combine the page table reload and the switch backend into

As said on IRC, finish_task_switch() already provides a smp_mb() through
the if (mm) mmdrop(mm) -> atomic_dec_and_test() path. We just need to
deal with the !mm case.
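
A sketch of that, reusing the helper from your patch (whether !mm is
exactly the right condition needs a second look):

	static inline void
	membarrier_expedited_mb_after_set_current(struct mm_struct *mm,
						  struct mm_struct *oldmm)
	{
		/*
		 * finish_task_switch() only goes through mmdrop()'s
		 * atomic_dec_and_test(), and thus the implied full
		 * barrier, when there is an mm to drop; for the !mm
		 * case provide the barrier explicitly.
		 */
		if (!mm)
			smp_mb();
	}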
