Hi Peterz

Thanks a lot for the overwhelming response :)

I will look into the change history to get a thorough understanding of
the framework.


On Wed, May 18, 2016 at 02:40:52PM +0200, Peter Zijlstra wrote:
> On Wed, May 18, 2016 at 02:30:44PM +0200, Peter Zijlstra wrote:
> >  void cpu_idle (void)
> >  {
> > +   int cpu = smp_processor_id();
> > +
> >     /* endless idle loop with no priority at all */
> >     while (1) {
> >             while (!need_resched()) {
> >                     void (*idle)(void);
> > -                   /*
> > -                    * Mark this as an RCU critical section so that
> > -                    * synchronize_kernel() in the unload path waits
> > -                    * for our completion.
> > -                    */
> > -                   rcu_read_lock();
> > +
> > +                   if (cpu_isset(cpu, cpu_idle_map))
> > +                           cpu_clear(cpu, cpu_idle_map);
> > +                   rmb();
> >                     idle = pm_idle;
> >  
> >                     if (!idle)
> >                             idle = default_idle;
> >  
> > -                   irq_stat[smp_processor_id()].idle_timestamp = jiffies;
> > +                   irq_stat[cpu].idle_timestamp = jiffies;
> >                     idle();
> > -                   rcu_read_unlock();
> >             }
> >             schedule();
> >     }
> >  }
> >  
> > +void cpu_idle_wait(void)
> > +{
> > +   int cpu;
> > +   cpumask_t map;
> > +
> > +   for_each_online_cpu(cpu)
> > +           cpu_set(cpu, cpu_idle_map);
> > +
> > +   wmb();
> > +   do {
> > +           ssleep(1);
> > +           cpus_and(map, cpu_idle_map, cpu_online_map);
> > +   } while (!cpus_empty(map));
> > +}
> > +EXPORT_SYMBOL_GPL(cpu_idle_wait);
> 
> 
> Which then got 'wrecked' by the below commit.
> 
> That commit removes the cpu_idle_state, and thereby removes the need for
> the rmb(), since you cannot 'order' one load.
> 
> All the idle loop needs to guarantee (and in today's code that's
> non-obvious) is that it _must_ reload all values on every loop.
> 
> 
> ---
> commit 783e391b7b5b273cd20856d8f6f4878da8ec31b3
> Author: Venki Pallipadi <venkatesh.pallip...@intel.com>
> Date:   Thu Apr 10 09:49:58 2008 -0700
> 
>     x86: Simplify cpu_idle_wait
>     
>     This patch also resolves hangs on boot:
>       http://lkml.org/lkml/2008/2/23/263
>       http://bugzilla.kernel.org/show_bug.cgi?id=10093
>     
>     The bug was causing once-in-few-reboots 10-15 sec wait during boot on
>     certain laptops.
>     
>     Earlier commit 40d6a146629b98d8e322b6f9332b182c7cbff3df added
>     smp_call_function in cpu_idle_wait() to kick cpus that are in tickless
>     idle.  Looking at cpu_idle_wait code at that time, code seemed to be
>     over-engineered for a case which is rarely used (while changing idle
>     handler).
>     
>     Below is a simplified version of cpu_idle_wait, which just makes a dummy
>     smp_call_function to all cpus, to make them come out of old idle handler
>     and start using the new idle handler.  It eliminates code in the idle
>     loop to handle cpu_idle_wait.
>     
>     Signed-off-by: Venkatesh Pallipadi <venkatesh.pallip...@intel.com>
>     Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>
> 
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index be3c7a299f02..43930e73f657 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
>   */
>  void (*pm_idle)(void);
>  EXPORT_SYMBOL(pm_idle);
> -static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
>  
>  void disable_hlt(void)
>  {
> @@ -190,9 +189,6 @@ void cpu_idle(void)
>               while (!need_resched()) {
>                       void (*idle)(void);
>  
> -                     if (__get_cpu_var(cpu_idle_state))
> -                             __get_cpu_var(cpu_idle_state) = 0;
> -
>                       check_pgt_cache();
>                       rmb();
>                       idle = pm_idle;
> @@ -220,40 +216,19 @@ static void do_nothing(void *unused)
>  {
>  }
>  
> +/*
> + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> + * pm_idle and update to new pm_idle value. Required while changing pm_idle
> + * handler on SMP systems.
> + *
> + * Caller must have changed pm_idle to the new value before the call. Old
> + * pm_idle value will not be used by any CPU after the return of this function.
> + */
>  void cpu_idle_wait(void)
>  {
> -     unsigned int cpu, this_cpu = get_cpu();
> -     cpumask_t map, tmp = current->cpus_allowed;
> -
> -     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
> -     put_cpu();
> -
> -     cpus_clear(map);
> -     for_each_online_cpu(cpu) {
> -             per_cpu(cpu_idle_state, cpu) = 1;
> -             cpu_set(cpu, map);
> -     }
> -
> -     __get_cpu_var(cpu_idle_state) = 0;
> -
> -     wmb();
> -     do {
> -             ssleep(1);
> -             for_each_online_cpu(cpu) {
> -                     if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
> -                             cpu_clear(cpu, map);
> -             }
> -             cpus_and(map, map, cpu_online_map);
> -             /*
> -              * We waited 1 sec, if a CPU still did not call idle
> -              * it may be because it is in idle and not waking up
> -              * because it has nothing to do.
> -              * Give all the remaining CPUS a kick.
> -              */
> -             smp_call_function_mask(map, do_nothing, NULL, 0);
> -     } while (!cpus_empty(map));
> -
> -     set_cpus_allowed(current, tmp);
> +     smp_mb();
> +     /* kick all the CPUs so that they exit out of pm_idle */
> +     smp_call_function(do_nothing, NULL, 0, 1);
>  }
>  EXPORT_SYMBOL_GPL(cpu_idle_wait);
>  
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 3baf9b9f4c87..46c4c546b499 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override);
>   */
>  void (*pm_idle)(void);
>  EXPORT_SYMBOL(pm_idle);
> -static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
>  
>  static ATOMIC_NOTIFIER_HEAD(idle_notifier);
>  
> @@ -173,9 +172,6 @@ void cpu_idle(void)
>               while (!need_resched()) {
>                       void (*idle)(void);
>  
> -                     if (__get_cpu_var(cpu_idle_state))
> -                             __get_cpu_var(cpu_idle_state) = 0;
> -
>                       rmb();
>                       idle = pm_idle;
>                       if (!idle)
> @@ -207,40 +203,19 @@ static void do_nothing(void *unused)
>  {
>  }
>  
> +/*
> + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> + * pm_idle and update to new pm_idle value. Required while changing pm_idle
> + * handler on SMP systems.
> + *
> + * Caller must have changed pm_idle to the new value before the call. Old
> + * pm_idle value will not be used by any CPU after the return of this function.
> + */
>  void cpu_idle_wait(void)
>  {
> -     unsigned int cpu, this_cpu = get_cpu();
> -     cpumask_t map, tmp = current->cpus_allowed;
> -
> -     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
> -     put_cpu();
> -
> -     cpus_clear(map);
> -     for_each_online_cpu(cpu) {
> -             per_cpu(cpu_idle_state, cpu) = 1;
> -             cpu_set(cpu, map);
> -     }
> -
> -     __get_cpu_var(cpu_idle_state) = 0;
> -
> -     wmb();
> -     do {
> -             ssleep(1);
> -             for_each_online_cpu(cpu) {
> -                     if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
> -                             cpu_clear(cpu, map);
> -             }
> -             cpus_and(map, map, cpu_online_map);
> -             /*
> -              * We waited 1 sec, if a CPU still did not call idle
> -              * it may be because it is in idle and not waking up
> -              * because it has nothing to do.
> -              * Give all the remaining CPUS a kick.
> -              */
> -             smp_call_function_mask(map, do_nothing, 0, 0);
> -     } while (!cpus_empty(map));
> -
> -     set_cpus_allowed(current, tmp);
> +     smp_mb();
> +     /* kick all the CPUs so that they exit out of pm_idle */
> +     smp_call_function(do_nothing, NULL, 0, 1);
>  }
>  EXPORT_SYMBOL_GPL(cpu_idle_wait);
>  

Reply via email to