On Wed, Nov 14, 2018 at 03:45:52AM +0100, Frederic Weisbecker wrote:
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d458d65..27e0544 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -265,6 +265,8 @@ struct task_cputime {
>  enum vtime_state {
>       /* Task is sleeping or running in a CPU with VTIME inactive: */
>       VTIME_INACTIVE = 0,
> +     /* Task has passed exit_notify() */
> +     VTIME_DEAD,

How does it make sense for VTIME_DEAD > VTIME_INACTIVE?

>       /* Task is idle */
>       VTIME_IDLE,
>       /* Task runs in kernelspace in a CPU with VTIME active: */


> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index f64afd7..a0c3a82 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -813,17 +813,31 @@ void vtime_task_switch_generic(struct task_struct *prev)
>  {
>       struct vtime *vtime = &prev->vtime;
>  
> -     write_seqcount_begin(&vtime->seqcount);
> -     if (vtime->state == VTIME_IDLE)
> -             vtime_account_idle(prev);
> -     else
> -             __vtime_account_kernel(prev, vtime);
> -     vtime->state = VTIME_INACTIVE;
> -     vtime->cpu = -1;
> -     write_seqcount_end(&vtime->seqcount);
> +     /*
> +      * Flush the prev task vtime, unless it has passed
> +      * vtime_exit_task(), in which case there is nothing
> +      * left to account.
> +      */
> +     if (vtime->state != VTIME_DEAD) {
> +             write_seqcount_begin(&vtime->seqcount);
> +             if (vtime->state == VTIME_IDLE)
> +                     vtime_account_idle(prev);
> +             else
> +                     __vtime_account_kernel(prev, vtime);
> +             vtime->state = VTIME_INACTIVE;
> +             vtime->cpu = -1;
> +             write_seqcount_end(&vtime->seqcount);
> +     }
>  
>       vtime = &current->vtime;
>  
> +     /*
> +      * Ignore the next task if it has been preempted after
> +      * vtime_exit_task().
> +      */
> +     if (vtime->state == VTIME_DEAD)
> +             return;
> +
>       write_seqcount_begin(&vtime->seqcount);
>       if (is_idle_task(current))
>               vtime->state = VTIME_IDLE;

A bit inconsistent: one case is handled with an indented block and the
other with an early return.

> @@ -850,6 +864,30 @@ void vtime_init_idle(struct task_struct *t, int cpu)
>       local_irq_restore(flags);
>  }
>  
> +/*
> + * This is the final settlement point after which we don't account
> + * anymore vtime for this task.
> + */
> +void vtime_exit_task(struct task_struct *t)
> +{
> +     struct vtime *vtime = &t->vtime;
> +     unsigned long flags;

Note that the code in vtime_task_switch_generic() (above) relies on @t
== current (which is true, but not explicit).

> +     local_irq_save(flags);
> +     write_seqcount_begin(&vtime->seqcount);
> +     /*
> +      * A task that has never run on a nohz_full CPU hasn't
> +      * been tracked by vtime. Thus it's in VTIME_INACTIVE
> +      * state. Nothing to account for it.
> +      */
> +     if (vtime->state != VTIME_INACTIVE)
> +             vtime_account_system(t, vtime);
> +     vtime->state = VTIME_DEAD;
> +     vtime->cpu = -1;
> +     write_seqcount_end(&vtime->seqcount);
> +     local_irq_restore(flags);
> +}
> +
>  u64 task_gtime(struct task_struct *t)
>  {
>       struct vtime *vtime = &t->vtime;
> -- 
> 2.7.4
> 

Reply via email to