Hi Qais,
here are some more 2c from me...

On Mon, Jun 29, 2020 at 18:26:33 +0200, Qais Yousef <qais.you...@arm.com> 
wrote...

[...]

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 235b2cae00a0..8d80d6091d86 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -794,6 +794,26 @@ unsigned int sysctl_sched_uclamp_util_max = 
> SCHED_CAPACITY_SCALE;
>  /* All clamps are required to be less or equal than these values */
>  static struct uclamp_se uclamp_default[UCLAMP_CNT];
>  
> +/*
> + * This static key is used to reduce the uclamp overhead in the fast path. It
> + * primarily disables the call to uclamp_rq_{inc, dec}() in
> + * enqueue/dequeue_task().
> + *
> + * This allows users to continue to enable uclamp in their kernel config with
> + * minimum uclamp overhead in the fast path.
> + *
> + * As soon as userspace modifies any of the uclamp knobs, the static key is
> + * enabled, since we have an actual users that make use of uclamp
> + * functionality.
> + *
> + * The knobs that would enable this static key are:
> + *
> + *   * A task modifying its uclamp value with sched_setattr().
> + *   * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
> + *   * An admin modifying the cgroup cpu.uclamp.{min, max}

I guess this list can be obtained with a grep or git changelog, moreover
this text will require maintenance.

What about replacing this full comment with something shorted like:

---8<---
      Static key to reduce uclamp overhead in the fast path by disabling
      calls to uclamp_rq_{inc, dec}().
---8<---

> + */
> +DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
> +
>  /* Integer rounded range for each bucket */
>  #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, 
> UCLAMP_BUCKETS)
>  
> @@ -994,9 +1014,30 @@ static inline void uclamp_rq_dec_id(struct rq *rq, 
> struct task_struct *p,
>       lockdep_assert_held(&rq->lock);
>  
>       bucket = &uc_rq->bucket[uc_se->bucket_id];
> -     SCHED_WARN_ON(!bucket->tasks);
> +
> +     /*
> +      * bucket->tasks could be zero if sched_uclamp_used was enabled while
> +      * the current task was running, hence we could end up with unbalanced
> +      * call to uclamp_rq_dec_id().
> +      *
> +      * Need to be careful of the following enqeueue/dequeue order
> +      * problem too
> +      *
> +      *      enqueue(taskA)
> +      *      // sched_uclamp_used gets enabled
> +      *      enqueue(taskB)
> +      *      dequeue(taskA)
> +      *      // bucket->tasks is now 0
> +      *      dequeue(taskB)
> +      *
> +      * where we could end up with uc_se->active of the task set to true and
> +      * the wrong bucket[uc_se->bucket_id].value.
> +      *
> +      * Hence always make sure we reset things properly.
> +      */
>       if (likely(bucket->tasks))
>               bucket->tasks--;
> +
>       uc_se->active = false;

Better than v4, what about just using this active flag?

---8<---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8f360326861e..465a7645713b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -990,6 +990,13 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct 
task_struct *p,
 
        lockdep_assert_held(&rq->lock);
 
+       /*
+        * If a task was already enqueue at uclamp enable time
+        * nothing has been accounted for it.
+        */
+       if (unlikely(!uc_se->active))
+               return;
+
        bucket = &uc_rq->bucket[uc_se->bucket_id];
        SCHED_WARN_ON(!bucket->tasks);
        if (likely(bucket->tasks))
---8<---

This will allow also to keep in all the ref count checks we have,
e.g. the SChed_WARN_ON().


>       /*
> @@ -1032,6 +1073,13 @@ static inline void uclamp_rq_inc(struct rq *rq, struct 
> task_struct *p)
>  {
>       enum uclamp_id clamp_id;
>  
> +     /*
> +      * Avoid any overhead until uclamp is actually used by the userspace.
> +      * Including the branch if we use static_branch_likely()

I still find this last sentence hard to parse, but perhaps it's just me
still missing a breakfast :)

> +      */
> +     if (!static_branch_unlikely(&sched_uclamp_used))
> +             return;

I'm also still wondering if the optimization is still working when we
have that ! in front.

Had a check at:

   
https://elixir.bootlin.com/linux/latest/source/include/linux/jump_label.h#L399

and AFAIU, it all boils down to cook a __branch_check()'s compiler hint,
and ISTR that those are "anti-patterns"?

That said we do have some usages for this pattern too:

$ git grep '!static_branch_unlikely' | wc -l       36
$ git grep 'static_branch_unlikely' | wc -l       220

?

> +
>       if (unlikely(!p->sched_class->uclamp_enabled))
>               return;
>  

[...]

> +/**
> + * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
> + * @rq:              The rq to clamp against. Must not be NULL.
> + * @util:    The util value to clamp.
> + * @p:               The task to clamp against. Can be NULL if you want to 
> clamp
> + *           against @rq only.
> + *
> + * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
> + *
> + * If sched_uclamp_used static key is disabled, then just return the util
> + * without any clamping since uclamp aggregation at the rq level in the fast
> + * path is disabled, rendering this operation a NOP.
> + *
> + * Use uclamp_eff_value() if you don't care about uclamp values at rq level. 
> It
> + * will return the correct effective uclamp value of the task even if the
> + * static key is disabled.

Well, if you don't care about rq, you don't call a uclamp_rq_* method.

I would say that the above paragraph is redundant, moreover it adds some
cross-reference to a different method (name) which required maintenance.

What about removing it?

> + */
>  static __always_inline
>  unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
>                                 struct task_struct *p)
>  {
> -     unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
> -     unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
> +     unsigned long min_util;
> +     unsigned long max_util;
> +
> +     if (!static_branch_likely(&sched_uclamp_used))
> +             return util;
> +
> +     min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
> +     max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

I think moving the initialization is not required, the compiler should
be smart enough to place theme where's better.

>       if (p) {
>               min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
> @@ -2371,6 +2396,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, 
> unsigned long util,
>  
>       return clamp(util, min_util, max_util);
>  }
> +
> +static inline bool uclamp_is_enabled(void)
> +{
> +     return static_branch_likely(&sched_uclamp_used);
> +}

Looks like here we mix up terms, which can be confusing.
AFAIKS, we use:
- *_enabled for the sched class flags (compile time)
- *_used    for the user-space opting in (run time)

Thus, perhaps we can just use the same pattern used by the
sched_numa_balancing static key:

  $ git grep sched_numa_balancing
  kernel/sched/core.c:DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
  kernel/sched/core.c:            static_branch_enable(&sched_numa_balancing);
  kernel/sched/core.c:            static_branch_disable(&sched_numa_balancing);
  kernel/sched/core.c:    int state = 
static_branch_likely(&sched_numa_balancing);
  kernel/sched/fair.c:    if (!static_branch_likely(&sched_numa_balancing))
  kernel/sched/fair.c:    if (!static_branch_likely(&sched_numa_balancing))
  kernel/sched/fair.c:    if (!static_branch_likely(&sched_numa_balancing))
  kernel/sched/fair.c:    if (static_branch_unlikely(&sched_numa_balancing))
  kernel/sched/sched.h:extern struct static_key_false sched_numa_balancing;

IOW: unconditionally define sched_uclamp_used as non static in core.c,
and use it directly on schedutil too.

>  #else /* CONFIG_UCLAMP_TASK */
>  static inline
>  unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
> @@ -2378,6 +2408,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, 
> unsigned long util,
>  {
>       return util;
>  }
> +
> +static inline bool uclamp_is_enabled(void)
> +{
> +     return false;
> +}
>  #endif /* CONFIG_UCLAMP_TASK */
>  
>  #ifdef arch_scale_freq_capacity

Best,
Patrick

Reply via email to