From: Rafael J. Wysocki <rafael.j.wyso...@intel.com>

A subsequent change set will introduce a new cpufreq governor using
CPU utilization information from the scheduler, so introduce
cpufreq_update_util() (again) to allow that information to be passed to
the new governor and make cpufreq_trigger_update() call it internally.

To that end, add a new ->update_util callback pointer to struct
freq_update_hook to be set by entities that want to use the util
and max arguments and make cpufreq_update_util() use that callback
if available or the ->func callback that only takes the time argument
otherwise.

In addition to that, arrange helpers to set/clear the utilization
update hooks in such a way that the full ->update_util callbacks
can only be set by code inside the kernel/sched/ directory.

Update the current users of cpufreq_set_freq_update_hook() to use
the new helpers.

Signed-off-by: Rafael J. Wysocki <rafael.j.wyso...@intel.com>
---

New patch.  Maybe slightly over the top, but at least it should be clear
who uses the util and max arguments and who doesn't use them after it.

---
 drivers/cpufreq/cpufreq_governor.c |   76 +++++++++++++--------------
 drivers/cpufreq/intel_pstate.c     |    8 +-
 include/linux/sched.h              |   10 +--
 kernel/sched/cpufreq.c             |  101 +++++++++++++++++++++++++++++--------
 kernel/sched/fair.c                |    8 ++
 kernel/sched/sched.h               |   16 +++++
 6 files changed, 150 insertions(+), 69 deletions(-)

Index: linux-pm/include/linux/sched.h
===================================================================
--- linux-pm.orig/include/linux/sched.h
+++ linux-pm/include/linux/sched.h
@@ -2363,15 +2363,15 @@ static inline bool sched_can_stop_tick(v
 #endif
 
 #ifdef CONFIG_CPU_FREQ
-void cpufreq_trigger_update(u64 time);
-
 struct freq_update_hook {
        void (*func)(struct freq_update_hook *hook, u64 time);
+       void (*update_util)(struct freq_update_hook *hook, u64 time,
+                           unsigned long util, unsigned long max);
 };
 
-void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook);
-#else
-static inline void cpufreq_trigger_update(u64 time) {}
+void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook,
+                       void (*func)(struct freq_update_hook *hook, u64 time));
+void cpufreq_clear_freq_update_hook(int cpu);
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
Index: linux-pm/kernel/sched/cpufreq.c
===================================================================
--- linux-pm.orig/kernel/sched/cpufreq.c
+++ linux-pm/kernel/sched/cpufreq.c
@@ -9,12 +9,12 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/sched.h>
+#include "sched.h"
 
 static DEFINE_PER_CPU(struct freq_update_hook *, cpufreq_freq_update_hook);
 
 /**
- * cpufreq_set_freq_update_hook - Populate the CPU's freq_update_hook pointer.
+ * set_freq_update_hook - Populate the CPU's freq_update_hook pointer.
  * @cpu: The CPU to set the pointer for.
  * @hook: New pointer value.
  *
@@ -27,23 +27,96 @@ static DEFINE_PER_CPU(struct freq_update
  * accessed via the old update_util_data pointer or invoke synchronize_sched()
  * right after this function to avoid use-after-free.
  */
-void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook)
+static void set_freq_update_hook(int cpu, struct freq_update_hook *hook)
 {
-       if (WARN_ON(hook && !hook->func))
+       rcu_assign_pointer(per_cpu(cpufreq_freq_update_hook, cpu), hook);
+}
+
+/**
+ * cpufreq_set_freq_update_hook - Set the CPU's frequency update callback.
+ * @cpu: The CPU to set the callback for.
+ * @hook: New freq_update_hook pointer value.
+ * @func: Callback function to use with the new hook.
+ */
+void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook,
+                       void (*func)(struct freq_update_hook *hook, u64 time))
+{
+       if (WARN_ON(!hook || !func))
                return;
 
-       rcu_assign_pointer(per_cpu(cpufreq_freq_update_hook, cpu), hook);
+       hook->func = func;
+       set_freq_update_hook(cpu, hook);
 }
 EXPORT_SYMBOL_GPL(cpufreq_set_freq_update_hook);
 
 /**
+ * cpufreq_set_update_util_hook - Set the CPU's utilization update callback.
+ * @cpu: The CPU to set the callback for.
+ * @hook: New freq_update_hook pointer value.
+ * @update_util: Callback function to use with the new hook.
+ */
+void cpufreq_set_update_util_hook(int cpu, struct freq_update_hook *hook,
+               void (*update_util)(struct freq_update_hook *hook, u64 time,
+                                   unsigned long util, unsigned long max))
+{
+       if (WARN_ON(!hook || !update_util))
+               return;
+
+       hook->update_util = update_util;
+       set_freq_update_hook(cpu, hook);
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_update_util_hook);
+
+/**
+ * cpufreq_clear_freq_update_hook - Clear the CPU's freq_update_hook pointer.
+ * @cpu: The CPU to clear the pointer for.
+ */
+void cpufreq_clear_freq_update_hook(int cpu)
+{
+       set_freq_update_hook(cpu, NULL);
+}
+EXPORT_SYMBOL_GPL(cpufreq_clear_freq_update_hook);
+
+/**
+ * cpufreq_update_util - Take a note about CPU utilization changes.
+ * @time: Current time.
+ * @util: CPU utilization.
+ * @max: CPU capacity.
+ *
+ * This function is called on every invocation of update_load_avg() on the CPU
+ * whose utilization is being updated.
+ *
+ * It can only be called from RCU-sched read-side critical sections.
+ */
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
+{
+       struct freq_update_hook *hook;
+
+#ifdef CONFIG_LOCKDEP
+       WARN_ON(debug_locks && !rcu_read_lock_sched_held());
+#endif
+
+       hook = rcu_dereference(*this_cpu_ptr(&cpufreq_freq_update_hook));
+       /*
+        * If this isn't inside of an RCU-sched read-side critical section, hook
+        * may become NULL after the check below.
+        */
+       if (hook) {
+               if (hook->update_util)
+                       hook->update_util(hook, time, util, max);
+               else
+                       hook->func(hook, time);
+       }
+}
+
+/**
  * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
  * @time: Current time.
  *
  * The way cpufreq is currently arranged requires it to evaluate the CPU
  * performance state (frequency/voltage) on a regular basis.  To facilitate
- * that, this function is called by update_load_avg() in CFS when executed for
- * the current CPU's runqueue.
+ * that, cpufreq_update_util() is called by update_load_avg() in CFS when
+ * executed for the current CPU's runqueue.
  *
  * However, this isn't sufficient to prevent the CPU from being stuck in a
  * completely inadequate performance level for too long, because the calls
@@ -57,17 +130,5 @@ EXPORT_SYMBOL_GPL(cpufreq_set_freq_updat
  */
 void cpufreq_trigger_update(u64 time)
 {
-       struct freq_update_hook *hook;
-
-#ifdef CONFIG_LOCKDEP
-       WARN_ON(debug_locks && !rcu_read_lock_sched_held());
-#endif
-
-       hook = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_freq_update_hook));
-       /*
-        * If this isn't inside of an RCU-sched read-side critical section, hook
-        * may become NULL after the check below.
-        */
-       if (hook)
-               hook->func(hook, time);
+       cpufreq_update_util(time, ULONG_MAX, 0);
 }
Index: linux-pm/kernel/sched/fair.c
===================================================================
--- linux-pm.orig/kernel/sched/fair.c
+++ linux-pm/kernel/sched/fair.c
@@ -2839,6 +2839,8 @@ static inline void update_load_avg(struc
                update_tg_load_avg(cfs_rq, 0);
 
        if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+               unsigned long max = rq->cpu_capacity_orig;
+
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
@@ -2847,9 +2849,11 @@ static inline void update_load_avg(struc
                 * the next tick/schedule should update.
                 *
                 * It will not get called when we go idle, because the idle
-                * thread is a different class (!fair).
+                * thread is a different class (!fair), nor will the utilization
+                * number include things like RT tasks.
                 */
-               cpufreq_trigger_update(rq_clock(rq));
+               cpufreq_update_util(rq_clock(rq),
+                                   min(cfs_rq->avg.util_avg, max), max);
        }
 }
 
Index: linux-pm/kernel/sched/sched.h
===================================================================
--- linux-pm.orig/kernel/sched/sched.h
+++ linux-pm/kernel/sched/sched.h
@@ -1739,3 +1739,19 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_CPU_FREQ
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
+void cpufreq_trigger_update(u64 time);
+void cpufreq_set_update_util_hook(int cpu, struct freq_update_hook *hook,
+               void (*update_util)(struct freq_update_hook *hook, u64 time,
+                                   unsigned long util, unsigned long max));
+static inline void cpufreq_clear_update_util_hook(int cpu)
+{
+       cpufreq_clear_freq_update_hook(cpu);
+}
+#else
+static inline void cpufreq_update_util(u64 time, unsigned long util,
+                                      unsigned long max) {}
+static inline void cpufreq_trigger_update(u64 time) {}
+#endif /* CONFIG_CPU_FREQ */
Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -1088,8 +1088,8 @@ static int intel_pstate_init_cpu(unsigne
        intel_pstate_busy_pid_reset(cpu);
        intel_pstate_sample(cpu, 0);
 
-       cpu->update_hook.func = intel_pstate_freq_update;
-       cpufreq_set_freq_update_hook(cpunum, &cpu->update_hook);
+       cpufreq_set_freq_update_hook(cpunum, &cpu->update_hook,
+                                    intel_pstate_freq_update);
 
        pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
 
@@ -1173,7 +1173,7 @@ static void intel_pstate_stop_cpu(struct
 
        pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
 
-       cpufreq_set_freq_update_hook(cpu_num, NULL);
+       cpufreq_clear_freq_update_hook(cpu_num);
        synchronize_sched();
 
        if (hwp_active)
@@ -1441,7 +1441,7 @@ out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
-                       cpufreq_set_freq_update_hook(cpu, NULL);
+                       cpufreq_clear_freq_update_hook(cpu);
                        synchronize_sched();
                        kfree(all_cpu_data[cpu]);
                }
Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq_governor.c
+++ linux-pm/drivers/cpufreq/cpufreq_governor.c
@@ -211,43 +211,6 @@ unsigned int dbs_update(struct cpufreq_p
 }
 EXPORT_SYMBOL_GPL(dbs_update);
 
-static void gov_set_freq_update_hooks(struct policy_dbs_info *policy_dbs,
-                               unsigned int delay_us)
-{
-       struct cpufreq_policy *policy = policy_dbs->policy;
-       int cpu;
-
-       gov_update_sample_delay(policy_dbs, delay_us);
-       policy_dbs->last_sample_time = 0;
-
-       for_each_cpu(cpu, policy->cpus) {
-               struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
-
-               cpufreq_set_freq_update_hook(cpu, &cdbs->update_hook);
-       }
-}
-
-static inline void gov_clear_freq_update_hooks(struct cpufreq_policy *policy)
-{
-       int i;
-
-       for_each_cpu(i, policy->cpus)
-               cpufreq_set_freq_update_hook(i, NULL);
-
-       synchronize_sched();
-}
-
-static void gov_cancel_work(struct cpufreq_policy *policy)
-{
-       struct policy_dbs_info *policy_dbs = policy->governor_data;
-
-       gov_clear_freq_update_hooks(policy_dbs->policy);
-       irq_work_sync(&policy_dbs->irq_work);
-       cancel_work_sync(&policy_dbs->work);
-       atomic_set(&policy_dbs->work_count, 0);
-       policy_dbs->work_in_progress = false;
-}
-
 static void dbs_work_handler(struct work_struct *work)
 {
        struct policy_dbs_info *policy_dbs;
@@ -334,6 +297,44 @@ static void dbs_freq_update_handler(stru
        irq_work_queue(&policy_dbs->irq_work);
 }
 
+static void gov_set_freq_update_hooks(struct policy_dbs_info *policy_dbs,
+                               unsigned int delay_us)
+{
+       struct cpufreq_policy *policy = policy_dbs->policy;
+       int cpu;
+
+       gov_update_sample_delay(policy_dbs, delay_us);
+       policy_dbs->last_sample_time = 0;
+
+       for_each_cpu(cpu, policy->cpus) {
+               struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
+
+               cpufreq_set_freq_update_hook(cpu, &cdbs->update_hook,
+                                            dbs_freq_update_handler);
+       }
+}
+
+static inline void gov_clear_freq_update_hooks(struct cpufreq_policy *policy)
+{
+       int i;
+
+       for_each_cpu(i, policy->cpus)
+               cpufreq_clear_freq_update_hook(i);
+
+       synchronize_sched();
+}
+
+static void gov_cancel_work(struct cpufreq_policy *policy)
+{
+       struct policy_dbs_info *policy_dbs = policy->governor_data;
+
+       gov_clear_freq_update_hooks(policy_dbs->policy);
+       irq_work_sync(&policy_dbs->irq_work);
+       cancel_work_sync(&policy_dbs->work);
+       atomic_set(&policy_dbs->work_count, 0);
+       policy_dbs->work_in_progress = false;
+}
+
 static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
                                                     struct dbs_governor *gov)
 {
@@ -356,7 +357,6 @@ static struct policy_dbs_info *alloc_pol
                struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
                j_cdbs->policy_dbs = policy_dbs;
-               j_cdbs->update_hook.func = dbs_freq_update_handler;
        }
        return policy_dbs;
 }

Reply via email to