On Monday 01 Mar 2021 at 12:21:17 (+0530), Viresh Kumar wrote:
> This patch makes the scale_freq_tick() machinery generic enough that
> other parts of the kernel can also provide their own implementation of
> the callback, which the scheduler calls periodically to update the
> per-cpu freq_scale variable.
> 
> Implementations now need to provide a 'struct scale_freq_data' for the
> CPUs whose hardware counters they can read, and the callback is
> registered for each of those CPUs in a per-cpu variable.
> 
> The arch-specific (ARM AMU) counters are updated to adapt to this and
> take the highest priority when available, i.e. they will be used in
> preference to, for example, CPPC based counters.
> 
> The special code to rebuild the sched domains, when the invariance
> status of the system changes, is moved out of arm64 specific code and
> into arch_topology.c.
> 
> Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use
> it yet; it is added to show that cpufreq also acts as a source of
> information for FIE and will be used by default if no other counters
> are supported for a platform.
> 
> Reviewed-by: Ionela Voinescu <ionela.voine...@arm.com>
> Tested-by: Ionela Voinescu <ionela.voine...@arm.com>
> Signed-off-by: Viresh Kumar <viresh.ku...@linaro.org>
> ---
>  arch/arm64/include/asm/topology.h |  10 +--
>  arch/arm64/kernel/topology.c      | 105 +++++++++++-------------------
>  drivers/base/arch_topology.c      |  85 ++++++++++++++++++++++--
>  include/linux/arch_topology.h     |  14 +++-
>  4 files changed, 134 insertions(+), 80 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 3b8dca4eb08d..ec2db3419c41 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -17,17 +17,9 @@ int pcibus_to_node(struct pci_bus *bus);
>  #include <linux/arch_topology.h>
>  
>  void update_freq_counters_refs(void);
> -void topology_scale_freq_tick(void);
> -
> -#ifdef CONFIG_ARM64_AMU_EXTN
> -/*
> - * Replace task scheduler's default counter-based
> - * frequency-invariance scale factor setting.
> - */
> -#define arch_scale_freq_tick topology_scale_freq_tick
> -#endif /* CONFIG_ARM64_AMU_EXTN */
>  
>  /* Replace task scheduler's default frequency-invariant accounting */
> +#define arch_scale_freq_tick topology_scale_freq_tick
>  #define arch_set_freq_scale topology_set_freq_scale
>  #define arch_scale_freq_capacity topology_get_freq_scale
>  #define arch_scale_freq_invariant topology_scale_freq_invariant
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index e08a4126453a..47fca7376c93 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -199,12 +199,47 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
>       return 0;
>  }
>  
> -static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
> -#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
> +static void amu_scale_freq_tick(void)
> +{
> +     u64 prev_core_cnt, prev_const_cnt;
> +     u64 core_cnt, const_cnt, scale;
> +
> +     prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> +     prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> +     update_freq_counters_refs();
> +
> +     const_cnt = this_cpu_read(arch_const_cycles_prev);
> +     core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> +     if (unlikely(core_cnt <= prev_core_cnt ||
> +                  const_cnt <= prev_const_cnt))
> +             return;
> +
> +     /*
> +      *          /\core    arch_max_freq_scale
> +      * scale =  ------- * --------------------
> +      *          /\const   SCHED_CAPACITY_SCALE
> +      *
> +      * See validate_cpu_freq_invariance_counters() for details on
> +      * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> +      */
> +     scale = core_cnt - prev_core_cnt;
> +     scale *= this_cpu_read(arch_max_freq_scale);
> +     scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> +                       const_cnt - prev_const_cnt);
> +
> +     scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> +     this_cpu_write(freq_scale, (unsigned long)scale);
> +}
> +
> +static struct scale_freq_data amu_sfd = {
> +     .source = SCALE_FREQ_SOURCE_ARCH,
> +     .set_freq_scale = amu_scale_freq_tick,
> +};
>  
>  static void amu_fie_setup(const struct cpumask *cpus)
>  {
> -     bool invariant;
>       int cpu;
>  
>       /* We are already set since the last insmod of cpufreq driver */
> @@ -221,25 +256,10 @@ static void amu_fie_setup(const struct cpumask *cpus)
>  
>       cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
>  
> -     invariant = topology_scale_freq_invariant();
> -
> -     /* We aren't fully invariant yet */
> -     if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
> -             return;
> -
> -     static_branch_enable(&amu_fie_key);
> +     topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);
>  
>       pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
>                cpumask_pr_args(cpus));
> -
> -     /*
> -      * Task scheduler behavior depends on frequency invariance support,
> -      * either cpufreq or counter driven. If the support status changes as
> -      * a result of counter initialisation and use, retrigger the build of
> -      * scheduling domains to ensure the information is propagated properly.
> -      */
> -     if (!invariant)
> -             rebuild_sched_domains_energy();
>  }
>  
>  static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
> @@ -283,53 +303,6 @@ static int __init init_amu_fie(void)
>  }
>  core_initcall(init_amu_fie);
>  
> -bool arch_freq_counters_available(const struct cpumask *cpus)
> -{
> -     return amu_freq_invariant() &&
> -            cpumask_subset(cpus, amu_fie_cpus);
> -}
> -
> -void topology_scale_freq_tick(void)
> -{
> -     u64 prev_core_cnt, prev_const_cnt;
> -     u64 core_cnt, const_cnt, scale;
> -     int cpu = smp_processor_id();
> -
> -     if (!amu_freq_invariant())
> -             return;
> -
> -     if (!cpumask_test_cpu(cpu, amu_fie_cpus))
> -             return;
> -
> -     prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> -     prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> -     update_freq_counters_refs();
> -
> -     const_cnt = this_cpu_read(arch_const_cycles_prev);
> -     core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> -     if (unlikely(core_cnt <= prev_core_cnt ||
> -                  const_cnt <= prev_const_cnt))
> -             return;
> -
> -     /*
> -      *          /\core    arch_max_freq_scale
> -      * scale =  ------- * --------------------
> -      *          /\const   SCHED_CAPACITY_SCALE
> -      *
> -      * See validate_cpu_freq_invariance_counters() for details on
> -      * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> -      */
> -     scale = core_cnt - prev_core_cnt;
> -     scale *= this_cpu_read(arch_max_freq_scale);
> -     scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> -                       const_cnt - prev_const_cnt);
> -
> -     scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> -     this_cpu_write(freq_scale, (unsigned long)scale);
> -}
> -
>  #ifdef CONFIG_ACPI_CPPC_LIB
>  #include <acpi/cppc_acpi.h>
>  
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index de8587cc119e..8f62dbf93f67 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -21,17 +21,94 @@
>  #include <linux/sched.h>
>  #include <linux/smp.h>
>  
> +static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> +static struct cpumask scale_freq_counters_mask;
> +static bool scale_freq_invariant;
> +
> +static bool supports_scale_freq_counters(const struct cpumask *cpus)
> +{
> +     return cpumask_subset(cpus, &scale_freq_counters_mask);
> +}
> +
>  bool topology_scale_freq_invariant(void)
>  {
>       return cpufreq_supports_freq_invariance() ||
> -            arch_freq_counters_available(cpu_online_mask);
> +            supports_scale_freq_counters(cpu_online_mask);
>  }
>  
> -__weak bool arch_freq_counters_available(const struct cpumask *cpus)
> +static void update_scale_freq_invariant(bool status)
>  {
> -     return false;
> +     if (scale_freq_invariant == status)
> +             return;
> +
> +     /*
> +      * Task scheduler behavior depends on frequency invariance support,
> +      * either cpufreq or counter driven. If the support status changes as
> +      * a result of counter initialisation and use, retrigger the build of
> +      * scheduling domains to ensure the information is propagated properly.
> +      */
> +     if (topology_scale_freq_invariant() == status) {
> +             scale_freq_invariant = status;
> +             rebuild_sched_domains_energy();
> +     }
>  }
> +
> +void topology_set_scale_freq_source(struct scale_freq_data *data,
> +                                 const struct cpumask *cpus)
> +{
> +     struct scale_freq_data *sfd;
> +     int cpu;
> +
> +     /*
> +      * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
> +      * supported by cpufreq.
> +      */
> +     if (cpumask_empty(&scale_freq_counters_mask))
> +             scale_freq_invariant = topology_scale_freq_invariant();
> +
> +     for_each_cpu(cpu, cpus) {
> +             sfd = per_cpu(sft_data, cpu);
> +
> +             /* Use ARCH provided counters whenever possible */
> +             if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> +                     per_cpu(sft_data, cpu) = data;
> +                     cpumask_set_cpu(cpu, &scale_freq_counters_mask);
> +             }
> +     }
> +
> +     update_scale_freq_invariant(true);
> +}
> +EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
> +
> +void topology_clear_scale_freq_source(enum scale_freq_source source,
> +                                   const struct cpumask *cpus)
> +{
> +     struct scale_freq_data *sfd;
> +     int cpu;
> +
> +     for_each_cpu(cpu, cpus) {
> +             sfd = per_cpu(sft_data, cpu);
> +
> +             if (sfd && sfd->source == source) {
> +                     per_cpu(sft_data, cpu) = NULL;
> +                     cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
> +             }
> +     }
> +
> +     update_scale_freq_invariant(false);
> +}
> +EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
> +
> +void topology_scale_freq_tick(void)
> +{
> +     struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
> +
> +     if (sfd)
> +             sfd->set_freq_scale();
> +}
> +
>  DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
> +EXPORT_SYMBOL_GPL(freq_scale);
>  
>  void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>                            unsigned long max_freq)
> @@ -47,7 +124,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>        * want to update the scale factor with information from CPUFREQ.
>        * Instead the scale factor will be updated from arch_scale_freq_tick.
>        */
> -     if (arch_freq_counters_available(cpus))
> +     if (supports_scale_freq_counters(cpus))
>               return;
>  
>       scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
> diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
> index 0f6cd6b73a61..3bcfba5c21a7 100644
> --- a/include/linux/arch_topology.h
> +++ b/include/linux/arch_topology.h
> @@ -34,7 +34,19 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>                            unsigned long max_freq);
>  bool topology_scale_freq_invariant(void);
>  
> -bool arch_freq_counters_available(const struct cpumask *cpus);
> +enum scale_freq_source {
> +     SCALE_FREQ_SOURCE_CPUFREQ = 0,
> +     SCALE_FREQ_SOURCE_ARCH,
> +};
> +
> +struct scale_freq_data {
> +     enum scale_freq_source source;
> +     void (*set_freq_scale)(void);
> +};
> +
> +void topology_scale_freq_tick(void);
> +void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
> +void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);

Nit: can you split these lines?
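
Something like this, i.e. the same declarations just wrapped at the
open parenthesis to stay within 80 columns:

    void topology_set_scale_freq_source(struct scale_freq_data *data,
                                        const struct cpumask *cpus);
    void topology_clear_scale_freq_source(enum scale_freq_source source,
                                          const struct cpumask *cpus);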

Ionela.

>  
>  DECLARE_PER_CPU(unsigned long, thermal_pressure);
>  
> -- 
> 2.25.0.rc1.19.g042ed3e048af
> 
