Based on steal metrics, compute the ratio of steal time to runtime.
This ratio is then used to decide whether to soft offline or soft
online cores.
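As a rough illustration of the arithmetic (a standalone userspace
sketch, not part of the patch; the tick counts are invented, while
STEAL_RATIO and STEAL_MULTIPLE match the definitions added below):

	#include <stdio.h>

	#define STEAL_RATIO	100
	#define STEAL_MULTIPLE	(STEAL_RATIO * STEAL_RATIO)	/* 10000 */

	int main(void)
	{
		/* Invented sample: 1000000 TB ticks elapsed in the
		 * interval, 8 CPUs online, and 200000 ticks (summed
		 * across all CPUs) were stolen.
		 */
		unsigned long delta_steal = 200000;
		unsigned long delta_tb = 1000000;
		unsigned long ncpus = 8;

		/* Scaling by STEAL_MULTIPLE before dividing keeps two
		 * decimal digits of precision in integer arithmetic;
		 * 10000 would mean every online CPU was fully stolen.
		 */
		unsigned long ratio = delta_steal * STEAL_MULTIPLE /
				      (delta_tb * ncpus);

		printf("steal ratio = %lu (%lu.%02lu%%)\n", ratio,
		       ratio / STEAL_RATIO, ratio % STEAL_RATIO);
		return 0;
	}

This prints "steal ratio = 250 (2.50%)". The kernel code below does
the same computation with TB ticks read from each CPU's lppaca.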
Steal above a limit indicates contention on the cores, so a few cores
are soft-offlined. Steal below a limit indicates that cores are likely
available, so a few cores are soft-onlined. Currently only the first
online CPU calculates the steal, and even this CPU processes the steal
metrics at a 1-second granularity. Also, this steal processing is
currently enabled only on shared processor logical partitions that are
not KVM guests. Since the steal time is only a portion of the
processor's runtime, scale it up by a multiple so that it is easier to
compare against the limits.

Signed-off-by: Srikar Dronamraju <[email protected]>
---
 arch/powerpc/platforms/pseries/lpar.c    | 50 ++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/pseries.h |  4 ++
 2 files changed, 54 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dde12b27ba60..3431730a10ea 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -659,6 +659,53 @@ static int __init vcpudispatch_stats_procfs_init(void)
 machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
 
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+#define STEAL_MULTIPLE (STEAL_RATIO * STEAL_RATIO)
+#define PURR_UPDATE_TB tb_ticks_per_sec
+
+static void trigger_softoffline(unsigned long steal_ratio)
+{
+}
+
+static bool should_cpu_process_steal(int cpu)
+{
+	if (cpu == cpumask_first(cpu_online_mask))
+		return true;
+
+	return false;
+}
+
+static void process_steal(int cpu)
+{
+	static unsigned long next_tb, prev_steal;
+	unsigned long steal_ratio, delta_tb;
+	unsigned long tb = mftb();
+	unsigned long steal = 0;
+	unsigned int i;
+
+	if (!should_cpu_process_steal(cpu))
+		return;
+
+	if (tb < next_tb)
+		return;
+
+	for_each_online_cpu(i) {
+		struct lppaca *lppaca = &lppaca_of(i);
+
+		steal += be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+		steal += be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb));
+	}
+
+	if (next_tb && prev_steal) {
+		delta_tb = max(tb - (next_tb - PURR_UPDATE_TB), 1);
+		steal_ratio = (steal - prev_steal) * STEAL_MULTIPLE;
+		steal_ratio /= (delta_tb * num_online_cpus());
+		trigger_softoffline(steal_ratio);
+	}
+
+	next_tb = tb + PURR_UPDATE_TB;
+	prev_steal = steal;
+}
+
 u64 pseries_paravirt_steal_clock(int cpu)
 {
 	struct lppaca *lppaca = &lppaca_of(cpu);
@@ -667,6 +714,9 @@ u64 pseries_paravirt_steal_clock(int cpu)
 	steal = be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
 	steal += be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb));
 
+	if (is_shared_processor() && !is_kvm_guest())
+		process_steal(cpu);
+
 	/*
 	 * VPA steal time counters are reported at TB frequency. Hence do a
 	 * conversion to ns before using.
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 3968a6970fa8..68cf25152870 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -26,6 +26,10 @@ void pSeries_machine_check_log_err(void);
 #ifdef CONFIG_SMP
 extern void smp_init_pseries(void);
 
+#ifdef CONFIG_PPC_SPLPAR
+#define STEAL_RATIO 100
+#endif
+
 /* Get state of physical CPU from query_cpu_stopped */
 int smp_query_cpu_stopped(unsigned int pcpu);
 #define QCSS_STOPPED 0
-- 
2.43.7
