2016-03-30 13:05 GMT+02:00 Rafael J. Wysocki <raf...@kernel.org>:
> On Wed, Mar 30, 2016 at 12:17 PM, Jörg Otte <jrg.o...@gmail.com> wrote:
>> 2016-03-29 23:34 GMT+02:00 Rafael J. Wysocki <r...@rjwysocki.net>:
>>> On Tuesday, March 29, 2016 07:32:27 PM Jörg Otte wrote:
>>>> 2016-03-29 19:24 GMT+02:00 Jörg Otte <jrg.o...@gmail.com>:
>>>> > in v4.5 and earlier intel-pstate downscaled idle processors (load
>>>> > 0.1-0.2%) to minumum frequency, in my case 800MHz.
>>>> >
>>>> > Now in v4.6-rc1 the characteristic has dramatically changed. If in
>>>> > idle the processor frequency is more or less a few MHz around 2500Mhz.
>>>> > This is the maximum non turbo frequency.
>>>> >
>>>> > No difference between powersafe or performance governor.
>>>> >
>>>> > I currently use acpi_cpufreq which works as usual.
>>>> >
>>>> > Processor:
>>>> > Intel(R) Core(TM) i5-4200M CPU @ 2.50GHz (family: 0x6, model: 0x3c,
>>>> > stepping: 0x3)
>>>> >
>>>> > Last known good kernel is: 4.5.0-01127-g9256d5a
>>>> > First known bad kernel is: 4.5.0-02535-g09fd671
>>>> >
>>>> > There is
>>>> > commit 277edba Merge tag 'pm+acpi-4.6-rc1-1' of
>>>> > git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
>>>> > in between, which brought a few changes in intel_pstate.
>>>
>>> Can you please check commit a4675fbc4a7a (cpufreq: intel_pstate: Replace 
>>> timers
>>> with utilization update callbacks)?
>>>
>> Yes , this solved the problem for me.
>> I had to resolve some conflicts myself when reverting that
>> commit. Hard work :).
>
> Thanks for doing this.  Can you please post the revert patch you have used?
>

The patch is on top of 4.5.0-02535-g09fd671.
I'm not sure what gmail is doing with spaces and tabs,
so I attach the revert patch.


>> Here is a 10-seconds trace of the used frequencies when
>> in "desktop-idle":
>>
>> driver          cpu0 cpu1 cpu2 cpu3
>> -------------------------------------
>> intel_pstate (  800  928  941 1200) MHz   load:( 0.2)%
>> intel_pstate (  800  928 1181 1800) MHz   load:( 0.0)%
>> intel_pstate ( 1675 1576 1347  800) MHz   load:( 0.0)%
>> intel_pstate ( 1198 1576  842  800) MHz   load:( 0.5)%
>> intel_pstate (  800 1181 1113 1600) MHz   load:( 0.0)%
>> intel_pstate (  808 1181  805  800) MHz   load:( 0.5)%
>> intel_pstate (  844 1191  900 1082) MHz   load:( 0.3)%
>> intel_pstate (  816 1191  800  800) MHz   load:( 0.0)%
>> intel_pstate (  800  905  892 1082) MHz   load:( 0.2)%
>> intel_pstate (  945  905 1340  800) MHz   load:( 0.3)%
>
> Please also run turbostat with and without your revert patch applied.
>

turbostat without revert
Kernel: 4.5.0-02535-g09fd671
-----------------------------
CPUID(7): No-SGX
     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
       -      13    0.53    2514    2495
       0      14    0.55    2518    2495
       1       8    0.33    2527    2495
       2      15    0.60    2506    2495
       3      16    0.62    2509    2495

turbostat after revert of commit a4675fbc4a7a
kernel: 4.5.0-reva4675fbc4a7a-02536-g77225b1
------------------------------
CPUID(7): No-SGX
     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
       -       4    0.35    1142    2494
       0       1    0.11    1016    2494
       1       2    0.17     961    2494
       2      10    0.82    1215    2494
       3       3    0.29    1086    2494
     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz
       -       4    0.46     885    2494
       0       1    0.12     889    2494
       1       1    0.16     885    2494
       2      10    1.15     883    2494
       3       4    0.40     891    2494
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cb56074..97c16af 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -71,7 +71,7 @@ struct sample {
 	u64 mperf;
 	u64 tsc;
 	int freq;
-	u64 time;
+	ktime_t time;
 };
 
 struct pstate_data {
@@ -103,13 +103,13 @@ struct _pid {
 struct cpudata {
 	int cpu;
 
-	struct update_util_data update_util;
+	struct timer_list timer;
 
 	struct pstate_data pstate;
 	struct vid_data vid;
 	struct _pid pid;
 
-	u64	last_sample_time;
+	ktime_t last_sample_time;
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
@@ -120,7 +120,6 @@ struct cpudata {
 static struct cpudata **all_cpu_data;
 struct pstate_adjust_policy {
 	int sample_rate_ms;
-	s64 sample_rate_ns;
 	int deadband;
 	int setpoint;
 	int p_gain_pct;
@@ -719,7 +718,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
 	if (limits->no_turbo && !limits->turbo_disabled)
 		val |= (u64)1 << 32;
 
-	wrmsrl(MSR_IA32_PERF_CTL, val);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
 }
 
 static int knl_get_turbo_pstate(void)
@@ -884,7 +883,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
 	sample->core_pct_busy = (int32_t)core_pct;
 }
 
-static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
+static inline void intel_pstate_sample(struct cpudata *cpu)
 {
 	u64 aperf, mperf;
 	unsigned long flags;
@@ -896,12 +895,12 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	tsc = rdtsc();
 	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
 		local_irq_restore(flags);
-		return false;
+		return;
 	}
 	local_irq_restore(flags);
 
 	cpu->last_sample_time = cpu->sample.time;
-	cpu->sample.time = time;
+	cpu->sample.time = ktime_get();
 	cpu->sample.aperf = aperf;
 	cpu->sample.mperf = mperf;
 	cpu->sample.tsc =  tsc;
@@ -912,7 +911,7 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	cpu->prev_aperf = aperf;
 	cpu->prev_mperf = mperf;
 	cpu->prev_tsc = tsc;
-	return true;
+	return;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
@@ -921,6 +920,22 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu)
 		cpu->pstate.scaling, cpu->sample.mperf);
 }
 
+static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
+{
+	int delay;
+
+	delay = msecs_to_jiffies(50);
+	mod_timer_pinned(&cpu->timer, jiffies + delay);
+}
+
+static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
+{
+	int delay;
+
+	delay = msecs_to_jiffies(pid_params.sample_rate_ms);
+	mod_timer_pinned(&cpu->timer, jiffies + delay);
+}
+
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
@@ -959,7 +974,8 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 {
 	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
-	u64 duration_ns;
+	s64 duration_us;
+	u32 sample_time;
 
 	intel_pstate_calc_busy(cpu);
 
@@ -980,16 +996,18 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
 	/*
-	 * Since our utilization update callback will not run unless we are
-	 * in C0, check if the actual elapsed time is significantly greater (3x)
-	 * than our sample interval.  If it is, then we were idle for a long
-	 * enough period of time to adjust our busyness.
+	 * Since we have a deferred timer, it will not fire unless
+	 * we are in C0.  So, determine if the actual elapsed time
+	 * is significantly greater (3x) than our sample interval.  If it
+	 * is, then we were idle for a long enough period of time
+	 * to adjust our busyness.
 	 */
-	duration_ns = cpu->sample.time - cpu->last_sample_time;
-	if ((s64)duration_ns > pid_params.sample_rate_ns * 3
-	    && cpu->last_sample_time > 0) {
-		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
-				      int_tofp(duration_ns));
+	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
+	duration_us = ktime_us_delta(cpu->sample.time,
+				     cpu->last_sample_time);
+	if (duration_us > sample_time * 3) {
+		sample_ratio = div_fp(int_tofp(sample_time),
+				      int_tofp(duration_us));
 		core_busy = mul_fp(core_busy, sample_ratio);
 	}
 
@@ -1019,18 +1037,23 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 		get_avg_frequency(cpu));
 }
 
-static void intel_pstate_update_util(struct update_util_data *data, u64 time,
-				     unsigned long util, unsigned long max)
+static void intel_hwp_timer_func(unsigned long __data)
 {
-	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
-	u64 delta_ns = time - cpu->sample.time;
+	struct cpudata *cpu = (struct cpudata *) __data;
 
-	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
-		bool sample_taken = intel_pstate_sample(cpu, time);
+	intel_pstate_sample(cpu);
+	intel_hwp_set_sample_time(cpu);
+}
 
-		if (sample_taken && !hwp_active)
-			intel_pstate_adjust_busy_pstate(cpu);
-	}
+static void intel_pstate_timer_func(unsigned long __data)
+{
+	struct cpudata *cpu = (struct cpudata *) __data;
+
+	intel_pstate_sample(cpu);
+
+	intel_pstate_adjust_busy_pstate(cpu);
+
+	intel_pstate_set_sample_time(cpu);
 }
 
 #define ICPU(model, policy) \
@@ -1078,19 +1101,24 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
 	cpu->cpu = cpunum;
 
-	if (hwp_active) {
+	if (hwp_active)
 		intel_pstate_hwp_enable(cpu);
-		pid_params.sample_rate_ms = 50;
-		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
-	}
 
 	intel_pstate_get_cpu_pstates(cpu);
 
+	init_timer_deferrable(&cpu->timer);
+	cpu->timer.data = (unsigned long)cpu;
+	cpu->timer.expires = jiffies + HZ/100;
+
+	if (!hwp_active)
+		cpu->timer.function = intel_pstate_timer_func;
+	else
+		cpu->timer.function = intel_hwp_timer_func;
+
 	intel_pstate_busy_pid_reset(cpu);
-	intel_pstate_sample(cpu, 0);
+	intel_pstate_sample(cpu);
 
-	cpu->update_util.func = intel_pstate_update_util;
-	cpufreq_set_update_util_data(cpunum, &cpu->update_util);
+	add_timer_on(&cpu->timer, cpunum);
 
 	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
 
@@ -1174,9 +1202,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 
 	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
 
-	cpufreq_set_update_util_data(cpu_num, NULL);
-	synchronize_sched();
-
+	del_timer_sync(&all_cpu_data[cpu_num]->timer);
 	if (hwp_active)
 		return;
 
@@ -1240,7 +1266,6 @@ static int intel_pstate_msrs_not_valid(void)
 static void copy_pid_params(struct pstate_adjust_policy *policy)
 {
 	pid_params.sample_rate_ms = policy->sample_rate_ms;
-	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
 	pid_params.p_gain_pct = policy->p_gain_pct;
 	pid_params.i_gain_pct = policy->i_gain_pct;
 	pid_params.d_gain_pct = policy->d_gain_pct;
@@ -1442,8 +1467,7 @@ out:
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		if (all_cpu_data[cpu]) {
-			cpufreq_set_update_util_data(cpu, NULL);
-			synchronize_sched();
+			del_timer_sync(&all_cpu_data[cpu]->timer);
 			kfree(all_cpu_data[cpu]);
 		}
 	}

Reply via email to