On 2025-08-28 07:22, Jan Beulich wrote:
On 28.08.2025 12:03, Penny Zheng wrote:
+static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy,
+                                            unsigned int target_freq,
+                                            unsigned int relation)
+{
+    unsigned int cpu = policy->cpu;
+    const struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data, cpu);

I fear there's a problem here that I so far overlooked. As it happens, just
yesterday I made a patch to eliminate cpufreq_drv_data[] global. In the
course of doing so it became clear that in principle the CPU denoted by
policy->cpu can be offline. Hence its per-CPU data is also unavailable. See
cpufreq_add_cpu()'s invocation of .init() and cpufreq_del_cpu()'s invocation
of .exit(). Is there anything well-hidden (and likely lacking some suitable
comment) which guarantees that no two CPUs (threads) will be in the same
domain? If not, I fear you simply can't use per-CPU data here.

Sorry, I'm confused by your use of "domain" here. Do you mean a per_cpu(..., policy->cpu) access racing with a cpu offline? I'm not away of anything preventing that, though I'm not particularly familiar with it.

do_pm_op() has:
    if ( op->cpuid >= nr_cpu_ids || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

and do_get_pm_info() has:
    if ( !op || (op->cpuid >= nr_cpu_ids) || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

But those are only at entry.

Regards,
Jason

Since initially I was thinking of using per-CPU data also in my patch, I'm
reproducing this in raw form below, for your reference. It's generally only
4.22 material now, of course. Yet in turn for your driver the new drv_data
field may want to become a union, with an "acpi" and a "cppc" sub-struct.
And possibly a "hwp" one: Jason, looks like the HWP driver has a similar
issue (unless again something guarantees that no two CPUs / threads will
be in the same domain).

Jan

cpufreq: eliminate cpufreq_drv_data[]

Possibly many slots of it may be unused (all of them when the HWP or CPPC
drivers are in use), as it's always strictly associated with the CPU
recorded in a policy (irrespective of that CPU intermediately being taken
offline). It is shared by all CPUs sharing a policy. We could therefore
put the respective pointers in struct cpufreq_policy, but even that level
of indirection is pointless. Embed the driver data structure directly in
the policy one.

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/acpi/cpufreq/acpi.c
+++ b/xen/arch/x86/acpi/cpufreq/acpi.c
@@ -174,17 +174,18 @@ static u32 get_cur_val(const cpumask_t *
          return 0;
policy = per_cpu(cpufreq_cpu_policy, cpu);
-    if (!policy || !cpufreq_drv_data[policy->cpu])
+    if ( !policy )
          return 0;
- switch (cpufreq_drv_data[policy->cpu]->arch_cpu_flags) {
+    switch ( policy->drv_data.arch_cpu_flags )
+    {
      case SYSTEM_INTEL_MSR_CAPABLE:
          cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
          cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
          break;
      case SYSTEM_IO_CAPABLE:
          cmd.type = SYSTEM_IO_CAPABLE;
-        perf = cpufreq_drv_data[policy->cpu]->acpi_data;
+        perf = policy->drv_data.acpi_data;
          cmd.addr.io.port = perf->control_register.address;
          cmd.addr.io.bit_width = perf->control_register.bit_width;
          break;
@@ -210,9 +211,8 @@ static unsigned int cf_check get_cur_fre
      if (!policy)
          return 0;
- data = cpufreq_drv_data[policy->cpu];
-    if (unlikely(data == NULL ||
-        data->acpi_data == NULL || data->freq_table == NULL))
+    data = &policy->drv_data;
+    if ( !data->acpi_data || !data->freq_table )
          return 0;
return extract_freq(get_cur_val(cpumask_of(cpu)), data);
@@ -255,7 +255,7 @@ static int cf_check acpi_cpufreq_target(
      struct cpufreq_policy *policy,
      unsigned int target_freq, unsigned int relation)
  {
-    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
+    struct acpi_cpufreq_data *data = &policy->drv_data;
      struct processor_performance *perf;
      struct cpufreq_freqs freqs;
      cpumask_t online_policy_cpus;
@@ -265,10 +265,8 @@ static int cf_check acpi_cpufreq_target(
      unsigned int j;
      int result = 0;
- if (unlikely(data == NULL ||
-        data->acpi_data == NULL || data->freq_table == NULL)) {
+    if ( !data->acpi_data || !data->freq_table )
          return -ENODEV;
-    }
if (policy->turbo == CPUFREQ_TURBO_DISABLED)
          if (target_freq > policy->cpuinfo.second_max_freq)
@@ -334,11 +332,9 @@ static int cf_check acpi_cpufreq_target(
static int cf_check acpi_cpufreq_verify(struct cpufreq_policy *policy)
  {
-    struct acpi_cpufreq_data *data;
      struct processor_performance *perf;
- if (!policy || !(data = cpufreq_drv_data[policy->cpu]) ||
-        !processor_pminfo[policy->cpu])
+    if ( !policy || !processor_pminfo[policy->cpu] )
          return -EINVAL;
perf = &processor_pminfo[policy->cpu]->perf;
@@ -346,7 +342,7 @@ static int cf_check acpi_cpufreq_verify(
      cpufreq_verify_within_limits(policy, 0,
          perf->states[perf->platform_limit].core_frequency * 1000);
- return cpufreq_frequency_table_verify(policy, data->freq_table);
+    return cpufreq_frequency_table_verify(policy, policy->drv_data.freq_table);
  }
static unsigned long
@@ -382,17 +378,11 @@ static int cf_check acpi_cpufreq_cpu_ini
      unsigned int i;
      unsigned int valid_states = 0;
      unsigned int cpu = policy->cpu;
-    struct acpi_cpufreq_data *data;
+    struct acpi_cpufreq_data *data = &policy->drv_data;
      unsigned int result = 0;
      struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
      struct processor_performance *perf;
- data = xzalloc(struct acpi_cpufreq_data);
-    if (!data)
-        return -ENOMEM;
-
-    cpufreq_drv_data[cpu] = data;
-
      data->acpi_data = &processor_pminfo[cpu]->perf;
perf = data->acpi_data;
@@ -409,23 +399,18 @@ static int cf_check acpi_cpufreq_cpu_ini
          if (cpufreq_verbose)
              printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
                     "HARDWARE addr space\n");
-        if (!cpu_has(c, X86_FEATURE_EIST)) {
-            result = -ENODEV;
-            goto err_unreg;
-        }
+        if ( !cpu_has(c, X86_FEATURE_EIST) )
+            return -ENODEV;
          data->arch_cpu_flags = SYSTEM_INTEL_MSR_CAPABLE;
          break;
      default:
-        result = -ENODEV;
-        goto err_unreg;
+        return -ENODEV;
      }
data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
                                      (perf->state_count+1));
-    if (!data->freq_table) {
-        result = -ENOMEM;
-        goto err_unreg;
-    }
+    if ( !data->freq_table )
+        return -ENOMEM;
/* detect transition latency */
      policy->cpuinfo.transition_latency = 0;
@@ -483,23 +468,14 @@ static int cf_check acpi_cpufreq_cpu_ini
      return result;
err_freqfree:
-    xfree(data->freq_table);
-err_unreg:
-    xfree(data);
-    cpufreq_drv_data[cpu] = NULL;
+    XFREE(data->freq_table);
return result;
  }
static int cf_check acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
  {
-    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
-
-    if (data) {
-        cpufreq_drv_data[policy->cpu] = NULL;
-        xfree(data->freq_table);
-        xfree(data);
-    }
+    XFREE(policy->drv_data.freq_table);
return 0;
  }
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -35,8 +35,6 @@
#include <acpi/cpufreq/cpufreq.h> -struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
-
  struct perf_pair {
      union {
          struct {
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
@@ -84,16 +84,14 @@ static int cf_check powernow_cpufreq_tar
      struct cpufreq_policy *policy,
      unsigned int target_freq, unsigned int relation)
  {
-    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
+    struct acpi_cpufreq_data *data = &policy->drv_data;
      struct processor_performance *perf;
      unsigned int next_state; /* Index into freq_table */
      unsigned int next_perf_state; /* Index into perf table */
      int result;
- if (unlikely(data == NULL ||
-        data->acpi_data == NULL || data->freq_table == NULL)) {
+    if ( !data->acpi_data || !data->freq_table )
          return -ENODEV;
-    }
perf = data->acpi_data;
      result = cpufreq_frequency_table_target(policy,
@@ -185,11 +183,9 @@ static void cf_check get_cpu_data(void *
static int cf_check powernow_cpufreq_verify(struct cpufreq_policy *policy)
  {
-    struct acpi_cpufreq_data *data;
      struct processor_performance *perf;
- if (!policy || !(data = cpufreq_drv_data[policy->cpu]) ||
-        !processor_pminfo[policy->cpu])
+    if ( !policy || !processor_pminfo[policy->cpu] )
          return -EINVAL;
perf = &processor_pminfo[policy->cpu]->perf;
@@ -197,7 +193,7 @@ static int cf_check powernow_cpufreq_ver
      cpufreq_verify_within_limits(policy, 0,
          perf->states[perf->platform_limit].core_frequency * 1000);
- return cpufreq_frequency_table_verify(policy, data->freq_table);
+    return cpufreq_frequency_table_verify(policy, policy->drv_data.freq_table);
  }
static int cf_check powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -205,18 +201,12 @@ static int cf_check powernow_cpufreq_cpu
      unsigned int i;
      unsigned int valid_states = 0;
      unsigned int cpu = policy->cpu;
-    struct acpi_cpufreq_data *data;
+    struct acpi_cpufreq_data *data = &policy->drv_data;
      unsigned int result = 0;
      struct processor_performance *perf;
      struct amd_cpu_data info;
      struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
- data = xzalloc(struct acpi_cpufreq_data);
-    if (!data)
-        return -ENOMEM;
-
-    cpufreq_drv_data[cpu] = data;
-
      data->acpi_data = &processor_pminfo[cpu]->perf;
info.perf = perf = data->acpi_data;
@@ -228,8 +218,7 @@ static int cf_check powernow_cpufreq_cpu
          if (cpumask_weight(policy->cpus) != 1) {
              printk(XENLOG_WARNING "Unsupported sharing type %d (%u CPUs)\n",
                     policy->shared_type, cpumask_weight(policy->cpus));
-            result = -ENODEV;
-            goto err_unreg;
+            return -ENODEV;
          }
      } else {
          cpumask_copy(policy->cpus, cpumask_of(cpu));
@@ -238,21 +227,16 @@ static int cf_check powernow_cpufreq_cpu
      /* capability check */
      if (perf->state_count <= 1) {
          printk("No P-States\n");
-        result = -ENODEV;
-        goto err_unreg;
+        return -ENODEV;
      }
- if (perf->control_register.space_id != perf->status_register.space_id) {
-        result = -ENODEV;
-        goto err_unreg;
-    }
+    if ( perf->control_register.space_id != perf->status_register.space_id )
+        return -ENODEV;
data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
                                      (perf->state_count+1));
-    if (!data->freq_table) {
-        result = -ENOMEM;
-        goto err_unreg;
-    }
+    if ( !data->freq_table )
+        return -ENOMEM;
/* detect transition latency */
      policy->cpuinfo.transition_latency = 0;
@@ -298,23 +282,14 @@ static int cf_check powernow_cpufreq_cpu
      return result;
err_freqfree:
-    xfree(data->freq_table);
-err_unreg:
-    xfree(data);
-    cpufreq_drv_data[cpu] = NULL;
+    XFREE(data->freq_table);
return result;
  }
static int cf_check powernow_cpufreq_cpu_exit(struct cpufreq_policy *policy)
  {
-    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
-
-    if (data) {
-        cpufreq_drv_data[policy->cpu] = NULL;
-        xfree(data->freq_table);
-        xfree(data);
-    }
+    XFREE(policy->drv_data.freq_table);
return 0;
  }
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -37,8 +37,6 @@ struct acpi_cpufreq_data {
      unsigned int arch_cpu_flags;
  };
-extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
-
  struct cpufreq_cpuinfo {
      unsigned int        max_freq;
      unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
@@ -80,6 +78,8 @@ struct cpufreq_policy {
      int8_t              turbo;  /* tristate flag: 0 for unsupported
                                   * -1 for disable, 1 for enabled
                                   * See CPUFREQ_TURBO_* below for defines */
+
+    struct acpi_cpufreq_data drv_data;
  };
  DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);


Reply via email to