The branch main has been updated by aokblast:

URL: https://cgit.FreeBSD.org/src/commit/?id=3e6e4e4a0d42fa24f3b2a1c087e9ad25f9594081

commit 3e6e4e4a0d42fa24f3b2a1c087e9ad25f9594081
Author:     ShengYi Hung <[email protected]>
AuthorDate: 2025-07-08 11:45:26 +0000
Commit:     ShengYi Hung <[email protected]>
CommitDate: 2026-01-03 02:52:51 +0000

    hwpstate: add CPPC support for pstate driver on AMD
    
    Implement the CPPC interface for the AMD P-state driver.  This feature
    is enabled only when CPUID reports that the processor supports CPPC.
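
    Concretely, the gate is the CPPC bit in AMD's extended feature leaf;
    the new probe code checks it roughly like this (condensed from the
    patch below):

        if ((amd_extended_feature_extensions & AMDFEID_CPPC) != 0)
                sc->flags |= PSTATE_CPPC;       /* CPU advertises CPPC */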
    
    CPPC is implemented in the following steps (sketched in code below):
    
    1. Write the enable MSR to turn CPPC on.
    2. Read the capability register, which reports the raw performance
    levels for lowest, most energy-efficient, guaranteed, and maximum
    performance.
    3. Write the request register with EPP set to energy-balanced mode and
    let the CPU and firmware enter autonomous mode.
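
    In code, the three steps reduce to roughly the following condensed
    sketch of the amd_set_autonomous_hwp() routine added by this change;
    the helper name cppc_bringup is editorial, and the sched_bind() to the
    target CPU plus the error printfs of the real routine are omitted:

        static int
        cppc_bringup(struct hwpstate_softc *sc)
        {
                uint64_t caps;
                int ret;

                /* 1. Enable CPPC through its enable MSR. */
                if ((ret = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1)) != 0)
                        return (ret);

                /* 2. Read the capability register for the performance levels. */
                if ((ret = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &caps)) != 0)
                        return (ret);

                /*
                 * 3. Request energy-balanced EPP (0x80), span the full
                 * lowest..highest performance range, and leave desired
                 * performance at 0 so the hardware runs autonomously.
                 */
                if ((ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req)) != 0)
                        return (ret);
                SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, 0x80);
                SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MIN_PERF_BITS,
                    BITS_VALUE(AMD_CPPC_CAPS_1_LOW_PERF_BITS, caps));
                SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MAX_PERF_BITS,
                    BITS_VALUE(AMD_CPPC_CAPS_1_HIGH_PERF_BITS, caps));
                SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
                return (wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req));
        }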
    
    Also, create a sysctl handler that lets userspace change the EPP value.
    In Intel's hwpstate, EPP can be controlled at either the package level
    or the core level, whereas AMD only provides core-level control.  To
    stay in sync with the Intel code, implement package-level control in
    software and provide another sysctl (machdep.hwpstate_pkg_ctrl) to
    select it, as illustrated below.
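
    A minimal sketch of that conversion and fan-out; the helper name
    epp_apply is illustrative, the real logic lives in sysctl_epp_select()
    and sysctl_epp_select_per_core() in the diff below:

        static void
        epp_apply(device_t dev, uint32_t pct)   /* pct: 0 (perf) .. 100 (eff) */
        {
                const uint32_t max_epp =
                    BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, (uint64_t)-1);
                uint32_t val = pct * max_epp / 100;     /* 0 -> 0x00, 100 -> 0xff */
                devclass_t dc;
                int cpu;

                if (hwpstate_pkg_ctrl_enable) {
                        /* Package level: program every core's device. */
                        dc = devclass_find(HWP_AMD_CLASSNAME);
                        CPU_FOREACH(cpu)
                                sysctl_epp_select_per_core(
                                    devclass_get_device(dc, cpu), val);
                } else {
                        /* Core level: only the device behind this sysctl. */
                        sysctl_epp_select_per_core(dev, val);
                }
        }

    Because machdep.hwpstate_pkg_ctrl is CTLFLAG_RDTUN, package-level
    control can only be disabled from loader.conf; at runtime each
    dev.hwpstate_amd.<n>.epp sysctl then either drives all cores (the
    default) or just its own core.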
    
    Reviewed by:    olce, khng
    Approved by:    lwhsu (mentor)
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D49587
---
 sys/x86/cpufreq/hwpstate_amd.c | 361 ++++++++++++++++++++++++++++++++++++++---
 sys/x86/include/specialreg.h   |   1 +
 2 files changed, 337 insertions(+), 25 deletions(-)

diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
index fc948dc90a15..4395e43a219f 100644
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2009 Michael Reifenberger
  * Copyright (c) 2009 Norikatsu Shigemura
  * Copyright (c) 2008-2009 Gen Otsuji
+ * Copyright (c) 2025 ShengYi Hung
  *
  * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
  * in various parts. The authors of these files are Nate Lawson,
@@ -55,6 +56,7 @@
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
+#include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 
@@ -74,6 +76,15 @@
 #define        MSR_AMD_10H_11H_STATUS  0xc0010063
 #define        MSR_AMD_10H_11H_CONFIG  0xc0010064
 
+#define        MSR_AMD_CPPC_CAPS_1     0xc00102b0
+#define        MSR_AMD_CPPC_ENABLE     0xc00102b1
+#define        MSR_AMD_CPPC_CAPS_2     0xc00102b2
+#define        MSR_AMD_CPPC_REQUEST    0xc00102b3
+#define        MSR_AMD_CPPC_STATUS     0xc00102b4
+
+#define        MSR_AMD_PWR_ACC         0xc001007a
+#define        MSR_AMD_PWR_ACC_MX      0xc001007b
+
 #define        AMD_10H_11H_MAX_STATES  16
 
 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */
@@ -92,6 +103,23 @@
 
 #define        AMD_1AH_CUR_FID(msr)                    ((msr) & 0xFFF)
 
+#define        AMD_CPPC_CAPS_1_HIGH_PERF_BITS          0xff000000
+#define        AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS       0x00ff0000
+#define        AMD_CPPC_CAPS_1_LOW_NONLIN_PERF_BITS    0x0000ff00
+#define        AMD_CPPC_CAPS_1_LOW_PERF_BITS           0x000000ff
+
+#define        AMD_CPPC_REQUEST_ENERGY_PERF_BITS       0xff000000
+#define        AMD_CPPC_REQUEST_DES_PERF_BITS          0x00ff0000
+#define        AMD_CPPC_REQUEST_MIN_PERF_BITS          0x0000ff00
+#define        AMD_CPPC_REQUEST_MAX_PERF_BITS          0x000000ff
+
+#define        HWP_AMD_CLASSNAME                       "hwpstate_amd"
+
+#define        BITS_VALUE(bits, num)                   (((num) & (bits)) >> (ffsll((bits)) - 1))
+#define        BITS_WITH_VALUE(bits, val)              ((uintmax_t)(val) << (ffsll((bits)) - 1))
+#define        SET_BITS_VALUE(var, bits, val) \
+       ((var) = (var) & ~(bits) | BITS_WITH_VALUE((bits), (val)))
+
 #define        HWPSTATE_DEBUG(dev, msg...)                     \
        do {                                            \
                if (hwpstate_verbose)                   \
@@ -106,10 +134,16 @@ struct hwpstate_setting {
        int     pstate_id;      /* P-State id */
 };
 
+enum hwpstate_flags {
+       PSTATE_CPPC = 1,
+};
+
 struct hwpstate_softc {
        device_t                dev;
-       struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES];
+       struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES];
        int                     cfnum;
+       uint32_t flags;
+       uint64_t req;
 };
 
 static void    hwpstate_identify(driver_t *driver, device_t parent);
@@ -140,6 +174,11 @@ SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
     "If enabled (1), limit administrative control of P-states to the value in "
     "CurPstateLimit");
 
+static bool hwpstate_pkg_ctrl_enable = true;
+SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
+    &hwpstate_pkg_ctrl_enable, 0,
+    "Set 1 (default) to enable package-level control, 0 to disable");
+
 static device_method_t hwpstate_methods[] = {
        /* Device interface */
        DEVMETHOD(device_identify,      hwpstate_identify),
@@ -159,8 +198,154 @@ static device_method_t hwpstate_methods[] = {
        {0, 0}
 };
 
+static int
+amdhwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+       device_t dev;
+       struct pcpu *pc;
+       struct sbuf *sb;
+       struct hwpstate_softc *sc;
+       uint64_t data;
+       int ret;
+
+       sc = (struct hwpstate_softc *)arg1;
+       dev = sc->dev;
+
+       pc = cpu_get_pcpu(dev);
+       if (pc == NULL)
+               return (ENXIO);
+
+       sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
+       sbuf_putc(sb, '\n');
+       thread_lock(curthread);
+       sched_bind(curthread, pc->pc_cpuid);
+       thread_unlock(curthread);
+
+       rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data);
+       sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
+           ((data & 1) ? "En" : "Dis"));
+
+       if (data == 0) {
+               ret = 0;
+               goto out;
+       }
+
+       rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data);
+       sbuf_printf(sb, "\tHighest Performance: %03ju\n",
+           BITS_VALUE(AMD_CPPC_CAPS_1_HIGH_PERF_BITS, data));
+       sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n",
+           BITS_VALUE(AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS, data));
+       sbuf_printf(sb, "\tEfficient Performance: %03ju\n",
+           BITS_VALUE(AMD_CPPC_CAPS_1_LOW_NONLIN_PERF_BITS, data));
+       sbuf_printf(sb, "\tLowest Performance: %03ju\n",
+           BITS_VALUE(AMD_CPPC_CAPS_1_LOW_PERF_BITS, data));
+       sbuf_putc(sb, '\n');
+
+       rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data);
+
+#define pkg_print(name, offset)                         \
+       do {                                            \
+               sbuf_printf(sb, "\t%s: %03u\n", name,   \
+                   (unsigned)(data >> offset) & 0xff); \
+       } while (0)
+
+       pkg_print("Requested Efficiency Performance Preference", 24);
+       pkg_print("Requested Desired Performance", 16);
+       pkg_print("Requested Maximum Performance", 8);
+       pkg_print("Requested Minimum Performance", 0);
+#undef pkg_print
+
+       sbuf_putc(sb, '\n');
+
+out:
+       thread_lock(curthread);
+       sched_unbind(curthread);
+       thread_unlock(curthread);
+
+       ret = sbuf_finish(sb);
+       if (ret == 0)
+               ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
+       sbuf_delete(sb);
+
+       return (ret);
+}
+
+static bool
+sysctl_epp_select_per_core(const device_t hwp_device, uint32_t val)
+{
+       struct hwpstate_softc *sc;
+       bool success = true;
+       int ret, cpuid;
+
+       cpuid = cpu_get_pcpu(hwp_device)->pc_cpuid;
+       thread_lock(curthread);
+       sched_bind(curthread, cpuid);
+       thread_unlock(curthread);
+       sc = device_get_softc(hwp_device);
+       if (BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, sc->req) == val)
+               goto end;
+       SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, val);
+       ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req);
+       if (ret != 0) {
+               success = false;
+               device_printf(hwp_device, "Failed to set EPP to %u", val);
+               goto end;
+       }
+
+end:
+       thread_lock(curthread);
+       sched_unbind(curthread);
+       thread_unlock(curthread);
+
+       return (success);
+}
+
+static int
+sysctl_epp_select(SYSCTL_HANDLER_ARGS)
+{
+       device_t dev, hwp_dev;
+       struct hwpstate_softc *sc;
+       const uint32_t max_energy_perf =
+           BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, (uint64_t)-1);
+       devclass_t dc;
+       uint32_t val;
+       int ret = 0;
+       int cpu;
+
+       dev = oidp->oid_arg1;
+       sc = device_get_softc(dev);
+
+       if (!(sc->flags & PSTATE_CPPC))
+               return (ENODEV);
+
+       val = BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, sc->req) * 100 /
+           max_energy_perf;
+       ret = sysctl_handle_int(oidp, &val, 0, req);
+       if (ret != 0 || req->newptr == NULL)
+               goto end;
+       if (val > 100) {
+               ret = EINVAL;
+               goto end;
+       }
+       val = (val * max_energy_perf) / 100;
+
+       if (hwpstate_pkg_ctrl_enable) {
+               dc = devclass_find(HWP_AMD_CLASSNAME);
+               KASSERT(dc != NULL,
+                   (HWP_AMD_CLASSNAME ": devclass cannot be null"));
+               CPU_FOREACH(cpu) {
+                       hwp_dev = devclass_get_device(dc, cpu);
+                       sysctl_epp_select_per_core(hwp_dev, val);
+               }
+       } else
+               sysctl_epp_select_per_core(dev, val);
+
+end:
+       return (ret);
+}
+
 static driver_t hwpstate_driver = {
-       "hwpstate",
+       HWP_AMD_CLASSNAME,
        hwpstate_methods,
        sizeof(struct hwpstate_softc),
 };
@@ -269,6 +454,8 @@ hwpstate_set(device_t dev, const struct cf_setting *cf)
        if (cf == NULL)
                return (EINVAL);
        sc = device_get_softc(dev);
+       if (sc->flags & PSTATE_CPPC)
+               return (EOPNOTSUPP);
        set = sc->hwpstate_settings;
        for (i = 0; i < sc->cfnum; i++)
                if (CPUFREQ_CMP(cf->freq, set[i].freq))
@@ -284,21 +471,38 @@ hwpstate_get(device_t dev, struct cf_setting *cf)
 {
        struct hwpstate_softc *sc;
        struct hwpstate_setting set;
+       struct pcpu *pc;
        uint64_t msr;
+       uint64_t rate;
+       int ret;
 
        sc = device_get_softc(dev);
        if (cf == NULL)
                return (EINVAL);
-       msr = rdmsr(MSR_AMD_10H_11H_STATUS);
-       if (msr >= sc->cfnum)
-               return (EINVAL);
-       set = sc->hwpstate_settings[msr];
 
-       cf->freq = set.freq;
-       cf->volts = set.volts;
-       cf->power = set.power;
-       cf->lat = set.lat;
-       cf->dev = dev;
+       if (sc->flags & PSTATE_CPPC) {
+               pc = cpu_get_pcpu(dev);
+               if (pc == NULL)
+                       return (ENXIO);
+
+               memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
+               cf->dev = dev;
+               if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
+                       return (ret);
+               cf->freq = rate / 1000000;
+       } else {
+               msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+               if (msr >= sc->cfnum)
+                       return (EINVAL);
+               set = sc->hwpstate_settings[msr];
+
+               cf->freq = set.freq;
+               cf->volts = set.volts;
+               cf->power = set.power;
+               cf->lat = set.lat;
+               cf->dev = dev;
+       }
+
        return (0);
 }
 
@@ -312,6 +516,9 @@ hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
        if (sets == NULL || count == NULL)
                return (EINVAL);
        sc = device_get_softc(dev);
+       if (sc->flags & PSTATE_CPPC)
+               return (EOPNOTSUPP);
+
        if (*count < sc->cfnum)
                return (E2BIG);
        for (i = 0; i < sc->cfnum; i++, sets++) {
@@ -330,19 +537,24 @@ hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
 static int
 hwpstate_type(device_t dev, int *type)
 {
+       struct hwpstate_softc *sc;
 
        if (type == NULL)
                return (EINVAL);
+       sc = device_get_softc(dev);
 
        *type = CPUFREQ_TYPE_ABSOLUTE;
+       *type |= sc->flags & PSTATE_CPPC ?
+           CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED :
+           0;
        return (0);
 }
 
 static void
 hwpstate_identify(driver_t *driver, device_t parent)
 {
-
-       if (device_find_child(parent, "hwpstate", DEVICE_UNIT_ANY) != NULL)
+       if (device_find_child(parent, HWP_AMD_CLASSNAME, DEVICE_UNIT_ANY) !=
+           NULL)
                return;
 
        if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
@@ -357,14 +569,82 @@ hwpstate_identify(driver_t *driver, device_t parent)
                return;
        }
 
-       if (resource_disabled("hwpstate", 0))
+       if (resource_disabled(HWP_AMD_CLASSNAME, 0))
                return;
 
-       if (BUS_ADD_CHILD(parent, 10, "hwpstate", device_get_unit(parent))
-           == NULL)
+       if (BUS_ADD_CHILD(parent, 10, HWP_AMD_CLASSNAME,
+               device_get_unit(parent)) == NULL)
                device_printf(parent, "hwpstate: add child failed\n");
 }
 
+static int
+amd_set_autonomous_hwp(struct hwpstate_softc *sc)
+{
+       struct pcpu *pc;
+       device_t dev;
+       uint64_t caps;
+       int ret;
+
+       dev = sc->dev;
+       pc = cpu_get_pcpu(dev);
+       if (pc == NULL)
+               return (ENXIO);
+
+       thread_lock(curthread);
+       sched_bind(curthread, pc->pc_cpuid);
+       thread_unlock(curthread);
+
+       ret = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
+       if (ret != 0) {
+               device_printf(dev, "Failed to enable cppc for cpu%d (%d)\n",
+                   pc->pc_cpuid, ret);
+               goto out;
+       }
+
+       ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req);
+       if (ret != 0) {
+               device_printf(dev,
+                   "Failed to read CPPC request MSR for cpu%d (%d)\n",
+                   pc->pc_cpuid, ret);
+               goto out;
+       }
+
+       ret = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &caps);
+       if (ret != 0) {
+               device_printf(dev,
+                   "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
+                   pc->pc_cpuid, ret);
+               goto out;
+       }
+
+       /*
+        * In Intel's reference manual, the default value of EPP is 0x80u which
+        * is the balanced mode. For consistency, we set the same value in AMD's
+        * CPPC driver.
+        */
+       SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, 0x80);
+       SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MIN_PERF_BITS,
+           BITS_VALUE(AMD_CPPC_CAPS_1_LOW_PERF_BITS, caps));
+       SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MAX_PERF_BITS,
+           BITS_VALUE(AMD_CPPC_CAPS_1_HIGH_PERF_BITS, caps));
+       /* enable autonomous mode by setting desired performance to 0 */
+       SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
+
+       ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req);
+       if (ret) {
+               device_printf(dev,
+                   "Failed to setup autonomous HWP for cpu%d\n",
+                   pc->pc_cpuid);
+               goto out;
+       }
+out:
+       thread_lock(curthread);
+       sched_unbind(curthread);
+       thread_unlock(curthread);
+
+       return (ret);
+}
+
 static int
 hwpstate_probe(device_t dev)
 {
@@ -373,15 +653,25 @@ hwpstate_probe(device_t dev)
        uint64_t msr;
        int error, type;
 
-       /*
-        * Only hwpstate0.
-        * It goes well with acpi_throttle.
-        */
-       if (device_get_unit(dev) != 0)
-               return (ENXIO);
-
        sc = device_get_softc(dev);
+
+       if (amd_extended_feature_extensions & AMDFEID_CPPC) {
+               sc->flags |= PSTATE_CPPC;
+               device_set_desc(dev,
+                   "AMD Collaborative Processor Performance Control (CPPC)");
+       } else {
+               /*
+                * No CPPC support.  Only keep hwpstate0, it goes well with
+                * acpi_throttle.
+                */
+               if (device_get_unit(dev) != 0)
+                       return (ENXIO);
+               device_set_desc(dev, "Cool`n'Quiet 2.0");
+       }
+
        sc->dev = dev;
+       if (sc->flags & PSTATE_CPPC)
+               return (0);
 
        /*
         * Check if acpi_perf has INFO only flag.
@@ -433,14 +723,32 @@ hwpstate_probe(device_t dev)
        if (error)
                return (error);
 
-       device_set_desc(dev, "Cool`n'Quiet 2.0");
        return (0);
 }
 
 static int
 hwpstate_attach(device_t dev)
 {
+       struct hwpstate_softc *sc;
+       int res;
 
+       sc = device_get_softc(dev);
+       if (sc->flags & PSTATE_CPPC) {
+               if ((res = amd_set_autonomous_hwp(sc)))
+                       return res;
+               SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+                   SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO,
+                   device_get_nameunit(dev),
+                   CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
+                   sc, 0, amdhwp_dump_sysctl_handler, "A", "");
+
+               SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+                   SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+                   "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0,
+                   sysctl_epp_select, "I",
+                   "Efficiency/Performance Preference "
+                   "(range from 0, most performant, through 100, most efficient)");
+       }
        return (cpufreq_register(dev));
 }
 
@@ -584,8 +892,11 @@ out:
 static int
 hwpstate_detach(device_t dev)
 {
+       struct hwpstate_softc *sc;
 
-       hwpstate_goto_pstate(dev, 0);
+       sc = device_get_softc(dev);
+       if (!(sc->flags & PSTATE_CPPC))
+               hwpstate_goto_pstate(dev, 0);
        return (cpufreq_unregister(dev));
 }
 
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index e9dde5c3b46a..3e5f598cd82a 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -418,6 +418,7 @@
 #define        AMDPM_HW_PSTATE         0x00000080
 #define        AMDPM_TSC_INVARIANT     0x00000100
 #define        AMDPM_CPB               0x00000200
+#define        AMDPM_PWR_REPORT        0x00001000
 
 /*
  * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)
