The branch main has been updated by aokblast:

URL: https://cgit.FreeBSD.org/src/commit/?id=3e6e4e4a0d42fa24f3b2a1c087e9ad25f9594081
commit 3e6e4e4a0d42fa24f3b2a1c087e9ad25f9594081
Author:     ShengYi Hung <[email protected]>
AuthorDate: 2025-07-08 11:45:26 +0000
Commit:     ShengYi Hung <[email protected]>
CommitDate: 2026-01-03 02:52:51 +0000

    hwpstate: add CPPC support for pstate driver on AMD

    Implement the CPPC interface for the AMD P-state driver. The feature is
    only enabled when CPUID shows that the CPU supports CPPC. CPPC is set
    up in the following steps:

    1. Write the enable MSR to switch CPPC on.
    2. Read the capability register, which reports the raw performance
       levels for lowest, most energy-efficient, guaranteed, and maximum
       performance.
    3. Write the request register with EPP set to energy-balanced mode and
       let the CPU and firmware enter autonomous mode.

    Also, create a sysctl handler that allows userspace to change the EPP
    value.

    In Intel's hwpstate, the EPP value can be controlled at either package
    or core level; AMD only provides core-level control. To stay in sync
    with the Intel code, we implement package-level control in software
    and provide another sysctl (machdep.hwpstate_pkg_ctrl) to select it.

    Reviewed by:	olce, khng
    Approved by:	lwhsu (mentor)
    MFC after:	2 weeks
    Sponsored by:	The FreeBSD Foundation
    Differential Revision:	https://reviews.freebsd.org/D49587
---
 sys/x86/cpufreq/hwpstate_amd.c | 361 ++++++++++++++++++++++++++++++++++++++---
 sys/x86/include/specialreg.h   |   1 +
 2 files changed, 337 insertions(+), 25 deletions(-)

diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
index fc948dc90a15..4395e43a219f 100644
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2009 Michael Reifenberger
  * Copyright (c) 2009 Norikatsu Shigemura
  * Copyright (c) 2008-2009 Gen Otsuji
+ * Copyright (c) 2025 ShengYi Hung
  *
  * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
  * in various parts. The authors of these files are Nate Lawson,
@@ -55,6 +56,7 @@
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
+#include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 
@@ -74,6 +76,15 @@
 #define MSR_AMD_10H_11H_STATUS 0xc0010063
 #define MSR_AMD_10H_11H_CONFIG 0xc0010064
 
+#define MSR_AMD_CPPC_CAPS_1  0xc00102b0
+#define MSR_AMD_CPPC_ENABLE  0xc00102b1
+#define MSR_AMD_CPPC_CAPS_2  0xc00102b2
+#define MSR_AMD_CPPC_REQUEST 0xc00102b3
+#define MSR_AMD_CPPC_STATUS  0xc00102b4
+
+#define MSR_AMD_PWR_ACC    0xc001007a
+#define MSR_AMD_PWR_ACC_MX 0xc001007b
+
 #define AMD_10H_11H_MAX_STATES 16
 
 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */
@@ -92,6 +103,23 @@
 
 #define AMD_1AH_CUR_FID(msr) ((msr) & 0xFFF)
 
+#define AMD_CPPC_CAPS_1_HIGH_PERF_BITS       0xff000000
+#define AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS    0x00ff0000
+#define AMD_CPPC_CAPS_1_LOW_NONLIN_PERF_BITS 0x0000ff00
+#define AMD_CPPC_CAPS_1_LOW_PERF_BITS        0x000000ff
+
+#define AMD_CPPC_REQUEST_ENERGY_PERF_BITS 0xff000000
+#define AMD_CPPC_REQUEST_DES_PERF_BITS    0x00ff0000
+#define AMD_CPPC_REQUEST_MIN_PERF_BITS    0x0000ff00
+#define AMD_CPPC_REQUEST_MAX_PERF_BITS    0x000000ff
+
+#define HWP_AMD_CLASSNAME "hwpstate_amd"
+
+#define BITS_VALUE(bits, num) (((num) & (bits)) >> (ffsll((bits)) - 1))
+#define BITS_WITH_VALUE(bits, val) ((uintmax_t)(val) << (ffsll((bits)) - 1))
+#define SET_BITS_VALUE(var, bits, val) \
+	((var) = (var) & ~(bits) | BITS_WITH_VALUE((bits), (val)))
+
 #define HWPSTATE_DEBUG(dev, msg...) \
 	do { \
 		if (hwpstate_verbose) \
@@ -106,10 +134,16 @@ struct hwpstate_setting {
 	int pstate_id;	/* P-State id */
 };
 
+enum hwpstate_flags {
+	PSTATE_CPPC = 1,
+};
+
 struct hwpstate_softc {
 	device_t dev;
-	struct hwpstate_setting	hwpstate_settings[AMD_10H_11H_MAX_STATES];
+	struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES];
 	int cfnum;
+	uint32_t flags;
+	uint64_t req;
 };
 
 static void hwpstate_identify(driver_t *driver, device_t parent);
@@ -140,6 +174,11 @@ SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
     "If enabled (1), limit administrative control of P-states to the value in "
     "CurPstateLimit");
 
+static bool hwpstate_pkg_ctrl_enable = true;
+SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
+    &hwpstate_pkg_ctrl_enable, 0,
+    "Set 1 (default) to enable package-level control, 0 to disable");
+
 static device_method_t hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify, hwpstate_identify),
@@ -159,8 +198,154 @@ static device_method_t hwpstate_methods[] = {
 	{0, 0}
 };
 
+static int
+amdhwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct pcpu *pc;
+	struct sbuf *sb;
+	struct hwpstate_softc *sc;
+	uint64_t data;
+	int ret;
+
+	sc = (struct hwpstate_softc *)arg1;
+	dev = sc->dev;
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
+	sbuf_putc(sb, '\n');
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data);
+	sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
+	    ((data & 1) ? "En" : "Dis"));
+
+	if (data == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data);
+	sbuf_printf(sb, "\tHighest Performance: %03ju\n",
+	    BITS_VALUE(AMD_CPPC_CAPS_1_HIGH_PERF_BITS, data));
+	sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n",
+	    BITS_VALUE(AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS, data));
+	sbuf_printf(sb, "\tEfficient Performance: %03ju\n",
+	    BITS_VALUE(AMD_CPPC_CAPS_1_LOW_NONLIN_PERF_BITS, data));
+	sbuf_printf(sb, "\tLowest Performance: %03ju\n",
+	    BITS_VALUE(AMD_CPPC_CAPS_1_LOW_PERF_BITS, data));
+	sbuf_putc(sb, '\n');
+
+	rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data);
+
+#define pkg_print(name, offset) \
+	do { \
+		sbuf_printf(sb, "\t%s: %03u\n", name, \
+		    (unsigned)(data >> offset) & 0xff); \
+	} while (0)
+
+	pkg_print("Requested Efficiency Performance Preference", 24);
+	pkg_print("Requested Desired Performance", 16);
+	pkg_print("Requested Maximum Performance", 8);
+	pkg_print("Requested Minimum Performance", 0);
+#undef pkg_print
+
+	sbuf_putc(sb, '\n');
+
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	ret = sbuf_finish(sb);
+	if (ret == 0)
+		ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
+	sbuf_delete(sb);
+
+	return (ret);
+}
+
+static bool
+sysctl_epp_select_per_core(const device_t hwp_device, uint32_t val)
+{
+	struct hwpstate_softc *sc;
+	bool success = true;
+	int ret, cpuid;
+
+	cpuid = cpu_get_pcpu(hwp_device)->pc_cpuid;
+	thread_lock(curthread);
+	sched_bind(curthread, cpuid);
+	thread_unlock(curthread);
+	sc = device_get_softc(hwp_device);
+	if (BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, sc->req) == val)
+		goto end;
+	SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, val);
+	ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req);
+	if (ret != 0) {
+		success = false;
+		device_printf(hwp_device, "Failed to set EPP to %u", val);
+		goto end;
+	}
+
+end:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (success);
+}
+
+static int
+sysctl_epp_select(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev, hwp_dev;
+	struct hwpstate_softc *sc;
+	const uint32_t max_energy_perf =
+	    BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, (uint64_t)-1);
+	devclass_t dc;
+	uint32_t val;
+	int ret = 0;
+	int cpu;
+
+	dev = oidp->oid_arg1;
+	sc = device_get_softc(dev);
+
+	if (!(sc->flags & PSTATE_CPPC))
+		return (ENODEV);
+
+	val = BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, sc->req) * 100 /
+	    max_energy_perf;
+	ret = sysctl_handle_int(oidp, &val, 0, req);
+	if (ret != 0 || req->newptr == NULL)
+		goto end;
+	if (val > 100) {
+		ret = EINVAL;
+		goto end;
+	}
+	val = (val * max_energy_perf) / 100;
+
+	if (hwpstate_pkg_ctrl_enable) {
+		dc = devclass_find(HWP_AMD_CLASSNAME);
+		KASSERT(dc != NULL,
+		    (HWP_AMD_CLASSNAME ": devclass cannot be null"));
+		CPU_FOREACH(cpu) {
+			hwp_dev = devclass_get_device(dc, cpu);
+			sysctl_epp_select_per_core(hwp_dev, val);
+		}
+	} else
+		sysctl_epp_select_per_core(dev, val);
+
+end:
+	return (ret);
+}
+
 static driver_t hwpstate_driver = {
-	"hwpstate",
+	HWP_AMD_CLASSNAME,
 	hwpstate_methods,
 	sizeof(struct hwpstate_softc),
 };
@@ -269,6 +454,8 @@ hwpstate_set(device_t dev, const struct cf_setting *cf)
 	if (cf == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC)
+		return (EOPNOTSUPP);
 	set = sc->hwpstate_settings;
 	for (i = 0; i < sc->cfnum; i++)
 		if (CPUFREQ_CMP(cf->freq, set[i].freq))
@@ -284,21 +471,38 @@ hwpstate_get(device_t dev, struct cf_setting *cf)
 {
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting set;
+	struct pcpu *pc;
 	uint64_t msr;
+	uint64_t rate;
+	int ret;
 
 	sc = device_get_softc(dev);
 	if (cf == NULL)
 		return (EINVAL);
-	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
-	if (msr >= sc->cfnum)
-		return (EINVAL);
-	set = sc->hwpstate_settings[msr];
-	cf->freq = set.freq;
-	cf->volts = set.volts;
-	cf->power = set.power;
-	cf->lat = set.lat;
-	cf->dev = dev;
+	if (sc->flags & PSTATE_CPPC) {
+		pc = cpu_get_pcpu(dev);
+		if (pc == NULL)
+			return (ENXIO);
+
+		memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
+		cf->dev = dev;
+		if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
+			return (ret);
+		cf->freq = rate / 1000000;
+	} else {
+		msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+		if (msr >= sc->cfnum)
+			return (EINVAL);
+		set = sc->hwpstate_settings[msr];
+
+		cf->freq = set.freq;
+		cf->volts = set.volts;
+		cf->power = set.power;
+		cf->lat = set.lat;
+		cf->dev = dev;
+	}
 
 	return (0);
 }
@@ -312,6 +516,9 @@ hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
 	if (sets == NULL || count == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC)
+		return (EOPNOTSUPP);
+
 	if (*count < sc->cfnum)
 		return (E2BIG);
 	for (i = 0; i < sc->cfnum; i++, sets++) {
@@ -330,19 +537,24 @@ hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
 static int
 hwpstate_type(device_t dev, int *type)
 {
+	struct hwpstate_softc *sc;
 
 	if (type == NULL)
 		return (EINVAL);
+	sc = device_get_softc(dev);
 	*type = CPUFREQ_TYPE_ABSOLUTE;
+	*type |= sc->flags & PSTATE_CPPC ?
+	    CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED :
+	    0;
 
 	return (0);
 }
 
 static void
 hwpstate_identify(driver_t *driver, device_t parent)
 {
-
-	if (device_find_child(parent, "hwpstate", DEVICE_UNIT_ANY) != NULL)
+	if (device_find_child(parent, HWP_AMD_CLASSNAME, DEVICE_UNIT_ANY) !=
+	    NULL)
 		return;
 
 	if ((cpu_vendor_id != CPU_VENDOR_AMD ||
 	    CPUID_TO_FAMILY(cpu_id) < 0x10) &&
@@ -357,14 +569,82 @@ hwpstate_identify(driver_t *driver, device_t parent)
 		return;
 	}
 
-	if (resource_disabled("hwpstate", 0))
+	if (resource_disabled(HWP_AMD_CLASSNAME, 0))
 		return;
 
-	if (BUS_ADD_CHILD(parent, 10, "hwpstate", device_get_unit(parent))
-	    == NULL)
+	if (BUS_ADD_CHILD(parent, 10, HWP_AMD_CLASSNAME,
+	    device_get_unit(parent)) == NULL)
 		device_printf(parent, "hwpstate: add child failed\n");
 }
 
+static int
+amd_set_autonomous_hwp(struct hwpstate_softc *sc)
+{
+	struct pcpu *pc;
+	device_t dev;
+	uint64_t caps;
+	int ret;
+
+	dev = sc->dev;
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	ret = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
+	if (ret != 0) {
+		device_printf(dev, "Failed to enable cppc for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req);
+	if (ret != 0) {
+		device_printf(dev,
+		    "Failed to read CPPC request MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	ret = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &caps);
+	if (ret != 0) {
+		device_printf(dev,
+		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	/*
+	 * In Intel's reference manual, the default value of EPP is 0x80u which
+	 * is the balanced mode. For consistency, we set the same value in
+	 * AMD's CPPC driver.
+	 */
+	SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, 0x80);
+	SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MIN_PERF_BITS,
+	    BITS_VALUE(AMD_CPPC_CAPS_1_LOW_PERF_BITS, caps));
+	SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_MAX_PERF_BITS,
+	    BITS_VALUE(AMD_CPPC_CAPS_1_HIGH_PERF_BITS, caps));
+	/* enable autonomous mode by setting desired performance to 0 */
+	SET_BITS_VALUE(sc->req, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
+
+	ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to setup autonomous HWP for cpu%d\n",
+		    pc->pc_cpuid);
+		goto out;
+	}
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (ret);
+}
+
 static int
 hwpstate_probe(device_t dev)
 {
@@ -373,15 +653,25 @@ hwpstate_probe(device_t dev)
 	uint64_t msr;
 	int error, type;
 
-	/*
-	 * Only hwpstate0.
-	 * It goes well with acpi_throttle.
-	 */
-	if (device_get_unit(dev) != 0)
-		return (ENXIO);
-
 	sc = device_get_softc(dev);
+
+	if (amd_extended_feature_extensions & AMDFEID_CPPC) {
+		sc->flags |= PSTATE_CPPC;
+		device_set_desc(dev,
+		    "AMD Collaborative Processor Performance Control (CPPC)");
+	} else {
+		/*
+		 * No CPPC support. Only keep hwpstate0, it goes well with
+		 * acpi_throttle.
+		 */
+		if (device_get_unit(dev) != 0)
+			return (ENXIO);
+		device_set_desc(dev, "Cool`n'Quiet 2.0");
+	}
+
 	sc->dev = dev;
+	if (sc->flags & PSTATE_CPPC)
+		return (0);
 
 	/*
 	 * Check if acpi_perf has INFO only flag.
@@ -433,14 +723,32 @@ hwpstate_probe(device_t dev)
 	if (error)
 		return (error);
 
-	device_set_desc(dev, "Cool`n'Quiet 2.0");
 	return (0);
 }
 
 static int
 hwpstate_attach(device_t dev)
 {
+	struct hwpstate_softc *sc;
+	int res;
 
+	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC) {
+		if ((res = amd_set_autonomous_hwp(sc)))
+			return res;
+
+		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+		    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO,
+		    device_get_nameunit(dev),
+		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
+		    sc, 0, amdhwp_dump_sysctl_handler, "A", "");
+
+		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+		    "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0,
+		    sysctl_epp_select, "I",
+		    "Efficiency/Performance Preference "
+		    "(range from 0, most performant, through 100, most efficient)");
+	}
 	return (cpufreq_register(dev));
 }
@@ -584,8 +892,11 @@ out:
 static int
 hwpstate_detach(device_t dev)
 {
+	struct hwpstate_softc *sc;
 
-	hwpstate_goto_pstate(dev, 0);
+	sc = device_get_softc(dev);
+	if (!(sc->flags & PSTATE_CPPC))
+		hwpstate_goto_pstate(dev, 0);
 	return (cpufreq_unregister(dev));
 }
 
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index e9dde5c3b46a..3e5f598cd82a 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -418,6 +418,7 @@
 #define AMDPM_HW_PSTATE     0x00000080
 #define AMDPM_TSC_INVARIANT 0x00000100
 #define AMDPM_CPB           0x00000200
+#define AMDPM_PWR_REPORT    0x00001000
 
 /*
  * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)
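For anyone who wants to check for the feature outside the kernel: the probe
path keys on AMDFEID_CPPC in amd_extended_feature_extensions, which reflects a
CPPC bit in CPUID leaf 0x80000008 EBX. Below is a minimal userland
approximation of that check; the bit position (27) is an assumption drawn from
AMD's documentation rather than from this commit, so verify it against the APM
before relying on it.

#include <stdio.h>
#include <cpuid.h>

/* Assumed CPPC feature flag: CPUID Fn8000_0008, EBX bit 27. */
#define CPPC_FEATURE_BIT (1u << 27)

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 when the requested leaf is unsupported. */
	if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 0x80000008 not available\n");
		return (1);
	}
	printf("CPPC %ssupported\n", (ebx & CPPC_FEATURE_BIT) ? "" : "not ");
	return (0);
}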
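On the bit-field helpers the patch introduces: BITS_VALUE extracts a field by
masking and shifting down to the mask's lowest set bit (found with ffsll),
BITS_WITH_VALUE positions a value at that offset, and SET_BITS_VALUE rewrites
one field while keeping the rest of the register image. The standalone sketch
below shows how a MSR_AMD_CPPC_REQUEST image is composed; the main() and the
sample min/max levels are illustrative only, since the driver takes the real
levels from MSR_AMD_CPPC_CAPS_1, and SET_BITS_VALUE is parenthesized here for
-Wall cleanliness.

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffsll() */

/* Field masks of MSR_AMD_CPPC_REQUEST, as defined in the driver. */
#define AMD_CPPC_REQUEST_ENERGY_PERF_BITS 0xff000000
#define AMD_CPPC_REQUEST_DES_PERF_BITS    0x00ff0000
#define AMD_CPPC_REQUEST_MIN_PERF_BITS    0x0000ff00
#define AMD_CPPC_REQUEST_MAX_PERF_BITS    0x000000ff

/* Extract a field: mask it out, then shift down to bit 0. */
#define BITS_VALUE(bits, num) (((num) & (bits)) >> (ffsll((bits)) - 1))
/* Place a value at the field's offset. */
#define BITS_WITH_VALUE(bits, val) ((uintmax_t)(val) << (ffsll((bits)) - 1))
/* Replace one field in var; safe here since all fields sit in the low 32 bits. */
#define SET_BITS_VALUE(var, bits, val) \
	((var) = ((var) & ~(bits)) | BITS_WITH_VALUE((bits), (val)))

int
main(void)
{
	uint64_t req = 0;

	/* Illustrative min/max levels; the driver reads them from CAPS_1. */
	SET_BITS_VALUE(req, AMD_CPPC_REQUEST_MIN_PERF_BITS, 0x10);
	SET_BITS_VALUE(req, AMD_CPPC_REQUEST_MAX_PERF_BITS, 0xc8);
	/* Desired performance 0 selects autonomous mode. */
	SET_BITS_VALUE(req, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
	/* EPP 0x80 is the balanced default programmed at attach. */
	SET_BITS_VALUE(req, AMD_CPPC_REQUEST_ENERGY_PERF_BITS, 0x80);

	printf("request image: %#010jx, epp: %ju\n", (uintmax_t)req,
	    (uintmax_t)BITS_VALUE(AMD_CPPC_REQUEST_ENERGY_PERF_BITS, req));
	return (0);
}

Writing desired performance as 0 is what hands control to the autonomous
CPU/firmware policy; min, max, and EPP then act as the fence the hardware
must stay within.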
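The epp sysctl works in percent, 0 being most performant and 100 most
efficient, and scales that onto the 8-bit EPP field the way sysctl_epp_select
does. A sketch of the mapping in both directions (the helper names are mine,
not the driver's):

#include <stdio.h>
#include <stdint.h>

#define EPP_RAW_MAX 0xffu	/* full scale of the 8-bit EPP field */

/* Percentage (0..100) -> raw field value, truncating as the driver does. */
static uint32_t
epp_pct_to_raw(uint32_t pct)
{
	return (pct * EPP_RAW_MAX / 100);
}

/* Raw field value -> percentage, used when reporting the current setting. */
static uint32_t
epp_raw_to_pct(uint32_t raw)
{
	return (raw * 100 / EPP_RAW_MAX);
}

int
main(void)
{
	/* The round trip is not exact: 50% programs 127, while the
	 * attach-time default 0x80 (128) reads back as 50%. */
	printf("50%% -> %u raw; 0x80 raw -> %u%%\n",
	    epp_pct_to_raw(50), epp_raw_to_pct(0x80));
	return (0);
}

With the machdep.hwpstate_pkg_ctrl tunable left at its default of 1, a write
such as sysctl dev.hwpstate_amd.0.epp=100 should fan the new value out to the
request MSR of every CPU via the devclass walk in sysctl_epp_select; with it
set to 0, only the core behind the addressed device is updated.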
