pm: Change gpu_metrics over to binary

Tom St Denis Wed, 01 Apr 2026 06:52:49 -0700

The file is binary anyways but also because it reported previously
as a static 4KB block it made correctly reading it hard since
you can't error check on if your read succeeded or not.


Tested on my Navi48.

Signed-off-by: Tom St Denis <[email protected]>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c      | 106 +++++++++++++++++++-----
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h |   1 +
 drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h  |   1 -
 3 files changed, 88 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index a4d8e667eafb..7139983705bc 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -34,8 +34,17 @@
 #include <linux/nospec.h>
 #include <linux/pm_runtime.h>
 #include <linux/string_choices.h>
+#include <linux/sysfs.h>
+#include <linux/sizes.h>
 #include <asm/processor.h>
 
+/*
+ * Sysfs reports this as the file size (stat/ls); kernfs also uses it to cap
+ * read offsets. Actual payload length is the return value of
+ * amdgpu_dpm_get_gpu_metrics() and must not exceed this.
+ */
+#define AMDGPU_GPU_METRICS_BIN_ATTR_MAX_SZ     SZ_128K
+
 #define MAX_NUM_OF_FEATURES_PER_SUBSET         8
 #define MAX_NUM_OF_SUBSETS                     8
 
@@ -1734,43 +1743,86 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
  * DOC: gpu_metrics
  *
  * The amdgpu driver provides a sysfs API for retrieving current gpu
- * metrics data. The file gpu_metrics is used for this. Reading the
- * file will dump all the current gpu metrics data.
+ * metrics data. The binary sysfs file gpu_metrics is used for this.
+ * Reading the file returns the raw metrics blob; reads may be shorter
+ * than the full structure, so userspace should use read() until EOF
+ * when the buffer may exceed one page. The sysfs file size is an upper
+ * bound for inode metadata; the real length is the amount returned
+ * before EOF on sequential reads.
+ *
+ * Do not use stdio fread(3) as fread(buf, 128*1024, 1, fp): that asks for
+ * one object of 128KiB and returns 0 if the payload is shorter (even when
+ * data was read). Use read(2), or fread(buf, 1, sizeof(buf), fp), or loop
+ * until feof/short read.
  *
  * These data include temperature, frequency, engines utilization,
  * power consume, throttler status, fan speed and cpu core statistics(
  * available for APU only). That's it will give a snapshot of all sensors
  * at the same time.
  */
-static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
-                                     struct device_attribute *attr,
-                                     char *buf)
+static bool amdgpu_pm_gpu_metrics_bin_visible(struct amdgpu_device *adev,
+                                             uint32_t mask)
+{
+       uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+       if (!((ATTR_FLAG_BASIC | ATTR_FLAG_ONEVF) & mask))
+               return false;
+
+       return gc_ver >= IP_VERSION(9, 1, 0);
+}
+
+static ssize_t amdgpu_sysfs_gpu_metrics_read(struct file *f, struct kobject 
*kobj,
+                                            const struct bin_attribute *attr,
+                                            char *buf, loff_t off, size_t 
count)
 {
+       struct device *dev = kobj_to_dev(kobj);
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        void *gpu_metrics;
-       ssize_t size = 0;
+       int len;
        int ret;
 
-       ret = amdgpu_pm_get_access_if_active(adev);
+       (void)f;
+       (void)attr;
+
+       /*
+        * Kernfs invokes this once per chunk (at most PAGE_SIZE bytes per call)
+        * for a single userspace read(). Use pm_runtime_resume_and_get via
+        * amdgpu_pm_get_access so later chunks still succeed after the prior
+        * chunk's put_autosuspend — get_if_active would return -EPERM once the
+        * GPU had gone idle between chunks.
+        */
+       ret = amdgpu_pm_get_access(adev);
        if (ret)
                return ret;
 
-       size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
-       if (size <= 0)
-               goto out;
+       len = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
+       if (len < 0) {
+               amdgpu_pm_put_access(adev);
+               return len;
+       }
+       if (len == 0) {
+               amdgpu_pm_put_access(adev);
+               return 0;
+       }
 
-       if (size >= PAGE_SIZE)
-               size = PAGE_SIZE - 1;
+       if (off >= len) {
+               amdgpu_pm_put_access(adev);
+               return 0;
+       }
 
-       memcpy(buf, gpu_metrics, size);
+       if (count > (size_t)(len - off))
+               count = len - off;
 
-out:
+       memcpy(buf, (u8 *)gpu_metrics + off, count);
        amdgpu_pm_put_access(adev);
 
-       return size;
+       return count;
 }
 
+static const BIN_ATTR(gpu_metrics, 0444, amdgpu_sysfs_gpu_metrics_read, NULL,
+                     AMDGPU_GPU_METRICS_BIN_ATTR_MAX_SZ);
+
 static int amdgpu_show_powershift_percent(struct device *dev,
                                        char *buf, enum amd_pp_sensors sensor)
 {
@@ -2579,7 +2631,6 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
        AMDGPU_DEVICE_ATTR_RO(unique_id,                                
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging,               
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(apu_thermal_cap,                          
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
-       AMDGPU_DEVICE_ATTR_RO(gpu_metrics,                              
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RO(smartshift_apu_power,                     
ATTR_FLAG_BASIC,
                              .attr_update = ss_power_attr_update),
        AMDGPU_DEVICE_ATTR_RO(smartshift_dgpu_power,                    
ATTR_FLAG_BASIC,
@@ -2657,9 +2708,6 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
                     gc_ver != IP_VERSION(9, 4, 3)) ||
                    gc_ver < IP_VERSION(9, 0, 0))
                        *states = ATTR_STATE_UNSUPPORTED;
-       } else if (DEVICE_ATTR_IS(gpu_metrics)) {
-               if (gc_ver < IP_VERSION(9, 1, 0))
-                       *states = ATTR_STATE_UNSUPPORTED;
        } else if (DEVICE_ATTR_IS(pp_power_profile_mode)) {
                if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == 
-EOPNOTSUPP)
                        *states = ATTR_STATE_UNSUPPORTED;
@@ -4755,6 +4803,17 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
        if (ret)
                goto err_out0;
 
+       if (amdgpu_pm_gpu_metrics_bin_visible(adev, mask)) {
+               ret = sysfs_create_bin_file(&adev->dev->kobj, 
&bin_attr_gpu_metrics);
+               if (ret) {
+                       dev_err(adev->dev,
+                               "failed to create gpu_metrics sysfs bin file, 
ret = %d\n",
+                               ret);
+                       goto err_out1;
+               }
+               adev->pm.gpu_metrics_bin_registered = true;
+       }
+
        if (amdgpu_dpm_is_overdrive_supported(adev)) {
                ret = amdgpu_od_set_init(adev);
                if (ret)
@@ -4806,6 +4865,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
        return 0;
 
 err_out1:
+       if (adev->pm.gpu_metrics_bin_registered) {
+               sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_gpu_metrics);
+               adev->pm.gpu_metrics_bin_registered = false;
+       }
        amdgpu_device_attr_remove_groups(adev, &adev->pm.pm_attr_list);
 err_out0:
        if (adev->pm.int_hwmon_dev)
@@ -4821,6 +4884,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
        if (adev->pm.int_hwmon_dev)
                hwmon_device_unregister(adev->pm.int_hwmon_dev);
 
+       if (adev->pm.gpu_metrics_bin_registered) {
+               sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_gpu_metrics);
+               adev->pm.gpu_metrics_bin_registered = false;
+       }
+
        amdgpu_device_attr_remove_groups(adev, &adev->pm.pm_attr_list);
 }
 
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index aa3f427819a0..67ff83b2134c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -349,6 +349,7 @@ struct amdgpu_pm {
        /* dpm */
        bool                    dpm_enabled;
        bool                    sysfs_initialized;
+       bool                    gpu_metrics_bin_registered;
        struct amdgpu_dpm       dpm;
        const struct firmware   *fw;    /* SMC firmware */
        uint32_t                fw_version;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
index c12ced32f780..dc6875871f1d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
@@ -73,7 +73,6 @@ enum amdgpu_device_attr_id {
        device_attr_id__unique_id,
        device_attr_id__thermal_throttling_logging,
        device_attr_id__apu_thermal_cap,
-       device_attr_id__gpu_metrics,
        device_attr_id__smartshift_apu_power,
        device_attr_id__smartshift_dgpu_power,
        device_attr_id__smartshift_bias,
-- 
2.51.0

[PATCH] drm/amd/pm: Change gpu_metrics over to binary

Reply via email to