Hi Chengwen,

Thanks for your review.

在 2024/9/10 10:00, fengchengwen 写道:
Hi Huisong

Please see comments inline.

Thanks

On 2024/8/9 17:50, Huisong Li wrote:
The deeper the idle state, the lower the power consumption, but the longer
the resume time. Some services are delay sensitive and expect a low
resume time, such as the interrupt packet receiving mode.

And the "/sys/devices/system/cpu/cpuX/power/pm_qos_resume_latency_us" sysfs
interface is used by userspace to set and get the resume latency limit on
cpuX. Each cpuidle governor in Linux selects which idle state to enter
based on this CPU resume latency in its idle task.

The per-CPU PM QoS API can be used to control this CPU's idle state
selection and, by setting a strict resume latency (zero value), restrict it
to the shallowest idle state in order to lower the delay after sleep.

Signed-off-by: Huisong Li <lihuis...@huawei.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
...

diff --git a/lib/power/rte_power_qos.c b/lib/power/rte_power_qos.c
new file mode 100644
index 0000000000..375746f832
--- /dev/null
+++ b/lib/power/rte_power_qos.c
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 HiSilicon Limited
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_log.h>
+
+#include "power_common.h"
+#include "rte_power_qos.h"
+
+#define PM_QOS_SYSFILE_RESUME_LATENCY_US       \
+       "/sys/devices/system/cpu/cpu%u/power/pm_qos_resume_latency_us"
+
+int
+rte_power_qos_set_cpu_resume_latency(uint16_t lcore_id, int latency)
+{
+       char buf[LINE_MAX];
No need for LINE_MAX; [32] would be enough.
Ack

+       FILE *f;
+       int ret;
+
+       if (!rte_lcore_is_enabled(lcore_id)) {
+               POWER_LOG(ERR, "lcore id %u is not enabled", lcore_id);
+               return -EINVAL;
+       }
+
+       if (latency < 0) {
+               POWER_LOG(ERR, "latency should be greater than and equal to 0");
+               return -EINVAL;
+       }
+
+       ret = open_core_sysfs_file(&f, "w", PM_QOS_SYSFILE_RESUME_LATENCY_US, 
lcore_id);
+       if (ret != 0) {
+               POWER_LOG(ERR, "Failed to open 
"PM_QOS_SYSFILE_RESUME_LATENCY_US, lcore_id);
+               return ret;
+       }
+
+       /*
+        * Based on the sysfs interface pm_qos_resume_latency_us under
+        * @PM_QOS_SYSFILE_RESUME_LATENCY_US directory in kernel, their meanning
meanning -> meaning
Ack

+        * is as follows for different input string.
+        * 1> the resume latency is 0 if the input is "n/a".
+        * 2> the resume latency is no constraint if the input is "0".
+        * 3> the resume latency is the actual value to be set.
+        */
+       if (latency == 0)
+               snprintf(buf, sizeof(buf), "%s", "n/a");
+       else if (latency == RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT)
+               snprintf(buf, sizeof(buf), "%u", 0);
+       else
+               snprintf(buf, sizeof(buf), "%u", latency);
+
+       ret = write_core_sysfs_s(f, buf);
+       if (ret != 0) {
+               POWER_LOG(ERR, "Failed to write 
"PM_QOS_SYSFILE_RESUME_LATENCY_US, lcore_id);
+               goto out;
no need of goto
Ack

+       }
+
+out:
+       if (f != NULL)
+               fclose(f);
just fclose(f) because f is valid here.
Ack
+
+       return ret;
+}
+
+int
+rte_power_qos_get_cpu_resume_latency(uint16_t lcore_id)
+{
+       char buf[LINE_MAX];
+       int latency = -1;
+       FILE *f;
+       int ret;
+
+       if (!rte_lcore_is_enabled(lcore_id)) {
+               POWER_LOG(ERR, "lcore id %u is not enabled", lcore_id);
+               return -EINVAL;
+       }
+
+       ret = open_core_sysfs_file(&f, "r", PM_QOS_SYSFILE_RESUME_LATENCY_US, 
lcore_id);
+       if (ret != 0) {
+               POWER_LOG(ERR, "Failed to open 
"PM_QOS_SYSFILE_RESUME_LATENCY_US, lcore_id);
+               return ret;
+       }
+
+       ret = read_core_sysfs_s(f, buf, sizeof(buf));
+       if (ret != 0) {
+               POWER_LOG(ERR, "Failed to read 
"PM_QOS_SYSFILE_RESUME_LATENCY_US, lcore_id);
+               goto out;
+       }
+
+       /*
+        * Based on the sysfs interface pm_qos_resume_latency_us under
+        * @PM_QOS_SYSFILE_RESUME_LATENCY_US directory in kernel, their meanning
meanning -> meaning
Ack

+        * is as follows for different output string.
+        * 1> the resume latency is 0 if the output is "n/a".
+        * 2> the resume latency is no constraint if the output is "0".
+        * 3> the resume latency is the actual value in used for other string.
+        */
+       if (strcmp(buf, "n/a") == 0)
+               latency = 0;
+       else {
+               latency = strtoul(buf, NULL, 10);
+               latency = latency == 0 ? 
RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT : latency;
+       }
+
+out:
+       if (f != NULL)
+               fclose(f);
just fclose(f) because f is valid here.
Ack

+
+       return latency != -1 ? latency : ret;
+}
diff --git a/lib/power/rte_power_qos.h b/lib/power/rte_power_qos.h
new file mode 100644
index 0000000000..990c488373
--- /dev/null
+++ b/lib/power/rte_power_qos.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 HiSilicon Limited
+ */
+
+#ifndef RTE_POWER_QOS_H
+#define RTE_POWER_QOS_H
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file rte_power_qos.h
+ *
+ * PM QoS API.
+ *
+ * The CPU-wide resume latency limit has a positive impact on this CPU's idle
+ * state selection in each cpuidle governor.
+ * Please see the PM QoS on CPU wide in the following link:
+ * 
https://www.kernel.org/doc/html/latest/admin-guide/abi-testing.html?highlight=pm_qos_resume_latency_us#abi-sys-devices-power-pm-qos-resume-latency-us
+ *
+ * The deeper the idle state, the lower the power consumption, but the
+ * longer the resume time. Some services are delay sensitive and expect a
+ * low resume time, such as the interrupt packet receiving mode.
+ *
+ * In these cases, the per-CPU PM QoS API can be used to control this CPU's
+ * idle state selection and, by setting a strict resume latency (zero value),
+ * restrict it to the shallowest idle state to lower the delay after sleep.
+ */
+
+#define RTE_POWER_QOS_STRICT_LATENCY_VALUE             0
+#define RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT    ((int)(UINT32_MAX >> 1))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @param lcore_id
+ *   target logical core id
+ *
+ * @param latency
+ *   The latency should be greater than and equal to zero in microseconds unit.
+ *
+ * @return
+ *   0 on success. Otherwise negative value is returned.
+ */
+__rte_experimental
+int rte_power_qos_set_cpu_resume_latency(uint16_t lcore_id, int latency);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get the current resume latency of this logical core.
+ * The default value in kernel is @see 
RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT
+ * if don't set it.
+ *
+ * @return
+ *   Negative value on failure.
+ *   >= 0 means the actual resume latency limit on this core.
+ */
+__rte_experimental
+int rte_power_qos_get_cpu_resume_latency(uint16_t lcore_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_POWER_QOS_H */
diff --git a/lib/power/version.map b/lib/power/version.map
index c9a226614e..4e4955a4cf 100644
--- a/lib/power/version.map
+++ b/lib/power/version.map
@@ -51,4 +51,8 @@ EXPERIMENTAL {
        rte_power_set_uncore_env;
        rte_power_uncore_freqs;
        rte_power_unset_uncore_env;
+
+       # added in 24.11
+       rte_power_qos_set_cpu_resume_latency;
+       rte_power_qos_get_cpu_resume_latency;
Please order these alphabetically.
Ack

Another question: I think renaming "cpu" to "core" may be more accurate.
Although sysfs exports it as "cpu", in DPDK it means core.
There are also some rte_power_core_xxx names in the rte_power library, so I
think it is better to keep the naming the same.
Firstly, the rte_power_qos_set/get_cpu_resume_latency names are consistent with the Linux sysfs interface. Having the same name is more relatable for users. In addition, Sivaprasad Tummala is reworking the power library, and the rte_power_core_xxx names might also be changed.

  };

.

Reply via email to