Currently, there is a hard limitation on the PMD power management
support that only allows it to support a single queue per lcore. This is
not ideal as most DPDK use cases will poll multiple queues per core.
The PMD power management mechanism relies on ethdev Rx callbacks, so it
is very difficult to implement such support because callbacks are
effectively stateless and have no visibility into what the other ethdev
devices are doing. This places limitations on what we can do within the
framework of Rx callbacks, but the basics of this implementation are as
follows:
- Replace per-queue structures with per-lcore ones, so that any device
polled from the same lcore can share data
- Any queue that is going to be polled from a specific lcore has to be
added to the list of queues to poll, so that the callback is aware of
other queues being polled by the same lcore
- Both the empty poll counter and the actual power saving mechanism is
shared between all queues polled on a particular lcore, and is only
activated when all queues in the list were polled and were determined
to have no traffic.
- The limitation on UMWAIT-based polling is not removed because UMWAIT
is incapable of monitoring more than one address.
Also, while we're at it, update and improve the docs.
Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
---
Notes:
v5:
- Remove the "power save queue" API and replace it with
mechanism suggested by
Konstantin
v3:
- Move the list of supported NICs to NIC feature table
v2:
- Use a TAILQ for queues instead of a static array
- Address feedback from Konstantin
- Add additional checks for stopped queues
doc/guides/nics/features.rst | 10 +
doc/guides/prog_guide/power_man.rst | 65 ++--
doc/guides/rel_notes/release_21_08.rst | 3 +
lib/power/rte_power_pmd_mgmt.c | 431 ++++++++++++++++++-------
4 files changed, 373 insertions(+), 136 deletions(-)
int
rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t
port_id,
uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
{
- struct pmd_queue_cfg *queue_cfg;
+ const union queue qdata = {.portid = port_id, .qid = queue_id};
+ struct pmd_core_cfg *lcore_cfg;
+ struct queue_list_entry *queue_cfg;
struct rte_eth_dev_info info;
rte_rx_callback_fn clb;
int ret;
@@ -202,9 +401,19 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int
lcore_id, uint16_t port_id,
goto end;
}
- queue_cfg = &port_cfg[port_id][queue_id];
+ lcore_cfg = &lcore_cfgs[lcore_id];
- if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) {
+ /* check if other queues are stopped as well */
+ ret = cfg_queues_stopped(lcore_cfg);
+ if (ret != 1) {
+ /* error means invalid queue, 0 means queue wasn't stopped */
+ ret = ret < 0 ? -EINVAL : -EBUSY;
+ goto end;
+ }
+
+ /* if callback was already enabled, check current callback type */
+ if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
+ lcore_cfg->cb_mode != mode) {
ret = -EINVAL;
goto end;
}
@@ -214,53 +423,20 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned
int lcore_id, uint16_t port_id,
switch (mode) {
case RTE_POWER_MGMT_TYPE_MONITOR:
- {
- struct rte_power_monitor_cond dummy;
-
- /* check if rte_power_monitor is supported */
- if (!global_data.intrinsics_support.power_monitor) {
- RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not
supported\n");
- ret = -ENOTSUP;
+ /* check if we can add a new queue */
+ ret = check_monitor(lcore_cfg, &qdata);
+ if (ret < 0)
goto end;
- }
- /* check if the device supports the necessary PMD API */
- if (rte_eth_get_monitor_addr(port_id, queue_id,
- &dummy) == -ENOTSUP) {
- RTE_LOG(DEBUG, POWER, "The device does not support
rte_eth_get_monitor_addr\n");
- ret = -ENOTSUP;
- goto end;
- }
clb = clb_umwait;
break;
- }
case RTE_POWER_MGMT_TYPE_SCALE:
- {
- enum power_management_env env;
- /* only PSTATE and ACPI modes are supported */
- if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
- !rte_power_check_env_supported(
- PM_ENV_PSTATE_CPUFREQ)) {
- RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are
supported\n");
- ret = -ENOTSUP;
+ /* check if we can add a new queue */
+ ret = check_scale(lcore_id);
+ if (ret < 0)
goto end;
- }
- /* ensure we could initialize the power library */
- if (rte_power_init(lcore_id)) {
- ret = -EINVAL;
- goto end;
- }
- /* ensure we initialized the correct env */
- env = rte_power_get_env();
- if (env != PM_ENV_ACPI_CPUFREQ &&
- env != PM_ENV_PSTATE_CPUFREQ) {
- RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes
were initialized\n");
- ret = -ENOTSUP;
- goto end;
- }
clb = clb_scale_freq;
break;
- }
case RTE_POWER_MGMT_TYPE_PAUSE:
/* figure out various time-to-tsc conversions */
if (global_data.tsc_per_us == 0)
@@ -273,13 +449,23 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned
int lcore_id, uint16_t port_id,
ret = -EINVAL;
goto end;
}
+ /* add this queue to the list */
+ ret = queue_list_add(lcore_cfg, &qdata);
+ if (ret < 0) {
+ RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
+ strerror(-ret));
+ goto end;
+ }
+ /* new queue is always added last */
+ queue_cfg = TAILQ_LAST(&lcore_cfgs->head, queue_list_head);