On 10 Dec 16:08, Burakov, Anatoly wrote:
<snip> 
> Hi Liang,
> 
> General comment: please do not break up log strings onto multiple lines. It
> makes it harder to grep the codebase. Checkpatch will not warn about log
> strings going over 80 characters.
agree, I will update in next version
> 
> > ---
> >   lib/librte_power/Makefile               |   2 +
> >   lib/librte_power/meson.build            |   4 +-
> >   lib/librte_power/power_pstate_cpufreq.c | 778 
> > ++++++++++++++++++++++++++++++++
> >   lib/librte_power/power_pstate_cpufreq.h | 218 +++++++++
> >   lib/librte_power/rte_power.c            |  43 +-
> >   lib/librte_power/rte_power.h            |   3 +-
> >   6 files changed, 1039 insertions(+), 9 deletions(-)
> >   create mode 100644 lib/librte_power/power_pstate_cpufreq.c
> >   create mode 100644 lib/librte_power/power_pstate_cpufreq.h
> 
> <snip>
> 
> >   sources = files('rte_power.c', 'power_acpi_cpufreq.c',
> >             'power_kvm_vm.c', 'guest_channel.c',
> > -           'rte_power_empty_poll.c')
> > +           'rte_power_empty_poll.c',
> > +           'power_pstate_cpufreq.c')
> >   headers = files('rte_power.h','rte_power_empty_poll.h')
> > -deps += ['timer']
> > diff --git a/lib/librte_power/power_pstate_cpufreq.c 
> > b/lib/librte_power/power_pstate_cpufreq.c
> > new file mode 100644
> > index 0000000..f1dada8
> > --- /dev/null
> > +++ b/lib/librte_power/power_pstate_cpufreq.c
> > @@ -0,0 +1,778 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2010-2018 Intel Corporation
> 
> I believe it should be 2018.
I didn't see anything wrong here.
> 
> > + */
> > +
> > +#include <stdio.h>
> > +#include <sys/types.h>
> > +#include <sys/stat.h>
> > +#include <fcntl.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <signal.h>
> > +#include <limits.h>
> > +#include <errno.h>
> > +
> > +#include <rte_memcpy.h>
> > +#include <rte_atomic.h>
> > +
> > +#include "power_pstate_cpufreq.h"
> > +#include "power_common.h"
> > +
> <snip>
> 
> > +
> > +static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
> > +
> > +/**
> > + * It is to read the specific MSR.
> > + */
> > +
> > +static int32_t
> > +power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
> > +{
> > +   int fd;
> > +   int ret;
> 
> int fd, ret?a
should be fine.
> 
> > +   char fullpath[PATH_MAX];
> > +
> > +   snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
> > +
> > +   fd = open(fullpath, O_RDONLY);
> > +
> > +   if (fd < 0) {
> > +
> > +           if (errno == EACCES)
> > +                   RTE_LOG(ERR, POWER, "No access to %s\n", fullpath);
> > +
> > +           if (errno == ENXIO)
> > +                   RTE_LOG(ERR, POWER, "%s Not Exist!\n", fullpath);
> 
> How about:
> 
> RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath, strerror(errno));
agree, I will update in next version 
> 
> ?
> 
> > +
> > +           return fd;
> > +   }
> > +
> > +   ret = pread(fd, val, sizeof(uint64_t), msr);
> 
> Can pread fail?
agree, I will add error checking here.
> 
> > +
> > +   close(fd);
> > +
> > +   POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
> > +                   fullpath, msr, lcore_id);
> > +
> > +   POWER_DEBUG_TRACE("Ret value %d, content is 0x%lx\n", ret, *val);
> > +
> > +   return ret;
> > +}
> > +
> 
> <snip>
> 
> > +   if (fseek(pi->f_cur_max, 0, SEEK_SET) < 0) {
> > +           RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
> > +                           "for setting frequency for lcore %u\n",
> > +                           pi->lcore_id);
> > +           return -1;
> > +   }
> > +
> > +   /* Turbo is available and enabled, first freq bucket is sys max freq */
> > +   if (pi->turbo_available && pi->turbo_enable && (idx == 0))
> > +
> > +           target_freq = pi->sys_max_freq;
> > +
> > +   else
> > +
> > +           target_freq = pi->freqs[idx];
> 
> Unneeded whitespace?
agree, I will remove it. 
> > +
> > +
> > +   /* Decrease freq, the min freq should be updated first */
> > +   if (idx  >  pi->curr_idx) {
> > +
> > +           if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
> > +                   RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > +                                   "lcore %u\n", pi->lcore_id);
> > +                   return -1;
> > +           }
> > +
> > +           if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
> > +                   RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > +                                   "lcore %u\n", pi->lcore_id);
> > +                   return -1;
> > +           }
> > +
> > +           POWER_DEBUG_TRACE("Freqency[%u] to be set for lcore %u\n",
> > +                             target_freq, pi->lcore_id);
> 
> Frequency :)
> 
> Also, i believe "Frequency[%u]" is misleading in this case, because the
> value is not an index into an array, but is an actual target frequency. I
> think "Frequency '%u'" would be more descriptive.
agree I will update in next version
> 
> > +
> > +           fflush(pi->f_cur_min);
> > +           fflush(pi->f_cur_max);
> > +
> > +   }
> > +
> > +   /* Increase freq, the max freq should be updated first */
> > +   if (idx  <  pi->curr_idx) {
> 
> Else if?
no need here.
> 
> > +
> > +           if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
> > +                   RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > +                                   "lcore %u\n", pi->lcore_id);
> > +                   return -1;
> > +           }
> > +
> > +           if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
> > +                   RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > +                                   "lcore %u\n", pi->lcore_id);
> > +                   return -1;
> > +           }
> 
> Forgot POWER_DEBUG_TRACE?
> 
agree, I will add here.
> > +
> > +           fflush(pi->f_cur_max);
> > +           fflush(pi->f_cur_min);
> > +   }
> > +
> > +   pi->curr_idx = idx;
> > +
> > +   return 1;
> > +}
> > +
> 
> <snip>
> 
> > +   snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
> > +                   pi->lcore_id);
> > +   f = fopen(fullpath, "rw+");
> > +   FOPEN_OR_ERR_RET(f, ret);
> > +
> > +   s = fgets(buf, sizeof(buf), f);
> > +   FOPS_OR_NULL_GOTO(s, out);
> > +
> > +   /* Check if current governor is performance */
> > +   if (strncmp(buf, POWER_GOVERNOR_PERF,
> > +                   sizeof(POWER_GOVERNOR_PERF)) == 0) {
> 
> Nitpick, but probably should be strlen, not sizeof?
sizeof should be fine
> 
> > +           ret = 0;
> > +           POWER_DEBUG_TRACE("Power management governor of lcore %u is "
> > +                           "already performance\n", pi->lcore_id);
> > +           goto out;
> > +   }
> > +   /* Save the original governor */
> > +   snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
> > +
> > +   /* Write 'performance' to the governor */
> > +   val = fseek(f, 0, SEEK_SET);
> > +   FOPS_OR_ERR_GOTO(val, out);
> > +
> > +   val = fputs(POWER_GOVERNOR_PERF, f);
> > +   FOPS_OR_ERR_GOTO(val, out);
> > +
> > +   ret = 0;
> > +   RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
> > +                   "set to performance successfully\n", pi->lcore_id);
> 
> Do we want this as INFO? (it's OK if we do, just asking!)
just keep it as same as acpi driver
> 
> > +out:
> > +   fclose(f);
> > +
> > +   return ret;
> > +}
> > +
> > +/**
> > + * It is to check the governor and then set the original governor back if
> > + * needed by writing the sys file.
> > + */
> > +static int
> > +power_set_governor_original(struct pstate_power_info *pi)
> > +{
> 
> <snip>
> 
> > +   /* Write back the original governor */
> > +   val = fseek(f, 0, SEEK_SET);
> > +   FOPS_OR_ERR_GOTO(val, out);
> > +
> > +   val = fputs(pi->governor_ori, f);
> > +   FOPS_OR_ERR_GOTO(val, out);
> > +
> > +   ret = 0;
> > +   RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
> > +                   "has been set back to %s successfully\n",
> > +                   pi->lcore_id, pi->governor_ori);
> 
> Same - do we want this as INFO?
keep it as same as acpi driver
> 
> > +out:
> > +   fclose(f);
> > +
> > +   return ret;
> > +}
> > +
> > +/**
> > + * It is to get the available frequencies of the specific lcore by reading 
> > the
> > + * sys file.
> > + */
> > +static int
> > +power_get_available_freqs(struct pstate_power_info *pi)
> > +{
> > +   FILE *f_min, *f_max;
> > +   int ret = -1;
> > +   char *p_min, *p_max;
> > +   char buf_min[BUFSIZ];
> > +   char buf_max[BUFSIZ];
> > +   char fullpath_min[PATH_MAX];
> > +   char fullpath_max[PATH_MAX];
> > +   char *s_min, *s_max;
> > +   uint32_t sys_min_freq = 0, sys_max_freq = 0, base_max_freq = 0;
> > +   uint32_t i, num_freqs = 0;
> > +
> > +   snprintf(fullpath_max, sizeof(fullpath_max),
> > +                   POWER_SYSFILE_BASE_MAX_FREQ,
> > +                   pi->lcore_id);
> > +   snprintf(fullpath_min, sizeof(fullpath_min),
> > +                   POWER_SYSFILE_BASE_MIN_FREQ,
> > +                   pi->lcore_id);
> > +
> > +   f_min = fopen(fullpath_min, "r");
> > +   FOPEN_OR_ERR_RET(f_min, ret);
> > +
> > +   s_min = fgets(buf_min, sizeof(buf_min), f_min);
> > +   FOPS_OR_NULL_GOTO(s_min, out);
> > +
> > +   f_max = fopen(fullpath_max, "r");
> > +   FOPEN_OR_ERR_RET(f_max, ret);
> > +
> > +   s_max = fgets(buf_max, sizeof(buf_max), f_max);
> > +   FOPS_OR_NULL_GOTO(s_max, out);
> > +
> > +
> > +   /* Strip the line break if there is */
> > +   p_min = strchr(buf_min, '\n');
> > +   if (p_min != NULL)
> > +           *p_min = 0;
> > +
> > +   p_max = strchr(buf_max, '\n');
> > +   if (p_max != NULL)
> > +           *p_max = 0;
> 
> Probably '\0' would be more correct.
should be fine
> 
> > +
> > +   sys_min_freq = strtoul(buf_min, &p_min, POWER_CONVERT_TO_DECIMAL);
> > +   sys_max_freq = strtoul(buf_max, &p_max, POWER_CONVERT_TO_DECIMAL);
> > +
> > +   if (sys_max_freq < sys_min_freq)
> > +           goto out;
> 
> This leaks fds. See below.
agree, I will adjust the postion of label "out"
> 
> > +
> > +
> > +   pi->sys_max_freq = sys_max_freq;
> > +
> > +   base_max_freq = pi->non_turbo_max_ratio*BUS_FREQ;
> 
> spacing around multiplication :)
agree. I will remove that
> 
> > +
> > +   POWER_DEBUG_TRACE("sys min %u, sys max %u, base_max %u\n",
> > +                   sys_min_freq,
> > +                   sys_max_freq,
> > +                   base_max_freq);
> > +
> > +   if (base_max_freq < sys_max_freq)
> > +
> > +           pi->turbo_available = 1;
> 
> Extra whitespace.
agree
> 
> > +   else
> > +           pi->turbo_available = 0;
> > +
> > +
> > +   /* If turbo is available then there is one extra freq bucket
> > +    * to store the sys max freq which value is base_max +1
> > +    */
> > +   num_freqs = (base_max_freq - sys_min_freq)/BUS_FREQ + 1
> > +           + pi->turbo_available;
> 
> The '+' should be on the preceding line, also spacing.
agree
> 
> > +
> > +   /* Generate the freq bucket array.
> > +    * If turbo is available the freq bucket[0] value is base_max +1
> > +    * the bucket[1] is base_max, bucket[2] is base_max - BUS_FREQ
> > +    * and so on.
> > +    * If turbo is not available bucket[0] is base_max and so on
> > +    */
> > +   for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) {
> > +
> 
> Extra whitespace.
agree
> 
> > +           if ((i == 0) && pi->turbo_available)
> > +                   pi->freqs[pi->nb_freqs++] = base_max_freq + 1;
> > +           else
> > +                   pi->freqs[pi->nb_freqs++] =
> > +                   base_max_freq - (i - pi->turbo_available)*BUS_FREQ;
> 
> Misleading indentation, also spacing around multiplication operator.
agree 
> > +   }
> > +
> > +   ret = 0;
> > +
> > +   POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
> > +                   num_freqs, pi->lcore_id);
> > +
> > +   fclose(f_min);
> > +   fclose(f_max);
> > +
> > +
> > +out:
> 
> fclose should be after out, otherwise error path above will leak fd.
agree 
> > +   return ret;
> > +}
> > +
> > +int
> > +power_pstate_cpufreq_init(unsigned int lcore_id)
> > +{
> > +   struct pstate_power_info *pi;
> > +
> > +   if (lcore_id >= RTE_MAX_LCORE) {
> > +           RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
> 
> Cannot exceed.
>
agree
> > +                           lcore_id, RTE_MAX_LCORE - 1U);
> > +           return -1;
> > +   }
> > +
> > +   pi = &lcore_power_info[lcore_id];
> > +   if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
> > +                   == 0) {
> > +           RTE_LOG(INFO, POWER, "Power management of lcore %u is "
> > +                           "in use\n", lcore_id);
> > +           return -1;
> > +   }
> > +
> > +   pi->lcore_id = lcore_id;
> 
> Can we not set this until we're sure we didn't fail? Or do any of the below
> calls rely on pi->lcore_id to be set?
> 
power_set_governor_performance will sue lcore_id
> > +   /* Check and set the governor */
> > +   if (power_set_governor_performance(pi) < 0) {
> > +           RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
> > +                           "performance\n", lcore_id);
> > +           goto fail;
> > +   }
> > +   /* Init for setting lcore frequency */
> > +   if (power_init_for_setting_freq(pi) < 0) {
> > +           RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
> > +                           "lcore %u\n", lcore_id);
> > +           goto fail;
> > +   }
> > +
> > +   /* Get the available frequencies */
> > +   if (power_get_available_freqs(pi) < 0) {
> > +           RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
> > +                           "lcore %u\n", lcore_id);
> > +           goto fail;
> > +   }
> > +
> > +
> > +   /* Set freq to max by default */
> > +   if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
> > +           RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
> > +                           "to max\n", lcore_id);
> > +           goto fail;
> > +   }
> > +
> > +   RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
> > +                   "power management\n", lcore_id);
> 
> Do we want this as INFO?
> 
> > +   rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
> > +
> > +   return 0;
> > +
> > +fail:
> > +   rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
> > +
> > +   return -1;
> > +}
> > +
> > +int
> > +power_pstate_cpufreq_exit(unsigned int lcore_id)
> > +{
> > +   struct pstate_power_info *pi;
> > +
> 
> <snip>
> 
> > +   /* Set the governor back to the original */
> > +   if (power_set_governor_original(pi) < 0) {
> > +           RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
> > +                           "to the original\n", lcore_id);
> > +           goto fail;
> > +   }
> > +
> > +   RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
> > +                   "'performance' mode and been set back to the "
> > +                   "original\n", lcore_id);
> 
> Perhaps print out the original governor name? Also, i think it's better to
> use the performance string define, rather than having it as part of
> formatted message.
original governor name is alread print out by power_set_governor_original
> > +   rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
> > +
> > +   return 0;
> > +
> > +fail:
> > +   rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
> > +
> > +   return -1;
> > +}
> > +
> > +
> > +uint32_t
> > +power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, 
> > uint32_t num)
> > +{
> > +   struct pstate_power_info *pi;
> > +
> > +   if (lcore_id >= RTE_MAX_LCORE) {
> > +           RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
> > +           return -1;
> > +   }
> > +
> > +   pi = &lcore_power_info[lcore_id];
> > +   if (num < pi->nb_freqs) {
> > +           RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
> > +           return 0;
> > +   }
> > +   rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
> 
> Why rte_memcpy?
the table can be large
> > +
> > +   return pi->nb_freqs;
> > +}
> > +
> > +uint32_t
> > +power_pstate_cpufreq_get_freq(unsigned int lcore_id)
> > +{
> > +   if (lcore_id >= RTE_MAX_LCORE) {
> > +           RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
> > +           return RTE_POWER_INVALID_FREQ_INDEX;
> > +   }
> > +
> > +   return lcore_power_info[lcore_id].curr_idx;
> > +}
> > +
> 
> <snip>
> 
> > +   caps->turbo = !!(pi->turbo_available);
> > +
> > +   return 0;
> > +}
> > diff --git a/lib/librte_power/power_pstate_cpufreq.h 
> > b/lib/librte_power/power_pstate_cpufreq.h
> > new file mode 100644
> > index 0000000..0fc917a
> > --- /dev/null
> > +++ b/lib/librte_power/power_pstate_cpufreq.h
> > @@ -0,0 +1,218 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2010-2018 Intel Corporation
> 
> I believe it should be just 2018.
> 
> > + */
> > +
> > +#ifndef _POWER_PSTATE_CPUFREQ_H
> > +#define _POWER_PSTATE_CPUFREQ_H
> > +
> > +/**
> > + * @file
> > + * RTE Power Management via Intel Pstate driver
> > + */
> > +
> > +#include <rte_common.h>
> > +#include <rte_byteorder.h>
> > +#include <rte_log.h>
> > +#include <rte_string_fns.h>
> > +#include "rte_power.h"
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> 
> I don't think this is necessary. These extern declarations are only for
> public API headers, so that C++ compilers could parse them. Since this is
> not a public header, there's no need for extern C declaration.
just keep as same as other headers 
> 
> > +
> > +/**
> > + * Initialize power management for a specific lcore. It will check and set 
> > the
> > + * governor to performance for the lcore, get the available frequencies, 
> > and
> > + * prepare to set new lcore frequency.
> > + *
> > + * @param lcore_id
> > + *  lcore id.
> > + *
> > + * @return
> > + *  - 0 on success.
> > + *  - Negative on error.
> > + */
> 
> <snip>
> 
> > + *
> > + * @return
> > + *  The number of available frequencies.
> > + */
> > +uint32_t power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs,
> > +           uint32_t num);
> > +
> > +/**
> > + * Return the current index of available frequencies of a specific lcore. 
> > It
> > + * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error.
> 
> This isn't a public API, so it's nitpicking, but any notes about return
> value should be in the @return section.
agree
> 
> > + * It should be protected outside of this function for threadsafe.
> > + *
> > + * @param lcore_id
> > + *  lcore id.
> > + *
> > + * @return
> > + *  The current index of available frequencies.
> > + */
> > +uint32_t power_pstate_cpufreq_get_freq(unsigned int lcore_id);
> > +
> > +/**
> 
> <snip>
> 
> >   #include "rte_power.h"
> >   #include "power_acpi_cpufreq.h"
> >   #include "power_kvm_vm.h"
> > +#include "power_pstate_cpufreq.h"
> >   #include "power_common.h"
> >   enum power_management_env global_default_env = PM_ENV_NOT_SET;
> > @@ -29,6 +30,8 @@ rte_power_get_capabilities_t rte_power_get_capabilities;
> >   int
> >   rte_power_set_env(enum power_management_env env)
> >   {
> > +
> > +
> 
> This is probably unintended whitespace change.
> 
agree
> >     if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
> >             return 0;
> >     }
> > @@ -56,6 +59,19 @@ rte_power_set_env(enum power_management_env env)
> >             rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo;
> >             rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo;
> >             rte_power_get_capabilities = power_kvm_vm_get_capabilities;
> > +   } else if (env == PM_ENV_PSTATE_CPUFREQ) {
> > +           rte_power_freqs = power_pstate_cpufreq_freqs;
> > +           rte_power_get_freq = power_pstate_cpufreq_get_freq;
> 
> <snip>
> 
> > +   if (global_default_env == PM_ENV_KVM_VM)
> >             return power_kvm_vm_init(lcore_id);
> > -   }
> > +
> > +   if (global_default_env == PM_ENV_PSTATE_CPUFREQ)
> > +           return power_pstate_cpufreq_init(lcore_id);
> > +
> 
> switch?
should be fine for only 3 enum type
> 
> >     /* Auto detect Environment */
> >     RTE_LOG(INFO, POWER, "Attempting to initialise ACPI cpufreq power "
> >                     "management...\n");
> > @@ -99,13 +118,23 @@ rte_power_init(unsigned int lcore_id)
> >             goto out;
> >     }
> > -   RTE_LOG(INFO, POWER, "Attempting to initialise VM power 
> > management...\n");
> > +   RTE_LOG(INFO, POWER, "Attempting to initialise PSTAT power "
> > +                   "management...\n");
> > +   ret = power_pstate_cpufreq_init(lcore_id);
> > +   if (ret == 0) {
> > +           rte_power_set_env(PM_ENV_PSTATE_CPUFREQ);
> > +           goto out;
> > +   }
> > +
> > +   RTE_LOG(INFO, POWER, "Attempting to initialise VM power "
> > +                   "management...\n");
> >     ret = power_kvm_vm_init(lcore_id);
> >     if (ret == 0) {
> >             rte_power_set_env(PM_ENV_KVM_VM);
> >             goto out;
> >     }
> > -   RTE_LOG(ERR, POWER, "Unable to set Power Management Environment for 
> > lcore "
> > +   RTE_LOG(ERR, POWER, "Unable to set Power Management "
> > +                   "Environment for lcore "
> >                     "%u\n", lcore_id);
> 
> Perhaps this could be moved to a separate function, so that the code could
> look like:
> 
> switch (global_default_env) {
> case PM_ENV_ACPI:
>       return acpi_init();
> case PM_ENV_KVM:
>       return kvm_init();
> case PM_ENV_PSTATE:
>       return pstate_init();
> default:
>       if (autodetect() < 0)
>               RTE_LOG("error");
>       return 0;
> }
> 
> Or something like that.

I can move to switch
> 
> >   out:
> >     return ret;
> > @@ -118,6 +147,8 @@ rte_power_exit(unsigned int lcore_id)
> >             return power_acpi_cpufreq_exit(lcore_id);
> >     if (global_default_env == PM_ENV_KVM_VM)
> >             return power_kvm_vm_exit(lcore_id);
> > +   if (global_default_env == PM_ENV_PSTATE_CPUFREQ)
> > +           return power_pstate_cpufreq_exit(lcore_id);
> 
> switch?
> 
> >     RTE_LOG(ERR, POWER, "Environment has not been set, unable to exit "
> >                             "gracefully\n");
> > diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
> > index d70bc0b..c5e8f6b 100644
> > --- a/lib/librte_power/rte_power.h
> > +++ b/lib/librte_power/rte_power.h
> > @@ -20,7 +20,8 @@ extern "C" {
> >   #endif
> >   /* Power Management Environment State */
> > -enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, 
> > PM_ENV_KVM_VM};
> > +enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, 
> > PM_ENV_KVM_VM,
> > +           PM_ENV_PSTATE_CPUFREQ};
> 
> I don't like this approach. While it can be argued that application needs to
> be explicitly aware of whether it's using native (ACPI or pstate) vs. KVM
> power management, there's no real reason for an application to differentiate
> between ACPI and pstate modes.
> 
> Changing this would of course require a deprecation notice, so for now, can
> we hide all of this behind ACPI mode, and auto-detect whether we want ACPI
> or pstate mode? IMO it would be better for the user application to use a
> somewhat misnamed ACPI option for both ACPI and pstate modes, than to
> needlessly care about whether one or the other is in use.
> 
> What do you think?
acpi has significant difference with hwp. sysfs/xxxx/setspeed  
sysfs/xxxx/scaling_driver is different
I would like to make  application aware the driver type. 
> 
> >   /**
> >    * Set the default power management implementation. If this is not called 
> > prior
> > 
> 
> -- 
> Thanks,
> Anatoly

Reply via email to