Hi,

On Sat, Oct 4, 2014 at 7:33 AM, pi-cheng.chen <pi-cheng.c...@linaro.org> wrote:
> Initialize struct cpufreq_pstates with initial P-state of CPUs and allocate
> struct cpufreq_pstate dynamically when parsing trace file to solve the issue
> caused by missing "scaling_avaialable_freqs" attr when using intel_pstate
> driver.
>
> Changes v2 to v3:
> Initialize struct cpufreq_pstates with initial P-state of all CPUs and
> beginning timestamp before trace acquisition and close all P-states with
> ending timestamp
>
> hanges v1 to v2:
> Sort the cpufreq_pstate list when parsing events
>
> Signed-off-by: Pi-Cheng Chen <pi-cheng.c...@linaro.org>
> ---
>  idlestat.c | 259 
> ++++++++++++++++++++++++++++++++++++++++++++-----------------
>  idlestat.h |   7 ++
>  trace.h    |   1 +
>  3 files changed, 195 insertions(+), 72 deletions(-)
>
> diff --git a/idlestat.c b/idlestat.c
> index da615cb..8230067 100644
> --- a/idlestat.c
> +++ b/idlestat.c
> @@ -536,6 +536,121 @@ static struct cpuidle_cstates *build_cstate_info(int 
> nrcpus)
>         return cstates;
>  }
>
> +#define TRACE_STAT_FORMAT "%*[^:]:%lf"
> +
> +static double get_trace_ts(void)
> +{
> +       FILE *f;
> +       double ts;
> +
> +       f = fopen(TRACE_STAT_FILE, "r");
> +       if (!f)
> +               return -1;
> +
> +       while (fgets(buffer, BUFSIZE, f)) {
> +               if (!strstr(buffer, "now ts"))
> +                       continue;
> +               if (!sscanf(buffer, TRACE_STAT_FORMAT, &ts))
> +                       ts = -1;
> +               break;
> +       }
> +       fclose(f);
> +
> +       return ts;
> +}
> +
> +static void release_init_pstates(struct init_pstates *init)
> +{
> +       free(init->freqs);
> +       free(init);
> +}
> +
> +static struct init_pstates *build_init_pstates(void)
> +{
> +       struct init_pstates *init;
> +       int nr_cpus, cpu;
> +       unsigned int *freqs;
> +
> +       nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +       if (nr_cpus < 0)
> +               return NULL;
> +
> +       init = malloc(sizeof(*init));
> +       if (!init)
> +               return NULL;

Note, init->freqs is uninitialized and used below in error handling path.

> +
> +       freqs = calloc(nr_cpus, sizeof(*freqs));
> +       if (!freqs) {
> +               free(init);
> +               return NULL;
> +       }
> +       memset(freqs, 0, sizeof(*freqs) * nr_cpus);

(This is useless, either we assign every element in the loop or we
release the memory.)

> +
> +       for (cpu = 0; cpu < nr_cpus; cpu++) {
> +               char *fpath;
> +               unsigned int *freq = &(freqs[cpu]);
> +
> +               if (asprintf(&fpath, CPUFREQ_CURFREQ_PATH_FORMAT, cpu) < 0) {
> +                       release_init_pstates(init);

The uninitialized variable is used within the called function. Plus
freqs does not get released.

> +                       return NULL;
> +               }
> +               if (read_int(fpath, (int *)freq))
> +                       *freq = 0;
> +               free(fpath);
> +       }
> +       init->nr_cpus = nr_cpus;
> +       init->freqs = freqs;

Moving these two assignments before the loop would fix the above issue
without leaking memory. Overall this is a minor issue, but lets fix it
anyway.

> +
> +       return init;
> +}
> +
> +/**
> + * alloc_pstate - allocate, sort, and initialize pstate struct
> + * to maintain statistics of P-state transitions
> + * @pstates: per-CPU P-state statistics struct
> + * @freq: frequency for which the newly pstate is allocated
> + *
> + * Return: the index of the newly allocated pstate struct
> + */
> +static int alloc_pstate(struct cpufreq_pstates *pstates, unsigned int freq)
> +{
> +       struct cpufreq_pstate *pstate, *tmp;
> +       int nrfreq, i, next = 0;
> +
> +       pstate = pstates->pstate;
> +       nrfreq = pstates->max;
> +
> +       tmp = realloc(pstate, sizeof(*pstate) * (nrfreq + 1));
> +       if (!tmp) {
> +               perror("realloc pstate");
> +               return -1;
> +       }
> +       pstate = tmp;
> +       pstates->pstate = tmp;
> +       pstates->max = nrfreq + 1;
> +
> +       for (i = 0; i < nrfreq && freq <= pstate[i].freq; i++)
> +               ;
> +
> +       next = i;
> +       for (i = nrfreq; i > next && i > 0; i--) {
> +               pstate[i] = pstate[i - 1];
> +               pstate[i].id = i;
> +               pstates->current = (pstates->current == i - 1)?
> +                                       i : pstates->current;
> +       }

The above itches me a bit. Readability would be better as:

memmove(pstate+next+1, pstate+next, sizeof(*pstate) * (nrfreq-next));
for (i = nrfreq, i > next; i--)
  pstate[i].id = i;
if (pstates->current >= next) pstates->current++;

It's ok to move a memory block of 0 bytes. Again, minor.

> +
> +       pstate[next].id = next;
> +       pstate[next].freq = freq;
> +       pstate[next].count = 0;
> +       pstate[next].min_time = DBL_MAX;
> +       pstate[next].max_time = 0;
> +       pstate[next].avg_time = 0;
> +       pstate[next].duration = 0;
> +
> +       return next;
> +}
> +
>  /**
>   * release_pstate_info - free all P-state related structs
>   * @pstates: per-cpu array of P-state statistics structs
> @@ -560,14 +675,16 @@ static void release_pstate_info(struct cpufreq_pstates 
> *pstates, int nrcpus)
>         return;
>  }
>
> -/**
> - * build_pstate_info - parse cpufreq sysfs entries and build per-CPU
> - * structs to maintain statistics of P-state transitions
> +/* build_pstate_info - allocate and initialize per-CPU structs to
> + * maintain statistics of P-state transitions
>   * @nrcpus: number of CPUs
> + * @initp: initial P-state of CPUs before trace acquistion
> + * @start_ts: timestamp when trace acquisition started
>   *
>   * Return: per-CPU array of structs (success) or NULL (error)
>   */
> -static struct cpufreq_pstates *build_pstate_info(int nrcpus)
> +static struct cpufreq_pstates *build_pstate_info(int nrcpus,
> +                       struct init_pstates *initp, double start_ts)
>  {
>         int cpu;
>         struct cpufreq_pstates *pstates;
> @@ -577,67 +694,28 @@ static struct cpufreq_pstates *build_pstate_info(int 
> nrcpus)
>                 return NULL;
>         memset(pstates, 0, sizeof(*pstates) * nrcpus);
>
> -       for (cpu = 0; cpu < nrcpus; cpu++) {
> -               struct cpufreq_pstate *pstate;
> -               int nrfreq;
> -               char *fpath, *freq, line[256];
> -               FILE *sc_av_freq;
> -
> -               if (asprintf(&fpath, CPUFREQ_AVFREQ_PATH_FORMAT, cpu) < 0)
> -                       goto clean_exit;
> -
> -               /* read scaling_available_frequencies for the CPU */
> -               sc_av_freq = fopen(fpath, "r");
> -               free(fpath);
> -               if (!sc_av_freq) {
> -                       fprintf(stderr, "warning: P-states not supported for "
> -                               "CPU%d\n", cpu);
> -                       continue;
> -               }
> -               freq = fgets(line, sizeof(line)/sizeof(line[0]), sc_av_freq);
> -               fclose(sc_av_freq);
> -               if (!freq) {
> -                       /* unlikely to be here, but just in case... */
> -                       fprintf(stderr, "warning: P-state info not found for "
> -                               "CPU%d\n", cpu);
> -                       continue;
> -               }
> +       if (initp)
> +               assert(initp->nr_cpus == nrcpus);
>
> -               /* tokenize line and populate each frequency */
> -               nrfreq = 0;
> -               pstate = NULL;
> -               while ((freq = strtok(freq, "\n ")) != NULL) {
> -                       struct cpufreq_pstate *tmp = realloc(pstate, 
> sizeof(*pstate) * (nrfreq+1));
> -                       if (!tmp)
> -                               goto clean_exit;
> -                       pstate = tmp;
> -
> -                       /* initialize pstate record */
> -                       pstate[nrfreq].id = nrfreq;
> -                       pstate[nrfreq].freq = atol(freq);
> -                       pstate[nrfreq].count = 0;
> -                       pstate[nrfreq].min_time = DBL_MAX;
> -                       pstate[nrfreq].max_time = 0.;
> -                       pstate[nrfreq].avg_time = 0.;
> -                       pstate[nrfreq].duration = 0.;
> -                       nrfreq++;
> -                       freq = NULL;
> +       for (cpu = 0; cpu < nrcpus; cpu++) {
> +               struct cpufreq_pstates *ps = &(pstates[cpu]);
> +
> +               ps->pstate = NULL;
> +               ps->max = 0;
> +               ps->current = -1;      /* unknown */
> +               ps->idle = -1;         /* unknown */
> +               ps->time_enter = 0.;
> +               ps->time_exit = 0.;
> +

> +               if (initp && initp->freqs[cpu] > 0 && start_ts > 0) {
> +                       ps->current = alloc_pstate(ps, initp->freqs[cpu]);
> +                       assert(ps->current >= 0);
> +                       ps->time_enter = start_ts;
> +                       ps->idle = 0;
>                 }

This block is not solving the problem completely, see below.

Note, setting ps->idle to 0 is not really fixing the bug in
cpu_change_pstate calling open_next_pstate with idle set to -1. I
assume you added that line because of the warnings generated, right?
(It is a separate issue, don't fix that bug in this patch. But we
shouldn't really hide it either.)

> -
> -               /* now populate cpufreq_pstates for this CPU */
> -               pstates[cpu].pstate = pstate;
> -               pstates[cpu].max = nrfreq;
> -               pstates[cpu].current = -1;      /* unknown */
> -               pstates[cpu].idle = -1;         /* unknown */
> -               pstates[cpu].time_enter = 0.;
> -               pstates[cpu].time_exit = 0.;
>         }
>
>         return pstates;
> -
> -clean_exit:
> -       release_pstate_info(pstates, nrcpus);
> -       return NULL;
>  }
>
>  static int get_current_pstate(struct cpuidle_datas *datas, int cpu,
> @@ -712,6 +790,9 @@ static void cpu_change_pstate(struct cpuidle_datas 
> *datas, int cpu,
>
>         cur = get_current_pstate(datas, cpu, &ps, &p);
>         next = freq_to_pstate_index(ps, freq);
> +       if (next < 0)
> +               next = alloc_pstate(ps, freq);
> +       assert(next >= 0);
>
>         switch (cur) {
>         case 1:
> @@ -742,6 +823,19 @@ static void cpu_change_pstate(struct cpuidle_datas 
> *datas, int cpu,
>         }
>  }
>
> +static void cpu_close_all_pstate(struct cpuidle_datas *datas, double time)
> +{
> +       struct cpufreq_pstates *ps;
> +       struct cpufreq_pstate *p;
> +       int cpu, cur;
> +
> +       for (cpu = 0; cpu < datas->nrcpus; cpu++) {
> +               cur = get_current_pstate(datas, cpu, &ps, &p);
> +               if (p && !cur && time > 0)
> +                       close_current_pstate(ps, time);
> +       }
> +}
> +

Not needed / should not be created, see below. Not solving the problem
completely.

>  static void cpu_pstate_idle(struct cpuidle_datas *datas, int cpu, double 
> time)
>  {
>         struct cpufreq_pstates *ps = &(datas->pstates[cpu]);
> @@ -763,7 +857,6 @@ static int store_data(double time, int state, int cpu,
>                       struct cpuidle_datas *datas, int count)
>  {
>         struct cpuidle_cstates *cstates = &datas->cstates[cpu];
> -       struct cpufreq_pstate *pstate = datas->pstates[cpu].pstate;
>         struct cpuidle_cstate *cstate;
>         struct cpuidle_data *data, *tmp;
>         int nrdata, last_cstate = cstates->last_cstate;
> @@ -826,9 +919,8 @@ static int store_data(double time, int state, int cpu,
>                 /* need indication if CPU is idle or not */
>                 cstates->last_cstate = -1;
>
> -               /* update P-state stats if supported */
> -               if (pstate)
> -                       cpu_pstate_running(datas, cpu, time);
> +               /* update P-state stats */
> +               cpu_pstate_running(datas, cpu, time);
>
>                 return 0;
>         }
> @@ -846,9 +938,8 @@ static int store_data(double time, int state, int cpu,
>         cstates->cstate_max = MAX(cstates->cstate_max, state);
>         cstates->last_cstate = state;
>         cstates->wakeirq = NULL;
> -       /* update P-state stats if supported */
> -       if (pstate)
> -               cpu_pstate_idle(datas, cpu, time);
> +       /* update P-state stats*/
> +       cpu_pstate_idle(datas, cpu, time);
>
>         return 0;
>  }
> @@ -932,7 +1023,8 @@ static int get_wakeup_irq(struct cpuidle_datas *datas, 
> char *buffer, int count)
>         return -1;
>  }
>
> -static struct cpuidle_datas *idlestat_load(struct program_options *options)
> +static struct cpuidle_datas *idlestat_load(struct program_options *options,
> +                       struct init_pstates *initp, double start_ts, double 
> end_ts)

Now this is my main objection. Instead of hacking into load, you need
to tweak the store. Your changes do not work as intended with
--report, as initp, start_ts and end_ts will not be properly set
without --trace.

Instead of modifying load and adding close_all_pstates and such, hack
the store so that after storing the topology, we emit initial pstate
change for all CPUs at start_ts to frequency specified by initp. Then,
after the actual trace entries have been copied, store final pstate
change into MAXINT or some other value that is not a real frequency.
Then things should work equally for --trace and --report (and
compare).

While at it, why don't we break this change into 2 separate patches.
The dynamic pstate allocation, which in principle is already sound
(with or without changing the minor issues I mentioned) and then the
part where we try to get the leading and trailing pstate information
into the report, which needs to be changed from faking stuff in load
into writing the data into trace properly.

Tuukka


>  {
>         FILE *f;
>         unsigned int state = 0, freq = 0, cpu = 0, nrcpus = 0;
> @@ -988,14 +1080,13 @@ static struct cpuidle_datas *idlestat_load(struct 
> program_options *options)
>                 return ptrerror("build_cstate_info: out of memory");
>         }
>
> -       datas->pstates = build_pstate_info(nrcpus);
> +       datas->pstates = build_pstate_info(nrcpus, initp, start_ts);
>         if (!datas->pstates) {
>                 free(datas->cstates);
>                 free(datas);
>                 fclose(f);
>                 return ptrerror("build_pstate_info: out of memory");
>         }
> -
>         datas->nrcpus = nrcpus;
>
>         /* read topology information */
> @@ -1018,7 +1109,6 @@ static struct cpuidle_datas *idlestat_load(struct 
> program_options *options)
>                 } else if (strstr(buffer, "cpu_frequency")) {
>                         assert(sscanf(buffer, TRACE_FORMAT, &time, &freq,
>                                       &cpu) == 3);
> -                       assert(datas->pstates[cpu].pstate != NULL);
>                         cpu_change_pstate(datas, cpu, freq, time);
>                         count++;
>                         continue;
> @@ -1031,6 +1121,9 @@ static struct cpuidle_datas *idlestat_load(struct 
> program_options *options)
>
>         fclose(f);
>
> +       /* close all p-state with end timestamp */
> +       cpu_close_all_pstate(datas, end_ts);
> +
>         fprintf(stderr, "Log is %lf secs long with %zd events\n",
>                 end - begin, count);
>
> @@ -1481,7 +1574,9 @@ int main(int argc, char *argv[], char *const envp[])
>  {
>         struct cpuidle_datas *datas;
>         struct program_options options;
> +       struct init_pstates *initp = NULL;
>         int args;
> +       double start_ts, end_ts;
>
>         args = getoptions(argc, argv, &options);
>         if (args <= 0)
> @@ -1527,6 +1622,14 @@ int main(int argc, char *argv[], char *const envp[])
>                 if (idlestat_flush_trace())
>                         return -1;
>
> +               /* Get current P-state of all CPUs */
> +               if (options.display & FREQUENCY_DISPLAY)
> +                       initp = build_init_pstates();
> +
> +               /* Get timestamp before trace acquisition */
> +               if (options.display & FREQUENCY_DISPLAY)
> +                       start_ts = get_trace_ts();
> +
>                 /* Start the recording */
>                 if (idlestat_trace_enable(true))
>                         return -1;
> @@ -1549,6 +1652,17 @@ int main(int argc, char *argv[], char *const envp[])
>                 if (idlestat_trace_enable(false))
>                         return -1;
>
> +               /* Get timestamp after trace acquisition */
> +               if (options.display & FREQUENCY_DISPLAY)
> +                       end_ts = get_trace_ts();
> +
> +               /* Emit warning messages when P-state info might be partial */
> +               if (options.display & FREQUENCY_DISPLAY &&
> +                       (!initp || start_ts < 0 || end_ts < 0))
> +                       fprintf(stderr, "Unable to get initial P-state,"
> +                               " beginning timestamp, or ending timestamp!\n"
> +                               "P-state statistics data might be 
> partial!\n");
> +
>                 /* At this point we should have some spurious wake up
>                  * at the beginning of the traces and at the end (wake
>                  * up all cpus and timer expiration for the timer
> @@ -1559,7 +1673,7 @@ int main(int argc, char *argv[], char *const envp[])
>         }
>
>         /* Load the idle states information */
> -       datas = idlestat_load(&options);
> +       datas = idlestat_load(&options, initp, start_ts, end_ts);
>
>         if (!datas)
>                 return 1;
> @@ -1590,6 +1704,7 @@ int main(int argc, char *argv[], char *const envp[])
>                 }
>         }
>
> +       release_init_pstates(initp);
>         release_cpu_topo_cstates();
>         release_cpu_topo_info();
>         release_pstate_info(datas->pstates, datas->nrcpus);
> diff --git a/idlestat.h b/idlestat.h
> index 735f0fe..39ba4d7 100644
> --- a/idlestat.h
> +++ b/idlestat.h
> @@ -41,6 +41,8 @@
>         "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/residency"
>  #define CPUFREQ_AVFREQ_PATH_FORMAT \
>         "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies"
> +#define CPUFREQ_CURFREQ_PATH_FORMAT \
> +       "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq"
>  #define CPUIDLE_STATENAME_PATH_FORMAT \
>         "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name"
>
> @@ -63,6 +65,11 @@ struct cpuidle_cstate {
>         int target_residency; /* -1 if not available */
>  };
>
> +struct init_pstates {
> +       int nr_cpus;
> +       unsigned int *freqs;
> +};
> +
>  enum IRQ_TYPE {
>         HARD_IRQ = 0,
>         IPI_IRQ,
> diff --git a/trace.h b/trace.h
> index bef6703..90b4a6a 100644
> --- a/trace.h
> +++ b/trace.h
> @@ -33,6 +33,7 @@
>  #define TRACE_EVENT_PATH TRACE_PATH "/events/enable"
>  #define TRACE_FREE TRACE_PATH "/free_buffer"
>  #define TRACE_FILE TRACE_PATH "/trace"
> +#define TRACE_STAT_FILE TRACE_PATH "/per_cpu/cpu0/stats"
>  #define TRACE_IDLE_NRHITS_PER_SEC 10000
>  #define TRACE_IDLE_LENGTH 196
>  #define TRACE_CPUFREQ_NRHITS_PER_SEC 100
> --
> 1.9.1
>
>
> _______________________________________________
> linaro-dev mailing list
> linaro-dev@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/linaro-dev

_______________________________________________
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev

Reply via email to