On Fri, Jul 20, 2007 at 09:56:03AM +0200, Joachim Deguara wrote:
> On Friday 20 July 2007 09:25:22 Nick Piggin wrote:
> > On Wed, Jul 18, 2007 at 11:11:30AM +0200, Joachim Deguara wrote:
> > > While learning about schedstats I found that the documentation in the
> > > tree is old.  I updated it and found some interesting stuff like
> > > > schedstats version 14 is the same as version 12 and version 13 never saw a
> > > kernel release!  Also there are 6 fields in the current schedstats that
> > > are not used anymore.  Nick had made them irrelevant in commit
> > > 476d139c218e44e045e4bc6d4cc02b010b343939 but never removed them.
> > >
> > > Thanks to Rick's perl script who I borrowed some of the updated
> > > descriptions from.
> >
> > Ah, thanks, I actually didn't realise there was such good documentation
> > there. Patch looks good.
> >
> > BTW. I have a simple program to do a basic statistical summary of the
> > multiprocessor balancing if you are interested and haven't seen it.
> 
> > Yes I am interested.  Actually I started down this road looking to find out
> > if task migration could be tracked and I saw that got kicked out from early
> > versions.

What do you mean by that? You mean if you can check information on the
migration events that a particular task has experienced?


> Your script could come in useful to link to in the documentation.  Rick has a 
> great page but hasn't been updated in a little while (though still up-to-date 
> as version 12==14) and his email bounced (though just a config error).

Here it is; it's a bit ugly and I think it may still have a bug somewhere,
but I haven't looked at it for a while.

---
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * The only schedstat format version this tool accepts; parse_file() compares
 * it against the number on the input's leading "version" line and exits on
 * a mismatch.
 */
#define SCHEDSTAT_VERSION 14
struct rq_stats {
        /* sys_sched_yield stats */
        unsigned long long yld_both_empty;
        unsigned long long yld_act_empty;
        unsigned long long yld_exp_empty;
        unsigned long long yld_cnt;
                                                                                
        /* schedule stats */
        unsigned long long sched_active; //new
        unsigned long long sched_switch;
        unsigned long long sched_cnt;
        unsigned long long sched_idle;

        /* wake stats */
        unsigned long long ttwu_cnt;
        unsigned long long ttwu_local;

        /* latency stats */
        unsigned long long cpu_time;
        unsigned long long delay_time;
        unsigned long long pcnt;
};

/*
 * Per-CPU, per-domain counters read from one "domainN" line of the input.
 *
 * The lb_* arrays hold one slot per balancing context; show_stats() labels
 * index 0 "idle balancing", 1 "busy balancing" and 2 "new-idle balancing".
 *
 * Every member must remain an unsigned long long (or an array of them):
 * find_stats_delta() walks this struct as a flat array of unsigned long long.
 */
struct domain_stats {
        unsigned long long lb_cnt[3];           /* load balance calls */
        unsigned long long lb_balanced[3];      /* calls that found no imbalance */
        unsigned long long lb_failed[3];        /* found an imbalance but failed */
        unsigned long long lb_pulled[3];        /* tasks moved */
        unsigned long long lb_hot_pulled[3];    /* moved tasks that were cache hot */
        unsigned long long lb_imbalance[3];     /* summed imbalance, averaged on output */
        unsigned long long lb_nobusyq[3];       /* parsed but not reported */
        unsigned long long lb_nobusyg[3];       /* parsed but not reported */

        /* Active load balancing */
        unsigned long long alb_cnt;
        unsigned long long alb_failed;
        unsigned long long alb_pushed;

        /* Wake ups */
        unsigned long long ttwu_wake_remote;

        /* Passive load balancing */
        unsigned long long ttwu_move_balance;

        /* Affine wakeups */
        unsigned long long ttwu_move_affine;

        /* SD_BALANCE_EXEC */
        unsigned long long sbe_cnt;
        unsigned long long sbe_balanced;
        unsigned long long sbe_pushed;

        /* SD_BALANCE_FORK */
        unsigned long long sbf_cnt;
        unsigned long long sbf_balanced;
        unsigned long long sbf_pushed;
};

/*
 * Names for the three slots of the lb_* arrays in struct domain_stats.
 * The order matches the labels show_stats() prints for indices 0, 1 and 2
 * (idle, busy, new-idle). Nothing in this file references these constants
 * by name; the code uses the literal indices instead.
 */
enum idle_type {
        IDLE,
        NOT_IDLE,
        NEWLY_IDLE,
};

/* Upper bound on domains parsed per CPU; sizes the domain_stats arrays. */
#define MAXDOMAINS      4
/* Upper bound on CPUs parsed per file; sizes the per-CPU arrays. */
#define MAXCPUS         32
/*
 * Divisor show_stats() applies to the timestamp delta to obtain seconds.
 * NOTE(review): presumably must match the tick rate of the kernel that
 * produced the input files -- confirm before trusting per-second figures.
 */
#define HZ              100UL

static void parse_file(FILE *file, unsigned long long *ts,
                int *cpus, int *domains,
                struct rq_stats rq_stats[MAXCPUS],
                struct domain_stats domain_stats[MAXCPUS][MAXDOMAINS])
{
        int i, j;
        int ret, cpu, domain;
        int tmp;

        *domains = -1;

        ret = fscanf(file, "version %d\n", &tmp);
        if (ret == 0 || ret == EOF) {
                fprintf(stderr, "file format error 0\n");
                exit(1);
        }
        if (tmp != SCHEDSTAT_VERSION) {
                fprintf(stderr, "wrong file format version\n");
                exit(1);
        }

        ret = fscanf(file, "timestamp %llu\n", ts);
        if (ret == 0 || ret == EOF) {
                fprintf(stderr, "file format error 1\n");
                exit(1);
        }

        for (i = 0; i < MAXCPUS; i++) {
                struct rq_stats *rs = &rq_stats[i];

                ret = fscanf(file, "cpu%d ", &cpu);
                if (ret == EOF)
                        break;
                if (ret == 0 || cpu != i) {
                        fprintf(stderr, "file format error 2\n");
                        exit(1);
                }

                ret = fscanf(file, "%llu %llu %llu %llu %llu %llu %llu %llu 
%llu %llu %llu %llu",
                        &rs->yld_both_empty, &rs->yld_act_empty,
                        &rs->yld_exp_empty, &rs->yld_cnt,
                        &rs->sched_switch, &rs->sched_cnt, &rs->sched_idle,
                        &rs->ttwu_cnt, &rs->ttwu_local,
                        &rs->cpu_time, &rs->delay_time, &rs->pcnt);
                if (ret == 0 || ret == EOF) {
                        fprintf(stderr, "file format error 3\n");
                        exit(1);
                }
                
                for (j = 0; j < MAXDOMAINS; j++) {
                        int k;
                        struct domain_stats *ds = &domain_stats[i][j];
                        
                        /* We discard the domain's cpumask for now */
                        ret = fscanf(file, " domain%d %*s", &domain);
                        if (ret == 0 || ret == EOF)
                                break;
                        if (domain != j) {
                                fprintf(stderr, "file format error 4\n");
                                exit(1);
                        }

                        for (k = 0; k < 3; k++) {
                                ret = fscanf(file, "%llu %llu %llu %llu %llu 
%llu %llu %llu",
                                        &ds->lb_cnt[k], &ds->lb_balanced[k],
                                        &ds->lb_failed[k], &ds->lb_imbalance[k],
                                        &ds->lb_pulled[k], 
&ds->lb_hot_pulled[k],
                                        &ds->lb_nobusyq[k], &ds->lb_nobusyg[k]);
                                if (ret == 0 || ret == EOF) {
                                        fprintf(stderr, "file format error 
5\n");
                                        exit(1);
                                }
                        }

                        ret = fscanf(file, " %llu %llu %llu %llu %llu %llu %llu 
%llu %llu %llu %llu %llu",
                                &ds->alb_cnt, &ds->alb_failed, &ds->alb_pushed,
                                &ds->sbe_cnt, &ds->sbe_balanced, 
&ds->sbe_pushed,
                                &ds->sbf_cnt, &ds->sbf_balanced, 
&ds->sbf_pushed,
                                &ds->ttwu_wake_remote, &ds->ttwu_move_affine,
                                &ds->ttwu_move_balance);
                        if (ret == 0 || ret == EOF) {
                                fprintf(stderr, "file format error 6\n");
                                exit(1);
                        }
                }

                if (*domains != -1 && *domains != j) {
                        fprintf(stderr, "domains mismatch within file\n");
                        exit(1);
                }
                *domains = j;

                ret = fscanf(file, "\n");
                if (ret == EOF)
                        break;
        }
        
        *cpus = i;
}

/*
 * Accumulate, over all MAXCPUS slots, the difference between the "post" and
 * "pre" snapshots.
 *
 * rq_delta receives the sum over CPUs of (rq_post - rq_pre) for every
 * counter; domain_delta[d] receives the same sum for domain d. Both outputs
 * are zeroed first.
 *
 * Both stat structs are treated as flat arrays of unsigned long long, which
 * is valid only while every member has that type.
 */
static void find_stats_delta(struct rq_stats rq_pre[MAXCPUS],
                        struct rq_stats rq_post[MAXCPUS],
                        struct rq_stats *rq_delta,
                        struct domain_stats domain_pre[MAXCPUS][MAXDOMAINS],
                        struct domain_stats domain_post[MAXCPUS][MAXDOMAINS],
                        struct domain_stats domain_delta[MAXDOMAINS])
{
        /* Number of unsigned long long counters in each struct */
        const unsigned int rq_fields =
                sizeof(struct rq_stats) / sizeof(unsigned long long);
        const unsigned int dom_fields =
                sizeof(struct domain_stats) / sizeof(unsigned long long);
        int cpu;

        memset(rq_delta, 0, sizeof(*rq_delta));
        memset(domain_delta, 0, MAXDOMAINS * sizeof(domain_delta[0]));

        for (cpu = 0; cpu < MAXCPUS; cpu++) {
                unsigned long long *rq_sum = (unsigned long long *)rq_delta;
                unsigned long long *rq_after =
                        (unsigned long long *)&rq_post[cpu];
                unsigned long long *rq_before =
                        (unsigned long long *)&rq_pre[cpu];
                unsigned int f, d;

                for (f = 0; f < rq_fields; f++)
                        rq_sum[f] += rq_after[f] - rq_before[f];

                for (d = 0; d < MAXDOMAINS; d++) {
                        unsigned long long *dom_sum =
                                (unsigned long long *)&domain_delta[d];
                        unsigned long long *dom_after =
                                (unsigned long long *)&domain_post[cpu][d];
                        unsigned long long *dom_before =
                                (unsigned long long *)&domain_pre[cpu][d];

                        for (f = 0; f < dom_fields; f++)
                                dom_sum[f] += dom_after[f] - dom_before[f];
                }
        }
}

static void show_stats(unsigned long long time_delta, int cpus, int domains,
                struct rq_stats *rq_stats,
                struct domain_stats domain_stats[MAXDOMAINS])
{
        unsigned long long ttwu_remote;
        double s, tmp;
        int i;

        /* Ensures we don't get 0 time delta */
        s = ((double)0.5 + time_delta) / HZ;
        printf("sample period: %.3fs\n", s);
        
        /* TODO add the runqueue stats */
        tmp = (double)rq_stats->sched_cnt / s;
        printf("%.3f calls to schedule / s\n", tmp);

        tmp = (double)rq_stats->cpu_time / rq_stats->pcnt;
        printf("%.3fms average timeslice\n", tmp);

        tmp = (double)rq_stats->delay_time / rq_stats->pcnt;
        printf("%.3fms average runqueue delay\n", tmp);

        printf("\n--- wakeup statistics ---\n");
        tmp = (double)rq_stats->ttwu_cnt / s;
        printf("  %.3f task wakes / s\n", tmp);
        tmp = (double)100 * rq_stats->ttwu_local / rq_stats->ttwu_cnt;
        printf("    %.3f%% of them from the local CPU\n", tmp);

        ttwu_remote = rq_stats->ttwu_cnt - rq_stats->ttwu_local;

        for (i = 0; i < domains; i++) {
                tmp = (double)100 * domain_stats[i].ttwu_wake_remote / 
ttwu_remote;
                printf("    %.3f%% of remote wakeups come from domain%d\n", 
tmp, i);

                tmp = (double)100 * domain_stats[i].ttwu_move_balance / 
domain_stats[i].ttwu_wake_remote;
                printf("      %.3f%% are moved to the local CPU via passive 
load balancing\n", tmp);

                tmp = (double)100 * domain_stats[i].ttwu_move_affine / 
domain_stats[i].ttwu_wake_remote;
                printf("      %.3f%% are moved to the local CPU via affine 
wakeups\n", tmp);
        }

        printf("\n--- load balancing statistics ---\n");

        for (i = 0; i < domains; i++) {
                unsigned long long total_lb = 0;
                unsigned long long total_pulled = 0;
                int j;

                printf("  for domain%d\n", i);

                for (j = 0; j < 3; j++) {
                        total_lb += domain_stats[i].lb_cnt[j];
                        total_pulled += domain_stats[i].lb_pulled[j];
                }

                tmp = (double)total_lb / s;
                printf("    %.3f load balance calls / s", tmp);
                tmp = (double)total_pulled / s;
                printf(" move %.3f tasks / s\n", tmp);

                for (j = 0; j < 3; j++) {
                        unsigned long long lb = domain_stats[i].lb_cnt[j];
                        unsigned long long pulled = 
domain_stats[i].lb_pulled[j];
                        tmp = (double)100 * lb / total_lb;
                        printf("      %.3f%% calls and", tmp);
                        tmp = (double)100 * pulled / total_pulled;
                        printf(" %.3f%% task moves came from ", tmp);
                        if (j == 0)
                                printf("idle balancing\n");
                        else if (j == 1)
                                printf("busy balancing\n");
                        else if (j == 2)
                                printf("new-idle balancing\n");

                        if (lb) {
                                tmp = (double)100 * (lb - 
domain_stats[i].lb_balanced[j]) / lb;
                                printf("        %.3f%% were imbalanced", tmp);

                                tmp = (double)domain_stats[i].lb_imbalance[j] / 
(lb - domain_stats[i].lb_balanced[j]);
                                printf(" with an average imbalance of %.3f\n", 
tmp);

                                tmp = (double)100 * 
domain_stats[i].lb_failed[j] / lb;
                                printf("        %.3f%% found an imbalance but 
failed\n", tmp);
                        }

                        if (pulled) {
                                tmp = (double)100 * 
domain_stats[i].lb_hot_pulled[j] / pulled;
                                printf("        %.3f%% of tasks moved were 
cache hot\n", tmp);
                        }
                }
                
                tmp = (double)domain_stats[i].alb_cnt / s;
                printf("    %.3f active balances / s ", tmp);
                
                tmp = (double)domain_stats[i].alb_pushed / s;
                printf(" move %.3f tasks / s\n", tmp);

                if (domain_stats[i].alb_cnt) {
                        tmp = (double)100 * domain_stats[i].alb_failed / 
domain_stats[i].alb_cnt;
                        printf("      %%%.3f attempts failed\n", tmp);
                }

                tmp = (double)domain_stats[i].sbe_cnt / s;
                printf("    %.3f exec balances / s ", tmp);
                
                tmp = (double)domain_stats[i].sbe_pushed / s;
                printf(" move %.3f tasks / s\n", tmp);

                if (domain_stats[i].sbe_cnt) {
                        tmp = (double)100 * domain_stats[i].sbe_balanced / 
domain_stats[i].sbe_cnt;
                        printf("      %%%.3f found no imbalance\n", tmp);
                }

                tmp = (double)domain_stats[i].sbf_cnt / s;
                printf("    %.3f fork balances / s ", tmp);
                
                tmp = (double)domain_stats[i].sbf_pushed / s;
                printf(" move %.3f tasks / s\n", tmp);

                if (domain_stats[i].sbf_cnt) {
                        tmp = (double)100 * domain_stats[i].sbf_balanced / 
domain_stats[i].sbf_cnt;
                        printf("      %%%.3f found no imbalance\n", tmp);
                }


                printf("\n");
        }
}

/*
 * Working state for main(): one parsed snapshot taken before ("pre") and one
 * after ("post") the sample period, plus the accumulated deltas.
 */

/* Values of the "timestamp" lines of the two snapshots */
static unsigned long long pre_ts, post_ts;
/* CPU and domain counts reported by parse_file() for each snapshot */
static int pre_cpus, post_cpus;
static int pre_domains, post_domains;
/* Per-CPU runqueue counters of each snapshot and their summed difference */
static struct rq_stats pre_rq_stats[MAXCPUS];
static struct rq_stats post_rq_stats[MAXCPUS];
static struct rq_stats delta_rq_stats;
/* Per-CPU, per-domain counters of each snapshot and their per-domain sums */
static struct domain_stats pre_domain_stats[MAXCPUS][MAXDOMAINS];
static struct domain_stats post_domain_stats[MAXCPUS][MAXDOMAINS];
static struct domain_stats delta_domain_stats[MAXDOMAINS];

/*
 * Usage: <program> <stats before> <stats after>
 *
 * Parses two schedstat snapshots, checks that they describe the same number
 * of CPUs and domains, and prints a summary of what changed between them.
 * Exits with status 1 on a usage, open or format error.
 */
int main(int argc, char *argv[])
{
        FILE *pre, *post;

        /* Both file arguments are required; argv[2] is opened below */
        if (argc < 3) {
                fprintf(stderr, "Usage: %s <stats before> <stats after>\n",
                                argv[0]);
                exit(1);
        }

        pre = fopen(argv[1], "r");
        if (pre == NULL) {
                perror("fopen pre file");
                exit(1);
        }
        post = fopen(argv[2], "r");
        if (post == NULL) {
                perror("fopen post file");
                exit(1);
        }

        parse_file(pre, &pre_ts, &pre_cpus, &pre_domains,
                        pre_rq_stats, pre_domain_stats);
        fclose(pre);
        parse_file(post, &post_ts, &post_cpus, &post_domains,
                        post_rq_stats, post_domain_stats);
        fclose(post);

        if (pre_cpus != post_cpus || pre_domains != post_domains) {
                fprintf(stderr, "pre and post file formats mismatch\n");
                exit(1);
        }

        find_stats_delta(pre_rq_stats, post_rq_stats, &delta_rq_stats,
                pre_domain_stats, post_domain_stats, delta_domain_stats);

        show_stats(post_ts - pre_ts, pre_cpus, pre_domains,
                        &delta_rq_stats, delta_domain_stats);

        return 0;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to