On Fri, Jul 20, 2007 at 09:56:03AM +0200, Joachim Deguara wrote: > On Friday 20 July 2007 09:25:22 Nick Piggin wrote: > > On Wed, Jul 18, 2007 at 11:11:30AM +0200, Joachim Deguara wrote: > > > While learning about schedstats I found that the documentation in the > > > tree is old. I updated it and found some interesting stuff like > > > schedstats version 14 is the same as version 12 and version 13 never saw a > > > kernel release! Also there are 6 fields in the current schedstats that > > > are not used anymore. Nick had made them irrelevant in commit > > > 476d139c218e44e045e4bc6d4cc02b010b343939 but never removed them. > > > > > > Thanks to Rick's perl script who I borrowed some of the updated > > > descriptions from. > > > > Ah, thanks, I actually didn't realise there was such good documentation > > there. Patch looks good. > > > > BTW. I have a simple program to do a basic statistical summary of the > > multiprocessor balancing if you are interested and haven't seen it. > > Yes I am interested. Actually I started down this road looking to find out > if > task migration could be tracked and I saw that got kicked out from early > versions.
What do you mean by that? You mean if you can check information on the migration events that a particular task has experienced? > Your script could come in useful to link to in the documentation. Rick has a > great page but hasn't been updated in a little while (though still up-to-date > as version 12==14) and his email bounced (though just a config error). Here it is, its a bit ugly and I think it may still have a bug somewhere, but I haven't looked at it for a while. --- #include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #define SCHEDSTAT_VERSION 14 struct rq_stats { /* sys_sched_yield stats */ unsigned long long yld_both_empty; unsigned long long yld_act_empty; unsigned long long yld_exp_empty; unsigned long long yld_cnt; /* schedule stats */ unsigned long long sched_active; //new unsigned long long sched_switch; unsigned long long sched_cnt; unsigned long long sched_idle; /* wake stats */ unsigned long long ttwu_cnt; unsigned long long ttwu_local; /* latency stats */ unsigned long long cpu_time; unsigned long long delay_time; unsigned long long pcnt; }; struct domain_stats { unsigned long long lb_cnt[3]; unsigned long long lb_balanced[3]; unsigned long long lb_failed[3]; unsigned long long lb_pulled[3]; unsigned long long lb_hot_pulled[3]; unsigned long long lb_imbalance[3]; unsigned long long lb_nobusyq[3]; unsigned long long lb_nobusyg[3]; /* Active load balancing */ unsigned long long alb_cnt; unsigned long long alb_failed; unsigned long long alb_pushed; /* Wake ups */ unsigned long long ttwu_wake_remote; /* Passive load balancing */ unsigned long long ttwu_move_balance; /* Affine wakeups */ unsigned long long ttwu_move_affine; /* SD_BALANCE_EXEC */ unsigned long long sbe_cnt; unsigned long long sbe_balanced; unsigned long long sbe_pushed; /* SD_BALANCE_FORK */ unsigned long long sbf_cnt; unsigned long long sbf_balanced; unsigned long long sbf_pushed; }; enum idle_type { IDLE, NOT_IDLE, NEWLY_IDLE, }; #define MAXDOMAINS 4 #define 
MAXCPUS 32 #define HZ 100UL static void parse_file(FILE *file, unsigned long long *ts, int *cpus, int *domains, struct rq_stats rq_stats[MAXCPUS], struct domain_stats domain_stats[MAXCPUS][MAXDOMAINS]) { int i, j; int ret, cpu, domain; int tmp; *domains = -1; ret = fscanf(file, "version %d\n", &tmp); if (ret == 0 || ret == EOF) { fprintf(stderr, "file format error 0\n"); exit(1); } if (tmp != SCHEDSTAT_VERSION) { fprintf(stderr, "wrong file format version\n"); exit(1); } ret = fscanf(file, "timestamp %llu\n", ts); if (ret == 0 || ret == EOF) { fprintf(stderr, "file format error 1\n"); exit(1); } for (i = 0; i < MAXCPUS; i++) { struct rq_stats *rs = &rq_stats[i]; ret = fscanf(file, "cpu%d ", &cpu); if (ret == EOF) break; if (ret == 0 || cpu != i) { fprintf(stderr, "file format error 2\n"); exit(1); } ret = fscanf(file, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu", &rs->yld_both_empty, &rs->yld_act_empty, &rs->yld_exp_empty, &rs->yld_cnt, &rs->sched_switch, &rs->sched_cnt, &rs->sched_idle, &rs->ttwu_cnt, &rs->ttwu_local, &rs->cpu_time, &rs->delay_time, &rs->pcnt); if (ret == 0 || ret == EOF) { fprintf(stderr, "file format error 3\n"); exit(1); } for (j = 0; j < MAXDOMAINS; j++) { int k; struct domain_stats *ds = &domain_stats[i][j]; /* We discard the domain's cpumask for now */ ret = fscanf(file, " domain%d %*s", &domain); if (ret == 0 || ret == EOF) break; if (domain != j) { fprintf(stderr, "file format error 4\n"); exit(1); } for (k = 0; k < 3; k++) { ret = fscanf(file, "%llu %llu %llu %llu %llu %llu %llu %llu", &ds->lb_cnt[k], &ds->lb_balanced[k], &ds->lb_failed[k], &ds->lb_imbalance[k], &ds->lb_pulled[k], &ds->lb_hot_pulled[k], &ds->lb_nobusyq[k], &ds->lb_nobusyg[k]); if (ret == 0 || ret == EOF) { fprintf(stderr, "file format error 5\n"); exit(1); } } ret = fscanf(file, " %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu", &ds->alb_cnt, &ds->alb_failed, &ds->alb_pushed, &ds->sbe_cnt, &ds->sbe_balanced, &ds->sbe_pushed, &ds->sbf_cnt, 
&ds->sbf_balanced, &ds->sbf_pushed, &ds->ttwu_wake_remote, &ds->ttwu_move_affine, &ds->ttwu_move_balance); if (ret == 0 || ret == EOF) { fprintf(stderr, "file format error 6\n"); exit(1); } } if (*domains != -1 && *domains != j) { fprintf(stderr, "domains mismatch within file\n"); exit(1); } *domains = j; ret = fscanf(file, "\n"); if (ret == EOF) break; } *cpus = i; } static void find_stats_delta(struct rq_stats rq_pre[MAXCPUS], struct rq_stats rq_post[MAXCPUS], struct rq_stats *rq_delta, struct domain_stats domain_pre[MAXCPUS][MAXDOMAINS], struct domain_stats domain_post[MAXCPUS][MAXDOMAINS], struct domain_stats domain_delta[MAXDOMAINS]) { int i; memset(rq_delta, 0, sizeof(struct rq_stats)); memset(domain_delta, 0, sizeof(struct domain_stats)*MAXDOMAINS); for (i = 0; i < MAXCPUS; i++) { unsigned int j, k; /* No problem because they're all unsigned long long */ for (j = 0; j < sizeof(struct rq_stats)/sizeof(unsigned long long); j++) { *((unsigned long long *)rq_delta + j) += *((unsigned long long *)&rq_post[i] + j) - *((unsigned long long *)&rq_pre[i] + j); } for (j = 0; j < MAXDOMAINS; j++) { for (k = 0; k < sizeof(struct domain_stats)/sizeof(unsigned long long); k++) { *((unsigned long long *)&domain_delta[j] + k) += *((unsigned long long *)&domain_post[i][j] + k) - *((unsigned long long *)&domain_pre[i][j] + k); } } } } static void show_stats(unsigned long long time_delta, int cpus, int domains, struct rq_stats *rq_stats, struct domain_stats domain_stats[MAXDOMAINS]) { unsigned long long ttwu_remote; double s, tmp; int i; /* Ensures we don't get 0 time delta */ s = ((double)0.5 + time_delta) / HZ; printf("sample period: %.3fs\n", s); /* TODO add the runqueue stats */ tmp = (double)rq_stats->sched_cnt / s; printf("%.3f calls to schedule / s\n", tmp); tmp = (double)rq_stats->cpu_time / rq_stats->pcnt; printf("%.3fms average timeslice\n", tmp); tmp = (double)rq_stats->delay_time / rq_stats->pcnt; printf("%.3fms average runqueue delay\n", tmp); printf("\n--- wakeup 
statistics ---\n"); tmp = (double)rq_stats->ttwu_cnt / s; printf(" %.3f task wakes / s\n", tmp); tmp = (double)100 * rq_stats->ttwu_local / rq_stats->ttwu_cnt; printf(" %.3f%% of them from the local CPU\n", tmp); ttwu_remote = rq_stats->ttwu_cnt - rq_stats->ttwu_local; for (i = 0; i < domains; i++) { tmp = (double)100 * domain_stats[i].ttwu_wake_remote / ttwu_remote; printf(" %.3f%% of remote wakeups come from domain%d\n", tmp, i); tmp = (double)100 * domain_stats[i].ttwu_move_balance / domain_stats[i].ttwu_wake_remote; printf(" %.3f%% are moved to the local CPU via passive load balancing\n", tmp); tmp = (double)100 * domain_stats[i].ttwu_move_affine / domain_stats[i].ttwu_wake_remote; printf(" %.3f%% are moved to the local CPU via affine wakeups\n", tmp); } printf("\n--- load balancing statistics ---\n"); for (i = 0; i < domains; i++) { unsigned long long total_lb = 0; unsigned long long total_pulled = 0; int j; printf(" for domain%d\n", i); for (j = 0; j < 3; j++) { total_lb += domain_stats[i].lb_cnt[j]; total_pulled += domain_stats[i].lb_pulled[j]; } tmp = (double)total_lb / s; printf(" %.3f load balance calls / s", tmp); tmp = (double)total_pulled / s; printf(" move %.3f tasks / s\n", tmp); for (j = 0; j < 3; j++) { unsigned long long lb = domain_stats[i].lb_cnt[j]; unsigned long long pulled = domain_stats[i].lb_pulled[j]; tmp = (double)100 * lb / total_lb; printf(" %.3f%% calls and", tmp); tmp = (double)100 * pulled / total_pulled; printf(" %.3f%% task moves came from ", tmp); if (j == 0) printf("idle balancing\n"); else if (j == 1) printf("busy balancing\n"); else if (j == 2) printf("new-idle balancing\n"); if (lb) { tmp = (double)100 * (lb - domain_stats[i].lb_balanced[j]) / lb; printf(" %.3f%% were imbalanced", tmp); tmp = (double)domain_stats[i].lb_imbalance[j] / (lb - domain_stats[i].lb_balanced[j]); printf(" with an average imbalance of %.3f\n", tmp); tmp = (double)100 * domain_stats[i].lb_failed[j] / lb; printf(" %.3f%% found an imbalance but failed\n", 
tmp); } if (pulled) { tmp = (double)100 * domain_stats[i].lb_hot_pulled[j] / pulled; printf(" %.3f%% of tasks moved were cache hot\n", tmp); } } tmp = (double)domain_stats[i].alb_cnt / s; printf(" %.3f active balances / s ", tmp); tmp = (double)domain_stats[i].alb_pushed / s; printf(" move %.3f tasks / s\n", tmp); if (domain_stats[i].alb_cnt) { tmp = (double)100 * domain_stats[i].alb_failed / domain_stats[i].alb_cnt; printf(" %%%.3f attempts failed\n", tmp); } tmp = (double)domain_stats[i].sbe_cnt / s; printf(" %.3f exec balances / s ", tmp); tmp = (double)domain_stats[i].sbe_pushed / s; printf(" move %.3f tasks / s\n", tmp); if (domain_stats[i].sbe_cnt) { tmp = (double)100 * domain_stats[i].sbe_balanced / domain_stats[i].sbe_cnt; printf(" %%%.3f found no imbalance\n", tmp); } tmp = (double)domain_stats[i].sbf_cnt / s; printf(" %.3f fork balances / s ", tmp); tmp = (double)domain_stats[i].sbf_pushed / s; printf(" move %.3f tasks / s\n", tmp); if (domain_stats[i].sbf_cnt) { tmp = (double)100 * domain_stats[i].sbf_balanced / domain_stats[i].sbf_cnt; printf(" %%%.3f found no imbalance\n", tmp); } printf("\n"); } } static unsigned long long pre_ts, post_ts; static int pre_cpus, post_cpus; static int pre_domains, post_domains; static struct rq_stats pre_rq_stats[MAXCPUS]; static struct rq_stats post_rq_stats[MAXCPUS]; static struct rq_stats delta_rq_stats; static struct domain_stats pre_domain_stats[MAXCPUS][MAXDOMAINS]; static struct domain_stats post_domain_stats[MAXCPUS][MAXDOMAINS]; static struct domain_stats delta_domain_stats[MAXDOMAINS]; int main(int argc, char *argv[]) { FILE *pre, *post; if (argc < 2) { fprintf(stderr, "Usage: %s <starts before> <stats after>\n", argv[0]); exit(1); } pre = fopen(argv[1], "r"); if (pre == NULL) perror("fopen pre file"), exit(1); post = fopen(argv[2], "r"); if (post == NULL) perror("fopen post file"), exit(1); parse_file(pre, &pre_ts, &pre_cpus, &pre_domains, pre_rq_stats, pre_domain_stats); parse_file(post, &post_ts, &post_cpus, 
&post_domains, post_rq_stats, post_domain_stats); if (pre_cpus != post_cpus || pre_domains != post_domains) { fprintf(stderr, "pre and post file formats mismatch\n"); exit(1); } find_stats_delta(pre_rq_stats, post_rq_stats, &delta_rq_stats, pre_domain_stats, post_domain_stats, delta_domain_stats); show_stats(post_ts - pre_ts, pre_cpus, pre_domains, &delta_rq_stats, delta_domain_stats); exit(0); } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/