Pranith Kumar <bobby.pr...@gmail.com> writes: > This adds the x86 store-after-load re-ordering litmus test. > > Most of the supporting files are mostly unmodified and generated by > the litmus tool. > > Signed-off-by: Pranith Kumar <bobby.pr...@gmail.com> > --- > tests/tcg/mttcg/x86/Makefile | 42 ++ > tests/tcg/mttcg/x86/README.txt | 22 + > tests/tcg/mttcg/x86/SAL.c | 491 ++++++++++++++++ > tests/tcg/mttcg/x86/affinity.c | 159 +++++ > tests/tcg/mttcg/x86/affinity.h | 34 ++ > tests/tcg/mttcg/x86/comp.sh | 10 + > tests/tcg/mttcg/x86/litmus_rand.c | 64 +++ > tests/tcg/mttcg/x86/litmus_rand.h | 29 + > tests/tcg/mttcg/x86/outs.c | 148 +++++ > tests/tcg/mttcg/x86/outs.h | 49 ++ > tests/tcg/mttcg/x86/run.sh | 56 ++ > tests/tcg/mttcg/x86/show.awk | 2 + > tests/tcg/mttcg/x86/utils.c | 1148 > +++++++++++++++++++++++++++++++++++++ > tests/tcg/mttcg/x86/utils.h | 275 +++++++++
So I think tests/tcg/x86/litmus makes more sense for the final location. The tests/tcg/ directory is a bit of a mess though; a bunch of stuff needs to be moved into subdirs. > 14 files changed, 2529 insertions(+) > create mode 100644 tests/tcg/mttcg/x86/Makefile > create mode 100644 tests/tcg/mttcg/x86/README.txt > create mode 100644 tests/tcg/mttcg/x86/SAL.c > create mode 100644 tests/tcg/mttcg/x86/affinity.c > create mode 100644 tests/tcg/mttcg/x86/affinity.h > create mode 100644 tests/tcg/mttcg/x86/comp.sh > create mode 100644 tests/tcg/mttcg/x86/litmus_rand.c > create mode 100644 tests/tcg/mttcg/x86/litmus_rand.h > create mode 100644 tests/tcg/mttcg/x86/outs.c > create mode 100644 tests/tcg/mttcg/x86/outs.h > create mode 100755 tests/tcg/mttcg/x86/run.sh > create mode 100644 tests/tcg/mttcg/x86/show.awk > create mode 100644 tests/tcg/mttcg/x86/utils.c > create mode 100644 tests/tcg/mttcg/x86/utils.h > > diff --git a/tests/tcg/mttcg/x86/Makefile b/tests/tcg/mttcg/x86/Makefile > new file mode 100644 > index 0000000..6b8fa37 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/Makefile > @@ -0,0 +1,42 @@ > +GCC=gcc > +GCCOPTS=-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 -fomit-frame-pointer > -O2 -pthread > +LINKOPTS= > +SRC=\ > + SAL.c\ > + > +EXE=$(SRC:.c=.exe) > +T=$(SRC:.c=.t) > + > +all: $(EXE) $(T) > + > +clean: > + /bin/rm -f *.o *.s *.t *.exe *~ > + > +cleansource: > + /bin/rm -f *.o *.c *.h *.s *~ > + > +affinity.o: affinity.c > + $(GCC) $(GCCOPTS) -O2 -c affinity.c > + > +outs.o: outs.c > + $(GCC) $(GCCOPTS) -O2 -c outs.c > + > +utils.o: utils.c > + $(GCC) $(GCCOPTS) -O2 -c utils.c > + > +litmus_rand.o: litmus_rand.c > + $(GCC) $(GCCOPTS) -O2 -c litmus_rand.c > + > +UTILS=affinity.o outs.o utils.o litmus_rand.o > + > +%.exe:%.s $(UTILS) > + $(GCC) $(GCCOPTS) $(LINKOPTS) -o $@ $(UTILS) $< > + > +%.s:%.c > + $(GCC) $(GCCOPTS) -S $< > + > +%.t:%.s > + awk -f show.awk $< > $@ > + > +tests: all > + ./run.sh > diff --git a/tests/tcg/mttcg/x86/README.txt 
b/tests/tcg/mttcg/x86/README.txt > new file mode 100644 > index 0000000..98ce238 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/README.txt > @@ -0,0 +1,22 @@ > +Tests produced by litmus for architecture X86 on linux > + > +COMPILING > + with command 'make [-j N]' or 'sh comp.sh' > + > +RUNNING ALL TESTS > + with command 'sh run.sh'. Test result on standard output. > + > +RUNNING ONE TEST > + Tests are .exe files, for instance SAL.exe, run it by './SAL.exe' > + > +RUNNING OPTIONS > + Main options to the run.sh script and to .exe files: > + -v be verbose (can be repeated). > + -a <n> number of (logical) processors available, default 0. > + The default value of 0 means that .exe files attempt > + to infer the actual number of logical threads. > + -s <n> one run operates on arrays of size <n>, default 100000. > + -r <n> number of runs, default 10. > + > + For more options see for instance './SAL.exe -help' and litmus > documentation > + <http://diy.inria.fr/doc/litmus.html> > diff --git a/tests/tcg/mttcg/x86/SAL.c b/tests/tcg/mttcg/x86/SAL.c > new file mode 100644 > index 0000000..1b66508 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/SAL.c > @@ -0,0 +1,491 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* This C source is a product of litmus7 and includes source that is > */ > +/* governed by the CeCILL-B license. 
> */ > +/****************************************************************************/ > +/* Parameters */ > +#define SIZE_OF_TEST 100000 > +#define NUMBER_OF_RUN 10 > +#define AVAIL 0 > +#define STRIDE 1 > +#define MAX_LOOP 0 > +#define N 2 > +#define AFF_INCR (0) > +/* Includes */ > +#include <stdio.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <errno.h> > +#include <assert.h> > +#include <time.h> > +#include <limits.h> > +#include "utils.h" > +#include "outs.h" > +#include "affinity.h" > + > +/* params */ > +typedef struct { > + int verbose; > + int size_of_test,max_run; > + int stride; > + aff_mode_t aff_mode; > + int ncpus, ncpus_used; > + int do_change; > +} param_t; > + > + > +/* Full memory barrier */ > +inline static void mbar(void) { > + asm __volatile__ ("mfence" ::: "memory"); > +} > + > +/* Barriers macros */ > +inline static void barrier_wait(unsigned int id, unsigned int k, int > volatile *b) { > + if ((k % N) == id) { > + *b = 1 ; > + } else { > + while (*b == 0) ; > + } > +} > + > +/**********************/ > +/* Context definition */ > +/**********************/ > + > + > +typedef struct { > +/* Shared variables */ > + int *y; > + int *x; > +/* Final content of observed registers */ > + int *out_0_eax; > + int *out_1_eax; > +/* Check data */ > + pb_t *fst_barrier; > +/* Barrier for litmus loop */ > + int volatile *barrier; > +/* Instance seed */ > + st_t seed; > +/* Parameters */ > + param_t *_p; > +} ctx_t; > + > +inline static int final_cond(int _out_0_eax,int _out_1_eax) { > + switch (_out_0_eax) { > + case 0: > + switch (_out_1_eax) { > + case 0: > + return 1; > + default: > + return 0; > + } > + default: > + return 0; > + } > +} > + > +inline static int final_ok(int cond) { > + return cond; > +} > + > +/**********************/ > +/* Outcome collection */ > +/**********************/ > +#define NOUTS 2 > +typedef intmax_t outcome_t[NOUTS]; > + > +static const int out_0_eax_f = 0 ; > +static const int out_1_eax_f = 1 ; > + > + > 
+typedef struct hist_t { > + outs_t *outcomes ; > + count_t n_pos,n_neg ; > +} hist_t ; > + > +static hist_t *alloc_hist(void) { > + hist_t *p = malloc_check(sizeof(*p)) ; > + p->outcomes = NULL ; > + p->n_pos = p->n_neg = 0 ; > + return p ; > +} > + > +static void free_hist(hist_t *h) { > + free_outs(h->outcomes) ; > + free(h) ; > +} > + > +static void add_outcome(hist_t *h, count_t v, outcome_t o, int show) { > + h->outcomes = add_outcome_outs(h->outcomes,o,NOUTS,v,show) ; > +} > + > +static void merge_hists(hist_t *h0, hist_t *h1) { > + h0->n_pos += h1->n_pos ; > + h0->n_neg += h1->n_neg ; > + h0->outcomes = merge_outs(h0->outcomes,h1->outcomes,NOUTS) ; > +} > + > +static count_t sum_hist(hist_t *h) { > + return sum_outs(h->outcomes) ; > +} > + > + > +static void do_dump_outcome(FILE *fhist, intmax_t *o, count_t c, int show) { > + fprintf(fhist,"%-6"PCTR"%c>0:EAX=%i; 1:EAX=%i;\n",c,show ? '*' : > ':',(int)o[out_0_eax_f],(int)o[out_1_eax_f]); > +} > + > +static void just_dump_outcomes(FILE *fhist, hist_t *h) { > + outcome_t buff ; > + dump_outs(fhist,do_dump_outcome,h->outcomes,buff,NOUTS) ; > +} > + > +/*******************************************************/ > +/* Context allocation, freeing and reinitialization */ > +/*******************************************************/ > + > +static void init(ctx_t *_a) { > + int size_of_test = _a->_p->size_of_test; > + > + _a->seed = rand(); > + _a->out_0_eax = malloc_check(size_of_test*sizeof(*(_a->out_0_eax))); > + _a->out_1_eax = malloc_check(size_of_test*sizeof(*(_a->out_1_eax))); > + _a->y = malloc_check(size_of_test*sizeof(*(_a->y))); > + _a->x = malloc_check(size_of_test*sizeof(*(_a->x))); > + _a->fst_barrier = pb_create(N); > + _a->barrier = malloc_check(size_of_test*sizeof(*(_a->barrier))); > +} > + > +static void finalize(ctx_t *_a) { > + free((void *)_a->y); > + free((void *)_a->x); > + free((void *)_a->out_0_eax); > + free((void *)_a->out_1_eax); > + pb_free(_a->fst_barrier); > + free((void *)_a->barrier); > 
+} > + > +static void reinit(ctx_t *_a) { > + for (int _i = _a->_p->size_of_test-1 ; _i >= 0 ; _i--) { > + _a->y[_i] = 0; > + _a->x[_i] = 0; > + _a->out_0_eax[_i] = -239487; > + _a->out_1_eax[_i] = -239487; > + _a->barrier[_i] = 0; > + } > +} > + > +/**************************************/ > +/* Prefetch (and check) global values */ > +/**************************************/ > + > +static void check_globals(ctx_t *_a) { > + int *y = _a->y; > + int *x = _a->x; > + for (int _i = _a->_p->size_of_test-1 ; _i >= 0 ; _i--) { > + if (rand_bit(&(_a->seed)) && y[_i] != 0) fatal("SAL, check_globals > failed"); > + if (rand_bit(&(_a->seed)) && x[_i] != 0) fatal("SAL, check_globals > failed"); > + } > + pb_wait(_a->fst_barrier); > +} > + > +/***************/ > +/* Litmus code */ > +/***************/ > + > +typedef struct { > + int th_id; /* I am running on this thread */ > + int *cpu; /* On this cpu */ > + ctx_t *_a; /* In this context */ > +} parg_t; > + > + > + > + > + > +static void *P0(void *_vb) { > + mbar(); > + parg_t *_b = (parg_t *)_vb; > + ctx_t *_a = _b->_a; > + int _ecpu = _b->cpu[_b->th_id]; > + force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL"); > + check_globals(_a); > + int _th_id = _b->th_id; > + int volatile *barrier = _a->barrier; > + int _size_of_test = _a->_p->size_of_test; > + int _stride = _a->_p->stride; > + int *out_0_eax = _a->out_0_eax; > + for (int _j = _stride ; _j > 0 ; _j--) { > + for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) { > + barrier_wait(_th_id,_i,&barrier[_i]); > +asm __volatile__ ( > +"\n" > +"#START _litmus_P0\n" > +"#_litmus_P0_0\n\t" > +"movl $1,%[x]\n" > +"#_litmus_P0_1\n\t" > +"mfence\n" > +"#_litmus_P0_2\n\t" > +"movl %[y],%[eax]\n" > +"#END _litmus_P0\n\t" > +:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_0_eax[_i]) > +: > +:"cc","memory" > +); > + } > + } > + mbar(); > + return NULL; > +} > + > +static void *P1(void *_vb) { > + mbar(); > + parg_t *_b = (parg_t *)_vb; > + ctx_t *_a = _b->_a; > + int 
_ecpu = _b->cpu[_b->th_id]; > + force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL"); > + check_globals(_a); > + int _th_id = _b->th_id; > + int volatile *barrier = _a->barrier; > + int _size_of_test = _a->_p->size_of_test; > + int _stride = _a->_p->stride; > + int *out_1_eax = _a->out_1_eax; > + for (int _j = _stride ; _j > 0 ; _j--) { > + for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) { > + barrier_wait(_th_id,_i,&barrier[_i]); > +asm __volatile__ ( > +"\n" > +"#START _litmus_P1\n" > +"#_litmus_P1_0\n\t" > +"movl $1,%[y]\n" > +"#_litmus_P1_1\n\t" > +"mfence\n" > +"#_litmus_P1_2\n\t" > +"movl %[x],%[eax]\n" > +"#END _litmus_P1\n\t" > +:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_1_eax[_i]) > +: > +:"cc","memory" > +); > + } > + } > + mbar(); > + return NULL; > +} > + > +typedef struct { > + pm_t *p_mutex; > + pb_t *p_barrier; > + param_t *_p; > + int z_id; > + int *cpus; > +} zyva_t; > + > +#define NT N > + > +static void *zyva(void *_va) { > + zyva_t *_a = (zyva_t *) _va; > + param_t *_b = _a->_p; > + pb_wait(_a->p_barrier); > + pthread_t thread[NT]; > + parg_t parg[N]; > + f_t *fun[] = {&P0,&P1}; > + hist_t *hist = alloc_hist(); > + ctx_t ctx; > + ctx._p = _b; > + > + init(&ctx); > + for (int _p = N-1 ; _p >= 0 ; _p--) { > + parg[_p].th_id = _p; parg[_p]._a = &ctx; > + parg[_p].cpu = &(_a->cpus[0]); > + } > + > + for (int n_run = 0 ; n_run < _b->max_run ; n_run++) { > + if (_b->aff_mode == aff_random) { > + pb_wait(_a->p_barrier); > + if (_a->z_id == 0) > perm_prefix_ints(&ctx.seed,_a->cpus,_b->ncpus_used,_b->ncpus); > + pb_wait(_a->p_barrier); > + } else { > + } > + if (_b->verbose>1) fprintf(stderr,"Run %i of %i\r", n_run, _b->max_run); > + reinit(&ctx); > + if (_b->do_change) perm_funs(&ctx.seed,fun,N); > + for (int _p = NT-1 ; _p >= 0 ; _p--) { > + launch(&thread[_p],fun[_p],&parg[_p]); > + } > + if (_b->do_change) perm_threads(&ctx.seed,thread,NT); > + for (int _p = NT-1 ; _p >= 0 ; _p--) { > + join(&thread[_p]); > + } > + /* Log 
final states */ > + for (int _i = _b->size_of_test-1 ; _i >= 0 ; _i--) { > + int _out_0_eax_i = ctx.out_0_eax[_i]; > + int _out_1_eax_i = ctx.out_1_eax[_i]; > + outcome_t o; > + int cond; > + > + cond = final_ok(final_cond(_out_0_eax_i,_out_1_eax_i)); > + o[out_0_eax_f] = _out_0_eax_i; > + o[out_1_eax_f] = _out_1_eax_i; > + add_outcome(hist,1,o,cond); > + if (cond) { hist->n_pos++; } else { hist->n_neg++; } > + } > + } > + > + finalize(&ctx); > + return hist; > +} > + > +#define ENOUGH 10 > + > +static int postlude(FILE *out,cmd_t *cmd,hist_t *hist,count_t p_true,count_t > p_false,tsc_t total) { > + fprintf(out,"Test SAL Forbidden\n"); > + fprintf(out,"Histogram (%i states)\n",finals_outs(hist->outcomes)); > + just_dump_outcomes(out,hist); > + int cond = p_true == 0; > + fprintf(out,"%s\n",cond?"Ok":"No"); > + fprintf(out,"\nWitnesses\n"); > + fprintf(out,"Positive: %" PCTR ", Negative: %" PCTR "\n",p_false,p_true); > + fprintf(out,"Condition ~exists (0:EAX=0 /\\ 1:EAX=0) is > %svalidated\n",cond ? "" : "NOT "); > + fprintf(out,"Hash=d8f89591b2adad11d42d3eeb22d212c6\n"); > + count_t cond_true = p_true; > + count_t cond_false = p_false; > + fprintf(out,"Observation SAL %s %" PCTR " %" PCTR "\n",!cond_true ? > "Never" : !cond_false ? "Always" : "Sometimes",cond_true,cond_false); > + if (p_true > 0) { > + } > + fprintf(out,"Time SAL %.2f\n",total / 1000000.0); > + fflush(out); > + return cond; > +} > + > +static int run(cmd_t *cmd,cpus_t *def_all_cpus,FILE *out) { > + tsc_t start = timeofday(); > + param_t prm ; > +/* Set some parameters */ > + prm.verbose = cmd->verbose; > + prm.size_of_test = cmd->size_of_test; > + prm.max_run = cmd->max_run; > + prm.stride = cmd->stride; > + prm.do_change = 1; > + if (cmd->fix) prm.do_change = 0; > +/* Computes number of test concurrent instances */ > + int n_avail = cmd->avail > 0 ? 
cmd->avail : cmd->aff_cpus->sz; > + if (n_avail > cmd->aff_cpus->sz) log_error("Warning: avail=%i, > available=%i\n",n_avail, cmd->aff_cpus->sz); > + int n_exe; > + if (cmd->n_exe > 0) { > + n_exe = cmd->n_exe; > + } else { > + n_exe = n_avail < N ? 1 : n_avail / N; > + } > +/* Set affinity parameters */ > + cpus_t *all_cpus = cmd->aff_cpus; > + int aff_cpus_sz = cmd->aff_mode == aff_random ? max(all_cpus->sz,N*n_exe) > : N*n_exe; > + int aff_cpus[aff_cpus_sz]; > + prm.aff_mode = cmd->aff_mode; > + prm.ncpus = aff_cpus_sz; > + prm.ncpus_used = N*n_exe; > +/* Show parameters to user */ > + if (prm.verbose) { > + log_error( "SAL: n=%i, r=%i, s=%i",n_exe,prm.max_run,prm.size_of_test); > + log_error(", st=%i",prm.stride); > + if (cmd->aff_mode == aff_incr) { > + log_error( ", i=%i",cmd->aff_incr); > + } else if (cmd->aff_mode == aff_random) { > + log_error(", +ra"); > + } else if (cmd->aff_mode == aff_custom) { > + log_error(", +ca"); > + } else if (cmd->aff_mode == aff_scan) { > + log_error(", +sa"); > + } > + log_error(", p='"); > + cpus_dump(stderr,cmd->aff_cpus); > + log_error("'"); > + log_error("\n"); > + } > + if (cmd->aff_mode == aff_random) { > + for (int k = 0 ; k < aff_cpus_sz ; k++) { > + aff_cpus[k] = all_cpus->cpu[k % all_cpus->sz]; > + } > + } > + hist_t *hist = NULL; > + int n_th = n_exe-1; > + pthread_t th[n_th]; > + zyva_t zarg[n_exe]; > + pm_t *p_mutex = pm_create(); > + pb_t *p_barrier = pb_create(n_exe); > + int next_cpu = 0; > + int delta = cmd->aff_incr; > + if (delta <= 0) { > + for (int k=0 ; k < all_cpus->sz ; k++) all_cpus->cpu[k] = -1; > + delta = 1; > + } else { > + delta %= all_cpus->sz; > + } > + int start_scan=0, max_start=gcd(delta,all_cpus->sz); > + int *aff_p = aff_cpus; > + for (int k=0 ; k < n_exe ; k++) { > + zyva_t *p = &zarg[k]; > + p->_p = &prm; > + p->p_mutex = p_mutex; p->p_barrier = p_barrier; > + p->z_id = k; > + p->cpus = aff_p; > + if (cmd->aff_mode != aff_incr) { > + aff_p += N; > + } else { > + for (int i=0 ; i < N ; 
i++) { > + *aff_p = all_cpus->cpu[next_cpu]; aff_p++; > + next_cpu += delta; next_cpu %= all_cpus->sz; > + if (next_cpu == start_scan) { > + start_scan++ ; start_scan %= max_start; > + next_cpu = start_scan; > + } > + } > + } > + if (k < n_th) { > + launch(&th[k],zyva,p); > + } else { > + hist = (hist_t *)zyva(p); > + } > + } > + > + count_t n_outs = prm.size_of_test; n_outs *= prm.max_run; > + for (int k=0 ; k < n_th ; k++) { > + hist_t *hk = (hist_t *)join(&th[k]); > + if (sum_hist(hk) != n_outs || hk->n_pos + hk->n_neg != n_outs) { > + fatal("SAL, sum_hist"); > + } > + merge_hists(hist,hk); > + free_hist(hk); > + } > + cpus_free(all_cpus); > + tsc_t total = timeofday() - start; > + pm_free(p_mutex); > + pb_free(p_barrier); > + > + n_outs *= n_exe ; > + if (sum_hist(hist) != n_outs || hist->n_pos + hist->n_neg != n_outs) { > + fatal("SAL, sum_hist") ; > + } > + count_t p_true = hist->n_pos, p_false = hist->n_neg; > + int cond = postlude(out,cmd,hist,p_true,p_false,total); > + free_hist(hist); > + return cond; > +} > + > + > +int main(int argc, char **argv) { > + cpus_t *def_all_cpus = read_force_affinity(AVAIL,0); > + if (def_all_cpus->sz < N) { > + cpus_free(def_all_cpus); > + return EXIT_SUCCESS; > + } > + cmd_t def = { 0, NUMBER_OF_RUN, SIZE_OF_TEST, STRIDE, AVAIL, 0, 0, > aff_incr, 0, 0, AFF_INCR, def_all_cpus, NULL, -1, MAX_LOOP, NULL, NULL, -1, > -1, -1, 0, 0}; > + cmd_t cmd = def; > + parse_cmd(argc,argv,&def,&cmd); > + int cond = run(&cmd,def_all_cpus,stdout); > + if (def_all_cpus != cmd.aff_cpus) cpus_free(def_all_cpus); > + return cond ? EXIT_SUCCESS : EXIT_FAILURE; > +} > diff --git a/tests/tcg/mttcg/x86/affinity.c b/tests/tcg/mttcg/x86/affinity.c > new file mode 100644 > index 0000000..9535bf2 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/affinity.c > @@ -0,0 +1,159 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. 
> */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#include <stdio.h> > +#include <sched.h> > +#include <unistd.h> > +#include "utils.h" > +#include "affinity.h" > + > +#ifdef CPUS_DEFINED > +cpus_t *read_affinity(void) { > + cpu_set_t mask; > + int sz = 0 ; > + int res = pthread_getaffinity_np(pthread_self(), sizeof(mask), &mask) ; > + > + if (res != 0) { > + errexit("pthread_getaffinity_np",res); > + } > + for (int p=0 ; p < CPU_SETSIZE ; p++) { > + if (CPU_ISSET(p,&mask)) sz++ ; > + } > + > + cpus_t *r = cpus_create(sz) ; > + for (int p=0, *q=r->cpu ; p < CPU_SETSIZE ; p++) { > + if (CPU_ISSET(p,&mask)) *q++ = p ; > + } > + return r ; > +} > + > +#endif > +/* Attempt to force processors wake up, on devices where unused procs > + go to sleep... */ > + > + > +#ifdef FORCE_AFFINITY > +const static tsc_t sec = (tsc_t)1000000 ; > + > +static void* loop(void *p) { > + tsc_t *q = p ; > + tsc_t max = *q ; > + while (timeofday() < max) ; > + return NULL ; > +} > + > + > +static void warm_up(int sz, tsc_t d) { > + pthread_t th[sz]; > + d += timeofday() ; > + for (int k = 0 ; k < sz ; k++) launch(&th[k], loop, &d) ; > + for (int k = 0 ; k < sz ; k++) join(&th[k]) ; > +} > + > +#ifdef CPUS_DEFINED > +cpus_t *read_force_affinity(int n_avail, int verbose) { > + int sz = n_avail <= 1 ? 
1 : n_avail ; > + tsc_t max = sec / 100 ; > + > + for ( ; ; ) { > + warm_up(sz+1,max) ; > + cpus_t *r = read_affinity() ; > + if (n_avail <= r->sz) return r ; > + if (verbose) { > + fprintf(stderr,"Read affinity: '") ; > + cpus_dump(stderr,r) ; > + fprintf(stderr,"'\n") ; > + } > + cpus_free(r) ; > + } > +} > +#endif > +#endif > + > +#ifdef CPUS_DEFINED > + > +/* Enforcing processor affinity. > + Notice that logical processor numbers may be negative. > + In that case, affinity setting is ignored */ > + > + > +void write_affinity(cpus_t *p) { > + cpu_set_t mask; > + int exists_pos = 0 ; > + > + CPU_ZERO(&mask) ; > + for (int k = 0 ; k < p->sz ; k++) { > + if (p->cpu[k] >= 0) { > + CPU_SET(p->cpu[k],&mask) ; > + exists_pos = 1 ; > + } > + } > + if (exists_pos) { > + int r = pthread_setaffinity_np(pthread_self(),sizeof(mask),&mask) ; > + if (r != 0) { > + errexit("pthread_setaffinity_np",r) ; > + } > + } > +} > +#endif > + > +void write_one_affinity(int a) { > + if (a >= 0) { > + cpu_set_t mask; > + CPU_ZERO(&mask) ; > + CPU_SET(a,&mask) ; > + int r = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) ; > + if (r != 0) { > + errexit("pthread_setaffinity_np",r) ; > + } > + } > +} > + > +#ifdef FORCE_AFFINITY > +/* Get the number of present cpus, fragile */ > + > +static const char *present = "/sys/devices/system/cpu/present" ; > + > +static int get_present(void) { > + FILE *fp = fopen(present,"r") ; > + if (fp == NULL) return -1 ; > + int r1,r2 ; > + int n = fscanf(fp,"%d-%d\n",&r1,&r2) ; > + fclose(fp) ; > + if (n != 2) return -1 ; > + return r2-r1+1 ; > +} > + > +void force_one_affinity(int a, int sz,int verbose, char *name) { > + if (a >= 0) { > + cpu_set_t mask; > + int r ; > + CPU_ZERO(&mask) ; > + CPU_SET(a,&mask) ; > + do { > + r = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) ; > + if (r != 0) { > + if (verbose) > + fprintf(stderr,"%s: force %i failed\n",name,a) ; > + int nwarm = get_present() ; > + if (verbose > 1) > + 
fprintf(stderr,"%s: present=%i\n",name,nwarm) ; > + if (nwarm < 0) nwarm = sz+1 ; > + warm_up(nwarm,sec/100) ; > + } > + } while (r != 0) ; > + } > +} > +#endif > diff --git a/tests/tcg/mttcg/x86/affinity.h b/tests/tcg/mttcg/x86/affinity.h > new file mode 100644 > index 0000000..9fb6a25 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/affinity.h > @@ -0,0 +1,34 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. 
> */ > +/****************************************************************************/ > +#ifndef _AFFINITY_H > +#define _AFFINITY_H 1 > + > +#include "utils.h" > + > +#ifdef CPUS_DEFINED > +cpus_t *read_affinity(void) ; > +#ifdef FORCE_AFFINITY > +cpus_t *read_force_affinity(int n_avail, int verbose) ; > +#endif > +void write_affinity(cpus_t *p) ; > +#endif > + > +void write_one_affinity(int cpu) ; > +#ifdef FORCE_AFFINITY > +void force_one_affinity(int cpu, int sz, int verbose, char *name) ; > +#endif > + > +#endif > diff --git a/tests/tcg/mttcg/x86/comp.sh b/tests/tcg/mttcg/x86/comp.sh > new file mode 100644 > index 0000000..251a710 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/comp.sh > @@ -0,0 +1,10 @@ > +GCC=gcc > +GCCOPTS="-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 > -fomit-frame-pointer -O2 -pthread" > +LINKOPTS="" > +/bin/rm -f *.exe *.s > +$GCC $GCCOPTS -O2 -c affinity.c > +$GCC $GCCOPTS -O2 -c outs.c > +$GCC $GCCOPTS -O2 -c utils.c > +$GCC $GCCOPTS -O2 -c litmus_rand.c > +$GCC $GCCOPTS $LINKOPTS -o SAL.exe affinity.o outs.o utils.o litmus_rand.o > SAL.c > +$GCC $GCCOPTS -S SAL.c && awk -f show.awk SAL.s > SAL.t && /bin/rm SAL.s > diff --git a/tests/tcg/mttcg/x86/litmus_rand.c > b/tests/tcg/mttcg/x86/litmus_rand.c > new file mode 100644 > index 0000000..de33032 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/litmus_rand.c > @@ -0,0 +1,64 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. 
You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#include <stdint.h> > +#include "litmus_rand.h" > + > +/* > + Simple generator > + http://en.wikipedia.org/wiki/Linear_congruential_generator > +*/ > + > + > +/* > + > + From ocaml sources: (globroot.c) > + Linear congruence with modulus = 2^32, multiplier = 69069 > + (Knuth vol 2 p. 106, line 15 of table 1), additive = 25173. > + > + > + Knuth (vol 2 p. 13) shows that the least significant bits are > + "less random" than the most significant bits with a modulus of 2^m. > + We just swap half words, enough? */ > + > +static const uint32_t a = 69069; > +static const uint32_t c = 25173 ; > + > +inline static uint32_t unlocked_rand(st_t *st) { > + uint32_t r = a * *st + c ; > + *st = r ; > + /* Swap high & low bits */ > + uint32_t low = r & 0xffff ; > + uint32_t high = r >> 16 ; > + r = high | (low << 16) ; > + return r ; > +} > + > +int rand_bit(st_t *st) { > + uint32_t r = unlocked_rand(st) ; > + r &= 1 ; > + return r ; > +} > + > +static const uint32_t r_max = UINT32_MAX ; > + > +uint32_t rand_k (uint32_t *st,uint32_t k) { > + uint32_t r, v ; > + do { > + r = unlocked_rand(st) ; > + v = r % k ; > + } while (r-v > r_max-k+1) ; > + return v ; > +} > diff --git a/tests/tcg/mttcg/x86/litmus_rand.h > b/tests/tcg/mttcg/x86/litmus_rand.h > new file mode 100644 > index 0000000..c358ccb > --- /dev/null > +++ b/tests/tcg/mttcg/x86/litmus_rand.h > @@ -0,0 +1,29 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. 
> */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#ifndef _LITMUS_RAND_H > +#define _LITMUS_RAND_H 1 > + > +#include <stdint.h> > + > +/* type of state for pseudorandom generators */ > +typedef uint32_t st_t ; > + > +/* Unlocked random bit */ > + > +int rand_bit(st_t *st) ; > +uint32_t rand_k(st_t *st,uint32_t n) ; > + > +#endif > diff --git a/tests/tcg/mttcg/x86/outs.c b/tests/tcg/mttcg/x86/outs.c > new file mode 100644 > index 0000000..178f1d2 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/outs.c > @@ -0,0 +1,148 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. 
> */ > +/****************************************************************************/ > +#include <stdlib.h> > +#include <stdio.h> > +#include "outs.h" > + > +/**********************/ > +/* Lexicographic tree */ > +/**********************/ > + > +#if 0 > +static void debug(int *t, int i, int j) { > + for (int k=i ; k <= j ; k++) > + fprintf(stderr,"%i",t[k]) ; > + fprintf(stderr,"\n") ; > +} > +#endif > + > + > +void *malloc_check(size_t sz) ; > + > +static outs_t *alloc_outs(intmax_t k) { > + outs_t *r = malloc_check(sizeof(*r)) ; > + r->k = k ; > + r->c = 0 ; > + r->show = 0 ; > + r->next = r->down = NULL ; > + return r ; > +} > + > +void free_outs(outs_t *p) { > + if (p == NULL) return ; > + free_outs(p->next) ; > + free_outs(p->down) ; > + free(p) ; > +} > + > +/* Worth writing as a loop, since called many times */ > +static outs_t *loop_add_outcome_outs(outs_t *p, intmax_t *k, int i, count_t > c, int show) { > + outs_t *r = p ; > + if (p == NULL || k[i] < p->k) { > + r = alloc_outs(k[i]) ; > + r->next = p ; > + p = r ; > + } > + for ( ; ; ) { > + outs_t **q ; > + if (k[i] > p->k) { > + q = &(p->next) ; > + p = p->next ; > + } else if (i <= 0) { > + p->c += c ; > + p->show = show || p->show ; > + return r ; > + } else { > + i-- ; > + q = &(p->down) ; > + p = p->down ; > + } > + if (p == NULL || k[i] < p->k) { > + outs_t *a = alloc_outs(k[i]) ; > + a->next = p ; > + p = a ; > + *q = a ; > + } > + } > +} > + > +outs_t *add_outcome_outs(outs_t *p, intmax_t *k, int sz, count_t c, int > show) { > + return loop_add_outcome_outs(p,k,sz-1,c,show) ; > +} > + > +count_t sum_outs(outs_t *p) { > + count_t r = 0 ; > + for ( ; p ; p = p->next) { > + r += p->c ; > + r += sum_outs(p->down) ; > + } > + return r ; > +} > + > +int finals_outs(outs_t *p) { > + int r = 0 ; > + for ( ; p ; p = p->next) { > + if (p->c > 0) r++ ; > + r += finals_outs(p->down) ; > + } > + return r ; > +} > + > +void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff,int > sz) { > + 
for ( ; p ; p = p->next) { > + buff[sz-1] = p->k ; > + if (p->c > 0) { > + dout(chan,buff,p->c,p->show) ; > + } else if (p->down) { > + dump_outs(chan,dout,p->down,buff,sz-1) ; > + } > + } > +} > + > +/* merge p and q into p */ > +static outs_t *do_merge_outs(outs_t *p, outs_t *q) { > + if (q == NULL) { // Nothing to add > + return p ; > + } > + if (p == NULL || q->k < p->k) { // Need a cell > + outs_t *r = alloc_outs(q->k) ; > + r->next = p ; > + p = r ; > + } > + if (p->k == q->k) { > + p->c += q->c ; > + p->show = p->show || q->show ; > + p->down = do_merge_outs(p->down,q->down) ; > + p->next = do_merge_outs(p->next,q->next) ; > + } else { > + p->next = do_merge_outs(p->next,q) ; > + } > + return p ; > +} > + > +outs_t *merge_outs(outs_t *p, outs_t *q, int sz) { > + return do_merge_outs(p,q) ; > +} > + > +int same_outs(outs_t *p,outs_t *q) { > + while (p && q) { > + if (p->k != q->k || p->c != q->c || p->show != q->show) return 0 ; > + if (!same_outs(p->down,q->down)) return 0 ; > + p = p->next ; > + q = q->next ; > + } > + return p == q ; /* == NULL */ > +} > diff --git a/tests/tcg/mttcg/x86/outs.h b/tests/tcg/mttcg/x86/outs.h > new file mode 100644 > index 0000000..761590f > --- /dev/null > +++ b/tests/tcg/mttcg/x86/outs.h > @@ -0,0 +1,49 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. 
You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#ifndef _OUTS_H > +#define _OUTS_H 1 > + > +#include <stdio.h> > + > +/************************/ > +/* Histogram structure */ > +/************************/ > + > + > +/* 64bit counters, should be enough! */ > +#include <inttypes.h> > +typedef uint64_t count_t; > +#define PCTR PRIu64 > + > + > + > + > +typedef struct outs_t { > + struct outs_t *next,*down ; > + count_t c ; > + intmax_t k ; > + int show ; > +} outs_t ; > + > +void free_outs(outs_t *p) ; > +outs_t *add_outcome_outs(outs_t *p, intmax_t *o, int sz, count_t v, int > show) ; > +int finals_outs(outs_t *p) ; > +count_t sum_outs(outs_t *p) ; > +typedef void dump_outcome(FILE *chan, intmax_t *o, count_t c, int show) ; > +void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff, > int sz) ; > +outs_t *merge_outs(outs_t *p,outs_t *q, int sz) ; > +int same_outs(outs_t *p,outs_t *q) ; > +#endif > diff --git a/tests/tcg/mttcg/x86/run.sh b/tests/tcg/mttcg/x86/run.sh > new file mode 100755 > index 0000000..e3538b2 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/run.sh > @@ -0,0 +1,56 @@ > +date > +LITMUSOPTS="${@:-$LITMUSOPTS}" > +QEMU=../../../../build/x86_64-linux-user/qemu-x86_64 > +SLEEP=0 > +if [ ! 
-f SAL.no ]; then > +cat <<'EOF' > +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% > +% Results for x86.tests/SAL.litmus % > +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% > +X86 SAL > +"Fre PodWR Fre PodWR" > + > +{x=0; y=0;} > + > + P0 | P1 ; > + MOV [x],$1 | MOV [y],$1 ; > + MFENCE | MFENCE ; > + MOV EAX,[y] | MOV EAX,[x] ; > + > +~exists (0:EAX=0 /\ 1:EAX=0) > +Generated assembler > +EOF > +cat SAL.t > +$QEMU ./SAL.exe -q $LITMUSOPTS > +ret=$?; > +if [ $ret -eq 1 ]; then > + echo "FAILED"; > + exit $ret; > +fi > +fi > +sleep $SLEEP > + > +cat <<'EOF' > +Revision exported, version 7.22 > +Command line: ../litmus-7.22/litmus -exit true -mach > ../alex_litmus/overdrive01 -o run.x86 x86.tests/SAL.litmus > +Parameters > +#define SIZE_OF_TEST 100000 > +#define NUMBER_OF_RUN 10 > +#define AVAIL 0 > +#define STRIDE 1 > +#define MAX_LOOP 0 > +/* gcc options: -D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 > -fomit-frame-pointer -O2 -pthread */ > +/* barrier: user */ > +/* launch: changing */ > +/* affinity: incr0 */ > +/* alloc: dynamic */ > +/* memory: direct */ > +/* stride: 1 */ > +/* safer: write */ > +/* preload: random */ > +/* speedcheck: no */ > +/* proc used: 0 */ > +EOF > +head -1 comp.sh > +echo "LITMUSOPTS=$LITMUSOPTS" > +date > diff --git a/tests/tcg/mttcg/x86/show.awk b/tests/tcg/mttcg/x86/show.awk > new file mode 100644 > index 0000000..c8ecf20 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/show.awk > @@ -0,0 +1,2 @@ > +/START _litmus_P/ { print $0 } > +/_litmus_P[0-9]+_[0-9]+/ { getline; print $0 ; } > diff --git a/tests/tcg/mttcg/x86/utils.c b/tests/tcg/mttcg/x86/utils.c > new file mode 100644 > index 0000000..cc989b0 > --- /dev/null > +++ b/tests/tcg/mttcg/x86/utils.c > @@ -0,0 +1,1148 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. 
> */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <pthread.h> > +#include <limits.h> > +#include <errno.h> > +#include <stdint.h> > +#include <stdarg.h> > +#include "utils.h" > + > +/********/ > +/* Misc */ > +/********/ > + > +FILE *errlog ; > + > +static void checkerrlog(void) { > + if (!errlog) errlog = stderr ; > +} > + > +void seterrlog(FILE *chan) { > + errlog = chan ; > +} > + > +int log_error(const char *fmt, ...) { > + int result; > + va_list args; > + va_start(args, fmt); > + checkerrlog() ; > + result = vfprintf(errlog, fmt, args); > + fflush(errlog); > + va_end(args); > + return result; > +} > + > +void fatal(char *msg) { > + log_error("Failure: %s\n", msg) ; > + fclose(errlog); > + fprintf(stdout,"Failure: %s\n", msg) ; > + exit(1) ; > +} > + > +void errexit(char *msg,int err) { > + log_error("%s: %s\n",msg,strerror(err)) ; > + fclose(errlog); > + exit(2) ; > +} > + > +void *malloc_check(size_t sz) { > + if (sz == 0) return NULL ; > + void *p = malloc(sz) ; > + if (!p) { > + if (!errno) errno = ENOMEM ; > + errexit("malloc",errno) ; > + } > + return p ; > +} > + > +int max(int n, int m) { return n < m ? 
m : n ; } > + > +void pp_ints(FILE *fp,int *p,int n) { > + if (n > 0) { > + fprintf(fp,"%i",p[0]) ; > + for (int k = 1 ; k < n ; k++) { > + fprintf(fp,",%i",p[k]) ; > + } > + } > +} > + > + > +void *do_align(void *p,size_t sz) { > + uintptr_t x = (uintptr_t)p ; > + x += sz-1 ; > + x /= sz ; > + x *= sz ; > + return (void *)x ; > +} > + > +void *do_noalign(void *p,size_t sz) { > + void *q = do_align(p,sz) ; > + void *r = q - sz/2 ; > + if (r < p) r = q + sz/2 ; > + return r ; > +} > + > +void cat_file(char *path, char *msg, FILE *out) { > + FILE *fp = fopen(path,"r") ; > + if (fp == NULL) return ; > + fprintf(out,"%s\n",msg) ; > + int c,nl=1 ; > + while ((c = fgetc(fp)) != EOF) { > + fputc(c,out) ; > + nl = c == '\n' ; > + } > + fclose(fp) ; > + if (!nl) fputc('\n',out) ; > +} > + > +/************/ > +/* CPU sets */ > +/************/ > + > +cpus_t *cpus_create(int sz) { > + cpus_t *r = malloc_check(sizeof(*r)) ; > + r->sz = sz ; > + r->cpu = malloc_check(sizeof(r->cpu[0])*sz) ; > + return r ; > +} > + > +cpus_t *cpus_create_init(int sz, int t[]) { > + cpus_t *r = cpus_create(sz) ; > + for (int k = 0 ; k < sz ; k++) r->cpu[k] = t[k] ; > + return r ; > +} > + > +void cpus_free(cpus_t *p) { > + free(p->cpu) ; > + free(p) ; > +} > + > +void cpus_dump(FILE *fp, cpus_t *p) { > + pp_ints(fp,p->cpu,p->sz) ; > +} > + > +void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) { > + for (int k = 0 ; k < sz ; k += nprocs) { > + fprintf(fp,"[") ; > + pp_ints(fp,&p[k],nprocs) ; > + fprintf(fp,"] {") ; > + if (nprocs > 0) { > + fprintf(fp,"%i",cm->cpu[p[k]]) ; > + for (int i = 1 ; i < nprocs ; i++) { > + fprintf(fp,",%i",cm->cpu[p[k+i]]) ; > + } > + } > + fprintf(fp,"}\n") ; > + } > +} > + > +/*************/ > +/* Int array */ > +/*************/ > + > + > +void ints_dump(FILE *fp, ints_t *p) { > + if (p->sz > 0) { > + fprintf(fp,"%i:%i",0,p->t[0]) ; > + for (int k = 1 ; k < p->sz ; k++) { > + fprintf(fp,",%i:%i",k,p->t[k]) ; > + } > + } > +} > + > 
+/***********************/ > +/* Prefetch directives */ > +/***********************/ > +void prefetch_dump(FILE *fp, prfdirs_t *p) { > + prfproc_t *q = p->t ; > + int some = 0 ; > + for (int _p = 0 ; _p < p->nthreads ; _p++) { > + int nvars = q[_p].nvars ; > + prfone_t *r = q[_p].t ; > + for (int _v = 0 ; _v < nvars ; _v++) { > + prfdir_t dir = r[_v].dir ; > + if (dir != none) { > + char c = 'I' ; > + if (dir == flush) c = 'F' ; > + else if (dir == touch) c = 'T' ; > + else if (dir == touch_store) c = 'W' ; > + if (some) { > + fprintf(fp,",") ; > + } else { > + some = 1 ; > + } > + fprintf(fp,"%i:%s=%c",_p,r[_v].name,c) ; > + } > + } > + } > +} > + > +static void set_prefetch(prfdirs_t *p, prfdir_t d) { > + prfproc_t *q = p->t ; > + for (int _p = 0 ; _p < p->nthreads ; _p++) { > + int nvars = q[_p].nvars ; > + prfone_t *r = q[_p].t ; > + for (int _v = 0 ; _v < nvars ; _v++) { > + r[_v].dir = d ; > + } > + } > +} > + > +/* ??? */ > + > +int gcd(int a, int b) { > + for ( ; ; ) { > + if (a == 0) return b ; > + int tmp = a ; > + a = b % a ; > + b = tmp ; > + } > +} > + > +/* SMT description */ > + > + > +cpus_t *coremap_seq(int navail, int nways) { > + cpus_t *r = cpus_create(navail) ; > + int ncores = navail / nways ; > + int i = 0 ; > + for (int c = 0 ; c < ncores ; c++) { > + for (int k = 0 ; k < nways ; k++) { > + r->cpu[i++] = c ; > + } > + } > + return r ; > +} > + > +cpus_t *coremap_end(int navail, int nways) { > + cpus_t *r = cpus_create(navail) ; > + int ncores = navail / nways ; > + int i = 0 ; > + for (int k = 0 ; k < nways ; k++) { > + for (int c = 0 ; c < ncores ; c++) { > + r->cpu[i++] = c ; > + } > + } > + return r ; > +} > + > +typedef struct { > + int ncores ; > + cpus_t **core ; > +} mapcore_t ; > + > + > +static void mapcore_free(mapcore_t *p) { > + for (int c = 0 ; c < p->ncores ; c++) cpus_free(p->core[c]) ; > + free(p->core) ; > + free(p) ; > +} > + > +#if 0 > +static mapcore_t *inverse_coremap(cpus_t *p, int nways) { > + mapcore_t *r = 
malloc_check(sizeof(*r)) ; > + r->ncores = p->sz / nways ; > + r->core = malloc_check(r->ncores * sizeof(r->core[0])) ; > + for (int k = 0 ; k < r->ncores ; k++) { > + r->core[k] = cpus_create(nways) ; > + r->core[k]->sz = 0 ; > + } > + for (int k = 0 ; k < p->sz ; k++) { > + int c = p->cpu[k] ; > + cpus_t *q = r->core[c] ; > + q->cpu[q->sz++] = k ; > + } > + return r ; > +} > +#endif > + > +static int get_ncores(cpus_t *cm) { > + int r = 0; > + for (int k = 0 ; k < cm->sz ; k++) { > + if (cm->cpu[k] > r) r = cm->cpu[k] ; > + } > + return r+1 ; > +} > + > +cpus_t *get_core_procs(cpus_t *cm, cpus_t *p,int c) { > + int sz = 0 ; > + cpus_t *r ; > + for (int k = 0 ; k < p->sz ; k++) { > + if (cm->cpu[p->cpu[k]] == c) sz++ ; > + } > + r = cpus_create(sz) ; > + int i = 0 ; > + for (int k = 0 ; k < p->sz ; k++) { > + int proc = p->cpu[k] ; > + if (cm->cpu[proc] == c) r->cpu[i++] = proc ; > + } > + return r ; > +} > + > +static mapcore_t *inverse_procs(cpus_t *cm, cpus_t *p) { > + int ncores = get_ncores(cm) ; > + mapcore_t *r = malloc_check(sizeof(*r)) ; > + r->ncores = ncores ; > + r->core = malloc_check(sizeof(r->core[0])*ncores) ; > + for (int c = 0 ; c < ncores ; c++) { > + r->core[c] = get_core_procs(cm,p,c) ; > + } > + return r ; > +} > + > +static int get_node_sz(int *p) { > + int r = 0 ; > + while (*p++ >= 0) r++ ; > + return r ; > +} > + > +static int get_n(int **p) { > + int r = 0 ; > + while (*p) { > + r += get_node_sz(*p) ; > + p++ ; > + } > + return r ; > +} > + > +static int ok_one_color(int *cm,int *d,int *a,int n, int p, int c) { > + for (int k = 0 ; k < n ; k++) { > + int op = a[k] ; > + if (op >= 0) { > + if (d[n*p+k]) { > + int oc = cm[op] ; > + if (oc == c) { > + return 0 ; > + } > + } > + } > + } > + return 1 ; > +} > + > +static int ok_color(int *cm,int *d,int *a,int n, int *q, int c) { > + for ( ; *q >= 0 ; q++) { > + if (!ok_one_color(cm,d,a,n,*q,c)) return 0 ; > + } > + return 1 ; > +} > + > +static int find_color_diff > +(int prev,st_t *st,int 
*cm,mapcore_t *mc,int *d, int *a,int n, int *q) { > + int sz = get_node_sz(q) ; > + int k0 = prev >= 0 && rand_bit(st) ? prev : rand_k(st,mc->ncores) ; > + int k = k0 ; > + do { > + cpus_t *p = mc->core[k] ; > + if (p->sz >= sz && ok_color(cm,d,a,n,q,k)) return k ; > + k++ ; k %= mc->ncores ; > + } while (k != k0) ; > + return -1 ; > +} > + > + > +static int find_one_proc > +(int prev,st_t *st,int *cm,mapcore_t *mc,int *d,int *a,int n,int p) { > + int found = -1 ; > + int k0 = prev >= 0 && rand_bit(st) ? prev : rand_k(st,mc->ncores) ; > + int k = k0 ; > + do { > + cpus_t *pk = mc->core[k] ; > + if (pk->sz > 0) { > + if (found < 0) found = k ; > + if (ok_one_color(cm,d,a,n,p,k)) return k ; > + } > + k++ ; k %= mc->ncores ; > + } while (k != k0) ; > + if (found < 0) fatal("Cannot allocate threads") ; > + return found ; > +} > + > +void custom_affinity (st_t *st,cpus_t *cm,int **color,int *diff,cpus_t > *aff_cpus,int n_exe, int *r) { > + mapcore_t *mc = inverse_procs(cm,aff_cpus) ; > + int n = get_n(color) ; > + /* Diff relation as matrix */ > + int d[n*n] ; > + { > + int *q = diff ; > + for (int k = 0 ; k < n*n ; k++) d[k] = 0 ; > + while (*q >= 0) { > + int x = *q++, y = *q++ ; > + d[n*x+y] = d[n*y+x] = 1 ; > + } > + } > + for (int k = 0 ; k < n_exe ; k++) { > + int *a = &r[k*n] ; > + int prev_core = -1 ; > + for (int i = 0 ; i < n ; i++) a[i] = -1 ; > + for (int **q = color ; *q ; q++) { > + int c = find_color_diff(prev_core,st,aff_cpus->cpu,mc,d,a,n,*q) ; > + if (c >= 0) { > + cpus_t *p = mc->core[c] ; > + for (int *qq = *q ; *qq >= 0 ; qq++) { > + p->sz-- ; > + a[*qq] = p->cpu[p->sz] ; > + } > + prev_core = c ; > + } else { > + for (int *qq = *q ; *qq >= 0 ; qq++) { > + int c = find_one_proc(prev_core,st,aff_cpus->cpu,mc,d,a,n,*qq) ; > + cpus_t *p = mc->core[c] ; > + p->sz-- ; > + a[*qq] = p->cpu[p->sz] ; > + prev_core = c ; > + } > + } > + } > + } > + mapcore_free(mc) ; > +} > + > +/****************/ > +/* Command line */ > +/****************/ > + > +/* usage */ 
> + > +static void usage(char *prog, cmd_t *d) { > + log_error("usage: %s (options)*\n",prog) ; > + log_error(" -v be verbose\n") ; > + log_error(" -q be quiet\n") ; > + log_error(" -a <n> run maximal number of tests for n available > processors (default %i)\n",d->avail) ; > + log_error(" -n <n> run n tests concurrently\n") ; > + log_error(" -r <n> perform n runs (default %i)\n",d->max_run) ; > + log_error(" -fr <f> multiply run number per f\n") ; > + log_error(" -s <n> outcomes per run (default %i)\n",d->size_of_test) ; > + if (d->stride > 0) { > + log_error(" -st <n> stride (default %i)\n",d->stride) ; > + } > + log_error(" -fs <f> multiply outcomes per f\n") ; > + log_error(" -f <f> multiply outcomes per f, divide run number by f\n") ; > + if (d->aff_mode != aff_none) { > + log_error(" -i <n> increment for allocating logical processors, -i 0 > disables affinity mode") ; > + if (d->aff_mode == aff_incr) { > + log_error(" (default %i)\n",d->aff_incr) ; > + } else { > + log_error("\n") ; > + } > + log_error(" -p <ns> specify logical processors (default '") ; > + cpus_dump(errlog,d->aff_cpus) ; > + log_error("')\n") ; > + log_error(" +ra randomise affinity%s\n",d->aff_mode == aff_random ? > " (default)" : "") ; > + if (d->aff_custom_enabled) { > + log_error(" +ca enable custom affinity%s\n",d->aff_mode == > aff_custom ? " (default)" : "") ; > + } else { > + log_error(" +ca alias for +ra\n") ; > + } > + if (d->aff_scan_enabled) { > + log_error(" +sa enable scanning affinity%s\n",d->aff_mode == > aff_scan ? " (default)" : "") ; > + log_error(" +ta <topo> set topology affinity\n") ; > + } else { > + log_error(" +sa alias for +ra\n") ; > + } > + } > + if (d->shuffle >= 0) { > + log_error(" +rm randomise memory accesses%s\n",d->shuffle ? " > (default)" : "") ; > + log_error(" -rm do not randomise memory accesses%s\n",!d->shuffle ? > " (default)" : "") ; > + } > + if (d->speedcheck >= 0) { > + log_error(" +sc stop as soon as possible%s\n",d->speedcheck ? 
" > (default)" : "") ; > + log_error(" -sc run test completly%s\n",!d->speedcheck ? " > (default)" : "") ; > + } > + if (!d->fix) { > + log_error(" +fix fix thread launch order\n") ; > + } > + if (d->delta_tb) { > + log_error(" -tb <list> set timebase delays, default '") ; > + ints_dump(errlog,d->delta_tb) ; > + log_error("'\n") ; > + log_error(" List syntax is comma separated proc:delay\n") ; > + log_error(" -ta <n> set all timebase delays\n") ; > + } > + if (d->verbose_barrier >= 0) { > + log_error(" +vb show iteration timings%s\n",d->verbose_barrier ? " > (default)" : "") ; > + log_error(" -vb do not show iteration > timings%s\n",!d->verbose_barrier ? " (default)" : "") ; > + } > + if (d->prefetch) { > + log_error(" -pra (I|F|T|W) set all prefetch\n") ; > + log_error(" -prf <list> set prefetch, default '") ; > + prefetch_dump(errlog,d->prefetch) ; > + log_error("'\n") ; > + log_error(" List syntax is comma separated proc:name=(I|F|T|W)\n") ; > + } > + if (d->static_prefetch >= 0) { > + log_error(" -prs <n> prefetch probability is 1/n, -prs 0 disables > feature, default %i\n",d->static_prefetch) ; > + } > + if (d->max_loop > 0) { > + log_error(" -l <n> measure time by running assembly in a loop of size > <n> (default %i)\n",d->max_loop) ; > + } > + if (d->prelude > 0) { > + log_error(" -vp no verbose prelude\n") ; > + } > + if (d->sync_n > 0) { > + log_error(" -k <n> undocumented (default %i)\n",d->sync_n) ; > + } > + exit(2) ; > +} > + > +static long my_add (long x, long y) { > + long r = x+y ; > + if (r < x || r < y) { errno = ERANGE ; fatal("overflow") ; } > + return r ; > +} > + > +static long my_pow10(int p,long x) { > + long r = x ; > + for ( ; p > 0 ; p--) { > + long y2 = my_add(r,r) ; > + long y4 = my_add(y2,y2) ; > + long y8 = my_add(y4,y4) ; > + r = my_add(y8,y2) ; > + } > + if (r >= INT_MAX || r <= 0) { errno = ERANGE ; fatal("overflow") ; } > + return r ; > +} > + > +static int do_argint(char *p, char **q) { > + long r = strtol(p,q,10) ; > + if (errno 
== ERANGE) { fatal("overflow") ; } > + if (**q == 'k' || **q == 'K') { r = my_pow10(3,r) ; *q += 1; } > + else if (**q == 'm' || **q == 'M') { r = my_pow10(6,r) ; *q +=1 ; } > + return (int)r ; > +} > + > +static int argint(char *prog,char *p,cmd_t *d) { > + char *q ; > + long r = do_argint(p,&q) ; > + if (*p == '\0' || *q != '\0') { > + usage(prog,d) ; > + } > + return (int)r ; > +} > + > +static cpus_t *argcpus(char *prog,char *p0,cmd_t *d) { > + int sz = 0 ; > + char *p ; > + > + p = p0 ; > + for ( ; ; ) { > + char *q ; > + int x = (int)strtol(p,&q,10) ; > + if (x < 0 || *p == '\0' || (*q != '\0' && *q != ',')) usage(prog,d) ; > + sz++ ; > + if (*q == '\0') break ; > + p = q+1 ; > + } > + cpus_t *r = cpus_create(sz) ; > + p = p0 ; > + for (int k = 0 ; k < sz ; k++) { > + char *q ; > + r->cpu[k] = (int)strtol(p,&q,10) ; > + p = q+1 ; > + } > + return r ; > +} > + > +static void argints(char *prog,cmd_t *d, char *p,ints_t *r) { > + while (*p) { > + char *q ; > + int idx = (int)strtol(p,&q,10) ; > + if (idx < 0 || idx >= r->sz || *p == '\0' || *q != ':') usage(prog,d) ; > + p = q+1 ; > + int v = do_argint(p,&q) ; > + if (*p == '\0' || (*q != '\0' && *q != ',')) usage(prog,d) ; > + r->t[idx] = v ; > + if (*q == '\0') { > + p = q ; > + } else { > + p = q+1 ; > + } > + } > +} > + > +static prfone_t *get_name_slot(prfproc_t *p,char *name) { > + int nvars = p->nvars ; > + prfone_t *q = p->t ; > + for (int _v = 0 ; _v < nvars ; _v++) { > + if (strcmp(name,q[_v].name) == 0) return &q[_v] ; > + } > + return NULL ; /* Name not found */ > +} > + > + > +static void argoneprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) { > + prfdir_t dir = none ; > + switch (*p) { > + case 'F': > + dir = flush ; > + break ; > + case 'T': > + dir = touch ; > + break ; > + case 'W': > + dir = touch_store ; > + break ; > + } > + set_prefetch(r,dir) ; > +} > + > +int parse_prefetch(char *p, prfdirs_t *r) { > + if (!*p) return 1 ; > + for ( ;; ) { > + char *q ; > + int proc = 
(int)strtol(p,&q,10) ; > + if (proc < 0 || proc >= r->nthreads || *p == '\0' || *q != ':') > + return 0 ; > + p = q+1 ; > + char *p0 = p ; > + while (*p != '=') { > + if (*p == '\0') return 0 ; > + p++ ; > + } > + *p = '\0' ; > + prfone_t *loc_slot = get_name_slot(&r->t[proc],p0) ; > + if (loc_slot == NULL) { > + log_error("Proc %i does not access variable %s\n",proc,p0) ; > + *p = '=' ; > + return 0 ; > + } > + *p = '=' ; > + char c = *++p; > + prfdir_t dir = none ; > + switch (c) { > + case 'F': > + dir = flush ; > + break ; > + case 'T': > + dir = touch ; > + break ; > + case 'W': > + dir = touch_store ; > + break ; > + } > + loc_slot->dir = dir ; > + c = *++p ; > + if (c == '\0') return 1 ; > + else if (c == ',') p++ ; > + else return 0 ; > + } > +} > + > +static void argprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) { > + if (!parse_prefetch(p,r)) usage(prog,d) ; > +} > + > +static double argdouble(char *prog,char *p,cmd_t *d) { > + char *q ; > + double r = strtod(p,&q) ; > + if (*p == '\0' || *q != '\0') { > + usage(prog,d) ; > + } > + return r ; > +} > + > +void parse_cmd(int argc, char **argv, cmd_t *d, cmd_t *p) { > + char *prog = argv[0] ; > + > + /* Options */ > + for ( ; ; ) { > + --argc ; ++argv ; > + if (!*argv) break ; > + char fst = **argv ; > + if (fst != '-' && fst != '+') break ; > + if (strcmp(*argv,"-q") == 0) p->verbose=0 ; > + else if (strcmp(*argv,"-v") == 0) p->verbose++ ; > + else if (strcmp(*argv,"-r") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->max_run = argint(prog,argv[0],d) ; > + } else if (strcmp(*argv,"-fr") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->max_run *= argdouble(prog,argv[0],d) ; > + } else if (strcmp(*argv,"-s") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->size_of_test = argint(prog,argv[0],d) ; > + } else if (d->stride > 0 && strcmp(*argv,"-st") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->stride = 
argint(prog,argv[0],d) ; > + if (p->stride <= 0) p->stride = 1 ; > + } else if (strcmp(*argv,"-fs") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->size_of_test *= argdouble(prog,argv[0],d) ; > + } else if (strcmp(*argv,"-f") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + double f = argdouble(prog,argv[0],d) ; > + p->size_of_test *= f ; > + p->max_run /= f ; > + } else if (strcmp(*argv,"-n") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->n_exe = argint(prog,argv[0],d) ; > + if (p->n_exe < 1) p->n_exe = 1 ; > + } else if (strcmp(*argv,"-a") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int a = argint(prog,argv[0],d) ; > + p->avail = a ; > + } else if (d->sync_n > 0 && strcmp(*argv,"-k") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int a = argint(prog,argv[0],d) ; > + p->sync_n = a < 0 ? 0 : a ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"-i") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int i = argint(prog,argv[0],d) ; > + p->aff_mode = aff_incr ; > + p->aff_incr = i < 0 ? 
0 : i ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"-p") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + cpus_t *cpus = argcpus(prog,argv[0],d) ; > + p->aff_cpus = cpus ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"+ra") == 0) { > + p->aff_mode = aff_random ; > + } else if (d->aff_custom_enabled && strcmp(*argv,"+ca") == 0) { > + p->aff_mode = aff_custom ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"+ca") == 0) { > + p->aff_mode = aff_random ; > + } else if (d->aff_scan_enabled && strcmp(*argv,"+sa") == 0) { > + p->aff_mode = aff_scan ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) { > + p->aff_mode = aff_random ; > + } else if (d->aff_scan_enabled && strcmp(*argv,"+ta") == 0) { > + p->aff_mode = aff_topo ; > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + p->aff_topo = argv[0] ; > + } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) { > + p->aff_mode = aff_random ; > + } else if (d->shuffle >= 0 && strcmp(*argv,"+rm") == 0) { > + p->shuffle = 1 ; > + } else if (d->shuffle >= 0 && strcmp(*argv,"-rm") == 0) { > + p->shuffle = 0 ; > + } else if (d->speedcheck >= 0 && strcmp(*argv,"+sc") == 0) { > + p->speedcheck = 1 ; > + } else if (d->speedcheck >= 0 && strcmp(*argv,"-sc") == 0) { > + p->speedcheck = 0 ; > + } else if (!d->fix && strcmp(*argv,"+fix") == 0) { > + p->fix = 1 ; > + } else if (d->verbose_barrier >= 0 && strcmp(*argv,"+vb") == 0) { > + p->verbose_barrier++ ; > + } else if (d->verbose_barrier >= 0 && strcmp(*argv,"-vb") == 0) { > + p->verbose_barrier = 0 ; > + } else if (d->prelude > 0 && strcmp(*argv,"-vp") == 0) { > + p->prelude = 0 ; > + } else if (d->delta_tb && strcmp(*argv,"-tb") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + argints(prog,d,argv[0],p->delta_tb) ; > + } else if (d->delta_tb && strcmp(*argv,"-ta") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int da = argint(prog,argv[0],d) ; > + for (int k = 0 ; k < 
p->delta_tb->sz ; k++) p->delta_tb->t[k] = da ; > + } else if (d->prefetch && strcmp(*argv,"-prf") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + argprefetch(prog,d,argv[0],p->prefetch) ; > + } else if (d->prefetch && strcmp(*argv,"-pra") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + argoneprefetch(prog,d,argv[0],p->prefetch) ; > + } else if (d->static_prefetch >= 0 && strcmp(*argv,"-prs") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int prs = argint(prog,argv[0],d) ; > + p->static_prefetch = prs >= 0 ? prs : 0 ; > + } else if (d->max_loop > 0 && strcmp(*argv,"-l") == 0) { > + --argc ; ++argv ; > + if (!*argv) usage(prog,d) ; > + int i = argint(prog,argv[0],d) ; > + p->max_loop = i < 1 ? 1 : i ; > + } else usage(prog,d) ; > + } > + > + /* Argument */ > + if (argc == 0) return ; > + usage(prog,d) ; > +} > + > +/*************************/ > +/* Concurrency utilities */ > +/*************************/ > + > +/* pthread based mutex */ > + > +pm_t *pm_create(void) { > + pm_t *p = malloc_check(sizeof(*p)) ; > + int ret = pthread_mutex_init(p,NULL) ; > + if (ret) { errexit("mutex_init",ret) ; } > + return p ; > +} > + > +void pm_free(pm_t *p) { > + free(p) ; > +} > + > +void pm_lock(pm_t *m) { > + int ret = pthread_mutex_lock(m) ; > + if (ret) { errexit("mutex_lock",ret) ; } > +} > + > +void pm_unlock(pm_t *m) { > + int ret = pthread_mutex_unlock(m) ; > + if (ret) { errexit("mutex_unlock",ret) ; } > +} > + > +/* pthread condition */ > + > +pc_t *pc_create(void) { > + pc_t *p = malloc_check(sizeof(*p)) ; > + p->c_mutex = pm_create() ; > + p->c_cond = malloc_check(sizeof(*(p->c_cond))) ; > + int e = pthread_cond_init(p->c_cond,NULL) ; > + if (e) { errexit("cond_init",e); } > + return p ; > +} > + > +void pc_free(pc_t *p) { > + pm_free(p->c_mutex) ; > + free(p->c_cond) ; > + free(p) ; > +} > + > +static void pc_lock(pc_t *p) { > + pm_lock(p->c_mutex) ; > +} > + > +static void pc_unlock(pc_t *p) { > + 
pm_unlock(p->c_mutex) ; > +} > + > +void pc_wait(pc_t *p) { > + int e = pthread_cond_wait(p->c_cond, p->c_mutex) ; > + if (e) { errexit("cond_wait",e) ; } > +} > + > +void pc_broadcast (pc_t *p) { > + int e = pthread_cond_broadcast(p->c_cond) ; > + if (e) { errexit("cond_broadcast",e) ; } > +} > + > +static void pc_signal(pc_t *p) { > + int e = pthread_cond_signal(p->c_cond); > + if (e) errexit("cond_signal",e) ; > +} > + > + > +/* pthread based barrier, usable for nproc threads */ > + > + > +pb_t *pb_create(int nprocs) { > + pb_t *p = malloc_check(sizeof(*p)) ; > + p->cond = pc_create() ; > + p->count = p->nprocs = nprocs ; > + p->turn = 0 ; > + return p ; > +} > + > +void pb_free(pb_t *p) { > + pc_free(p->cond) ; > + free(p) ; > +} > + > +/* The following code should protect us against spurious wake ups */ > +void pb_wait(pb_t *p) { > + pc_lock(p->cond) ; > + int t = p->turn ; > + --p->count ; > + if (p->count == 0) { > + p->count = p->nprocs ; > + p->turn = !t ; > + pc_broadcast(p->cond) ; > + } else { > + do { > + pc_wait(p->cond) ; > + } while (p->turn == t) ; > + } > + pc_unlock(p->cond) ; > +} > + > + > +/* pthread based or flag */ > + > +po_t *po_create(int nprocs) { > + po_t *p = malloc_check(sizeof(*p)) ; > + p->cond = pc_create() ; > + p->nprocs = p->count = nprocs ; > + p->val = 0 ; > + p->turn = 0 ; > + return p ; > +} > + > +void po_free(po_t *p) { > + pc_free(p->cond) ; > + free(p) ; > +} > + > +void po_reinit(po_t *p) { > + pc_lock(p->cond) ; > + int t = p->turn ; > + --p->count ; > + if (p->count == 0) { > + p->count = p->nprocs ; > + p->val = 0 ; > + p->turn = !t ; > + pc_broadcast(p->cond) ; > + } else { > + do { > + pc_wait(p->cond) ; > + } while (p->turn == t) ; > + } > + pc_unlock(p->cond) ; > +} > + > +int po_wait(po_t *p, int v) { > + pc_lock(p->cond) ; > + int t = p->turn ; > + --p->count ; > + p->val = p->val || v ; > + if (p->count == 0) { > + p->count = p->nprocs ; > + p->turn = !t ; > + pc_broadcast(p->cond) ; > + } else { > + do { > + 
pc_wait(p->cond) ; > + } while (p->turn == t) ; > + } > + int r = p->val ; > + pc_unlock(p->cond) ; > + return r ; > +} > + > + > +/* One place buffer */ > + > +op_t *op_create(void) { > + op_t *p = malloc_check(sizeof(*p)) ; > + p->cond = pc_create() ; > + p->val = NULL ; > + p->some = 0 ; > + return p; > +} > + > +void op_free(op_t *p) { > + pc_free(p->cond) ; > + free(p) ; > +} > + > +void op_set(op_t *p, void *v) { > + pc_lock(p->cond) ; > + if (p->some) { fatal("op_set") ; } > + p->val = v ; > + p->some = 1 ; > + pc_signal(p->cond) ; > + pc_unlock(p->cond) ; > +} > + > +void *op_get(op_t *p) { > + void *v = NULL ; > + pc_lock(p->cond) ; > + while (!p->some) { > + pc_wait(p->cond) ; > + } > + v = (void *) p->val ; > + p->val = NULL ; > + p->some = 0 ; > + pc_unlock(p->cond) ; > + return v ; > +} > + > +/* Thread launch and join */ > + > +void launch(pthread_t *th, f_t *f, void *a) { > + int e = pthread_create(th,NULL,f,a); > + if (e) errexit("phread_create",e); > +} > + > +void *join(pthread_t *th) { > + void *r ; > + int e = pthread_join(*th,&r) ; > + if (e) errexit("pthread_join",e); > + return r ; > +} > + > +/* Detached */ > + > +typedef struct { > + f_t *f; > + void *a ; > + op_t *op; > +} detarg_t ; > + > +static void *zyva_det(void *_b) { > + detarg_t *b = (detarg_t *)_b; > + f_t *f = b->f ; > + void *a = b->a ; > + op_t *op = b->op ; > + free(b) ; > + int e = pthread_detach(pthread_self()); > + if (e) errexit("pthread_detach",e) ; > + void *r = f(a) ; > + op_set(op,r) ; > + return NULL ; > +} > + > +op_t *launch_detached(f_t *f,void *a) { > + op_t *op = op_create() ; > + detarg_t *b = malloc_check(sizeof(*b)) ; > + b->f = f ; b->a = a; b->op = op ; > + pthread_t th ; > + launch(&th,zyva_det,b) ; > + return op ; > +} > + > +void *join_detached(op_t *op) { > + void *r = op_get(op) ; > + op_free(op) ; > + return r ; > +} > + > +/* Thread cache */ > + > +void *start_thread(void *_a) { > + sarg_t *_b = (sarg_t *)_a ; > + for (int _k = _b->max_run ; _k > 0 ; 
_k--) { > + void *_c = op_get(_b->op_arg) ; > + f_t *f = (f_t *)_c ; > + if (f == NULL) break ; > + void *ret = f(_b->arg) ; > + op_set(_b->op_ret,ret) ; > + } > + return NULL ; > +} > + > +/*****************/ > +/* Random things */ > +/*****************/ > + > +void perm_prefix_ints(unsigned *st,int *_t, int m, int n) { > + int k; > + for (k = 0 ; k < m ; k++) { > + int j = k+rand_k(st,n-k); > + int x = _t[k]; _t[k] = _t[j]; _t[j] = x; > + } > +} > + > +void perm_ints(unsigned *st,int *_t, int n) { > + perm_prefix_ints(st, _t,n-1,n) ; > +} > + > +void perm_funs(unsigned *st,f_t *fun[], int n) { > + int k; > + for (k = 0 ; k < n-1 ; k++) { > + int j = k+rand_k(st,n-k); > + f_t *t = fun[j]; > + fun[j] = fun[k]; fun[k] = t; > + } > +} > + > +void perm_ops(unsigned *st,op_t *op[], int n) { > + int k; > + for (k = 0 ; k < n-1 ; k++) { > + int j = k+rand_k(st,n-k); > + op_t *t = op[j]; > + op[j] = op[k]; op[k] = t; > + } > +} > + > +void perm_threads(unsigned *st,pthread_t thread[], int n) { > + int k; > + for (k = 0 ; k < n-1 ; k++) { > + int j = k+rand_k(st,n-k); > + pthread_t t = thread[j]; > + thread[j] = thread[k]; thread[k] = t; > + } > +} > + > +static int int_cmp(const void *_p, const void *_q) { > + int x = *((int *)_p) ; > + int y = *((int *)_q) ; > + if (x < y) return -1 ; > + else if (x > y) return 1 ; > + else return 0 ; > +} > + > +int check_shuffle(int **t, int *min, int sz) { > + int *idx = malloc_check(sizeof(*idx)*sz) ; > + for (int k=0 ; k < sz ; k++) { > + idx[k] = (int)(t[k] - min) ; > + // fprintf(stderr," %i",idx[k]) ; > + } > + // fprintf(stderr,"\n") ; > + qsort(&idx[0],sz, sizeof(idx[0]), int_cmp) ; > + for (int k=0 ; k < sz ; k++) { > + if (idx[k] != k) { > + free(idx) ; > + return 0 ; > + } > + } > + free(idx) ; > + return 1 ; > +} > + > +/****************/ > +/* Time counter */ > +/****************/ > + > +#include <sys/time.h> > +#include <time.h> > + > +tsc_t timeofday(void) { > + struct timeval tv ; > + if (gettimeofday(&tv,NULL)) 
errexit("gettimeoday",errno) ; > + return tv.tv_sec * ((tsc_t)1000000) + tv.tv_usec ; > +} > + > +double tsc_ratio(tsc_t t1, tsc_t t2) { > + return ((double) t1) / ((double)t2) ; > +} > + > + > +double tsc_millions(tsc_t t) { > + return t / 1000000.0 ; > +} > + > +/*******************/ > +/* String handling */ > +/*******************/ > + > +int find_string(char *t[], int sz, char *s) { > + for (int k = 0 ; k < sz ; k++) { > + if (strcmp(t[k],s) == 0) return k ; > + } > + return -1 ; > +} > diff --git a/tests/tcg/mttcg/x86/utils.h b/tests/tcg/mttcg/x86/utils.h > new file mode 100644 > index 0000000..99e756e > --- /dev/null > +++ b/tests/tcg/mttcg/x86/utils.h > @@ -0,0 +1,275 @@ > +/****************************************************************************/ > +/* the diy toolsuite > */ > +/* > */ > +/* Jade Alglave, University College London, UK. > */ > +/* Luc Maranget, INRIA Paris-Rocquencourt, France. > */ > +/* > */ > +/* Copyright 2015-present Institut National de Recherche en Informatique et > */ > +/* en Automatique and the authors. All rights reserved. > */ > +/* > */ > +/* This software is governed by the CeCILL-B license under French law and > */ > +/* abiding by the rules of distribution of free software. You can use, > */ > +/* modify and/ or redistribute the software under the terms of the CeCILL-B > */ > +/* license as circulated by CEA, CNRS and INRIA at the following URL > */ > +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. > */ > +/****************************************************************************/ > +#ifndef _UTILS_H > +#define _UTILS_H 1 > + > +#include <stdio.h> > +#include <inttypes.h> > +#include <pthread.h> > +#include "litmus_rand.h" > + > + > +/********/ > +/* Misc */ > +/********/ > + > +void seterrlog(FILE *chan) ; > + > +int log_error(const char *fmt,...) 
; > + > +void fatal(char *msg) ; > +/* e is errno */ > +void errexit(char *msg,int e) ; > + > +void *malloc_check(size_t sz) ; > + > +int max(int n,int m) ; > + > +void pp_ints (FILE *fp,int *p,int n) ; > + > +void *do_align(void *p, size_t sz) ; > + > +void *do_noalign(void *p, size_t sz) ; > + > +void cat_file(char *path,char *msg,FILE *out) ; > + > +/***********/ > +/* CPU set */ > +/***********/ > + > +#define CPUS_DEFINED 1 > +typedef struct { > + int sz ; > + int *cpu ; > +} cpus_t ; > + > +cpus_t *cpus_create(int sz) ; > +cpus_t *cpus_create_init(int sz, int t[]) ; > +void cpus_free(cpus_t *p) ; > +void cpus_dump(FILE *fp, cpus_t *p) ; > +void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) ; > + > +int gcd(int a, int b) ; > + > +cpus_t *coremap_seq(int navail, int nways) ; > +cpus_t *coremap_end(int navail, int nways) ; > + > +void custom_affinity > +(st_t *st,cpus_t *cm,int **color,int *diff,cpus_t *aff_cpus,int n_exe, int > *r) ; > + > +/*************/ > +/* Int array */ > +/*************/ > + > +typedef struct { > + int sz ; > + int *t ; > +} ints_t ; > + > +void ints_dump(FILE *fp, ints_t *p) ; > + > +/* Prefetch directives */ > +typedef enum {none, flush, touch, touch_store} prfdir_t ; > + > +typedef struct { > + char *name ; > + prfdir_t dir ; > +} prfone_t ; > + > +typedef struct { > + int nvars ; > + prfone_t *t ; > +} prfproc_t ; > + > +typedef struct { > + int nthreads ; > + prfproc_t *t ; > +} prfdirs_t ; > + > +void prefetch_dump(FILE *fp, prfdirs_t *p) ; > +int parse_prefetch(char *p, prfdirs_t *r) ; > + > +/************************/ > +/* Command line options */ > +/************************/ > +typedef enum > + { aff_none, aff_incr, aff_random, aff_custom, > + aff_scan, aff_topo} aff_mode_t ; > + > +typedef struct { > + int verbose ; > + /* Test parmeters */ > + int max_run ; > + int size_of_test ; > + int stride ; > + int avail ; > + int n_exe ; > + int sync_n ; > + /* Affinity */ > + aff_mode_t aff_mode ; > + int 
aff_custom_enabled ; > + int aff_scan_enabled ; > + int aff_incr ; > + cpus_t *aff_cpus ; > + char *aff_topo ; > + /* indirect mode */ > + int shuffle ; > + /* loop test */ > + int max_loop ; > + /* time base delays */ > + ints_t * delta_tb ; > + /* prefetch control */ > + prfdirs_t *prefetch ; > + int static_prefetch ; > + /* show time of synchronisation */ > + int verbose_barrier ; > + /* Stop as soon as condition is settled */ > + int speedcheck ; > + /* Enforce fixed launch order (ie cancel change lauch) */ > + int fix ; > + /* Dump prelude to test output */ > + int prelude ; > +} cmd_t ; > + > +void parse_cmd(int argc, char **argv, cmd_t *def, cmd_t *p) ; > + > + > +/********************/ > +/* Thread utilities */ > +/********************/ > + > +/* Mutex */ > + > +typedef pthread_mutex_t pm_t ; > + > +pm_t *pm_create(void) ; > +void pm_free(pm_t *p) ; > +void pm_lock(pm_t *m) ; > +void pm_unlock(pm_t *m) ; > + > +/* Condition variable */ > + > +typedef struct { > + pm_t *c_mutex ; > + pthread_cond_t *c_cond ; > +} pc_t ; > + > +pc_t *pc_create(void) ; > +void pc_free(pc_t *p) ; > +void pc_wait(pc_t *p) ; > +void pc_broadcast (pc_t *p) ; > + > +/* Barrier */ > + > +/* Avoid pthread supplied barrier as they are not available in old versions > */ > + > +typedef struct { > + volatile unsigned int count ; > + volatile int turn ; > + pc_t *cond ; > + unsigned int nprocs ; > +} pb_t ; > + > + > +pb_t *pb_create(int nprocs) ; > +void pb_free(pb_t *p) ; > +void pb_wait(pb_t *p) ; > + > + > +/* Or flag */ > + > +typedef struct { > + pc_t *cond ; > + int nprocs ; > + int count ; > + volatile int val ; > + volatile int turn ; > +} po_t ; > + > +po_t *po_create(int nprocs) ; > +void po_free(po_t *p) ; > +/* Initialize flag, must be called by all participant */ > +void po_reinit(po_t *p) ; > +/* Return the 'or' of the v arguments of all participants */ > +int po_wait(po_t *p, int v) ; > + > +/* One place buffer */ > + > +typedef struct { > + pc_t *cond ; > + int volatile 
some ; > + void * volatile val ; > +} op_t ; > + > +op_t *op_create(void) ; > +void op_free(op_t *p) ; > +void op_set(op_t *p, void *v) ; > +void *op_get(op_t *p) ; > + > +/* Thread launch and join */ > + > +typedef void* f_t(void *); > + > +void launch(pthread_t *th, f_t *f, void *a) ; > + > +void *join(pthread_t *th) ; > + > +/* Detached lauch and join */ > + > +op_t *launch_detached(f_t *f,void *a) ; > +void *join_detached(op_t *p) ; > + > +/* Thread cache */ > + > +typedef struct { > + int max_run ; > + op_t *op_arg,*op_ret ; > + void *arg ; > +} sarg_t ; > + > +f_t start_thread ; > + > +/*****************/ > +/* Random things */ > +/*****************/ > + > +/* permutations */ > + > +void perm_prefix_ints(st_t *st,int t[], int used, int sz) ; > +void perm_ints(st_t *st,int t[], int sz) ; > +void perm_funs(st_t *st,f_t *t[], int sz) ; > +void perm_threads(st_t *st,pthread_t t[], int sz) ; > +void perm_ops(st_t *st,op_t *t[], int sz) ; > + > +/* check permutation */ > +int check_shuffle(int **t, int *min, int sz) ; > + > +/*********************/ > +/* Real time counter */ > +/*********************/ > + > +typedef unsigned long long tsc_t ; > +#define PTSC "%llu" > + > +/* Result in micro-seconds */ > +tsc_t timeofday(void) ; > +double tsc_ratio(tsc_t t1, tsc_t t2) ; > +double tsc_millions(tsc_t t) ; > + > +/* String utilities */ > +int find_string(char *t[],int sz,char *s) ; > + > +#endif -- Alex Bennée