new file mode 100644
@@ -0,0 +1,42 @@
+# Build rules for the litmus test SAL: one executable (.exe) and one
+# assembly listing (.t) per source named in SRC.
+GCC=gcc
+GCCOPTS=-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 -fomit-frame-pointer -O2 -pthread
+LINKOPTS=
+SRC=\
+ SAL.c\
+
+EXE=$(SRC:.c=.exe)
+T=$(SRC:.c=.t)
+
+# These targets name actions, not files: declare them phony so that a stray
+# file called 'all', 'clean', 'cleansource' or 'tests' cannot disable them.
+.PHONY: all clean cleansource tests
+
+all: $(EXE) $(T)
+
+clean:
+	/bin/rm -f *.o *.s *.t *.exe *~
+
+cleansource:
+	/bin/rm -f *.o *.c *.h *.s *~
+
+affinity.o: affinity.c
+	$(GCC) $(GCCOPTS) -O2 -c affinity.c
+
+outs.o: outs.c
+	$(GCC) $(GCCOPTS) -O2 -c outs.c
+
+utils.o: utils.c
+	$(GCC) $(GCCOPTS) -O2 -c utils.c
+
+litmus_rand.o: litmus_rand.c
+	$(GCC) $(GCCOPTS) -O2 -c litmus_rand.c
+
+UTILS=affinity.o outs.o utils.o litmus_rand.o
+
+# Each test is linked from its generated assembly plus the support objects.
+%.exe:%.s $(UTILS)
+	$(GCC) $(GCCOPTS) $(LINKOPTS) -o $@ $(UTILS) $<
+
+%.s:%.c
+	$(GCC) $(GCCOPTS) -S $<
+
+# show.awk extracts the litmus code of each thread from the assembly.
+%.t:%.s
+	awk -f show.awk $< > $@
+
+tests: all
+	./run.sh
new file mode 100644
@@ -0,0 +1,22 @@
+Tests produced by litmus for architecture X86 on linux
+
+COMPILING
+ with command 'make [-j N]' or 'sh comp.sh'
+
+RUNNING ALL TESTS
+ with command 'sh run.sh'. Test result on standard output.
+
+RUNNING ONE TEST
+ Tests are .exe files, for instance SAL.exe; run one with './SAL.exe'.
+
+RUNNING OPTIONS
+ Main options to the run.sh script and to .exe files:
+ -v be verbose (can be repeated).
+ -a <n> number of (logical) processors available, default 0.
+ The default value of 0 means that .exe files attempt
+ to infer the actual number of logical threads.
+ -s <n> one run operates on arrays of size <n>, default 100000.
+ -r <n> number of runs, default 10.
+
+ For more options see for instance './SAL.exe -help' and litmus documentation
+ <http://diy.inria.fr/doc/litmus.html>
new file mode 100644
@@ -0,0 +1,491 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* This C source is a product of litmus7 and includes source that is */
+/* governed by the CeCILL-B license. */
+/****************************************************************************/
+/* Parameters */
+#define SIZE_OF_TEST 100000
+#define NUMBER_OF_RUN 10
+#define AVAIL 0
+#define STRIDE 1
+#define MAX_LOOP 0
+#define N 2
+#define AFF_INCR (0)
+/* Includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <time.h>
+#include <limits.h>
+#include "utils.h"
+#include "outs.h"
+#include "affinity.h"
+
+/* Run parameters, filled in by run() and shared by all test instances */
+typedef struct {
+  int verbose;          /* verbosity level taken from the command line */
+  int size_of_test;     /* number of array cells handled by one run */
+  int max_run;          /* number of runs */
+  int stride;           /* step used by the test threads to walk the arrays */
+  aff_mode_t aff_mode;  /* affinity mode taken from the command line */
+  int ncpus;            /* size of the cpu array built by run() */
+  int ncpus_used;       /* N times the number of concurrent test instances */
+  int do_change;        /* when non-zero, functions and threads are permuted */
+} param_t;
+
+
+/* Full memory barrier: an mfence instruction, with a "memory" clobber so
+   that the compiler does not move memory accesses across it */
+static inline void mbar(void) {
+  __asm__ __volatile__ ("mfence" : : : "memory");
+}
+
+/* User-level barrier on one flag: the thread whose id equals k modulo N
+   raises the flag, every other thread spins until the flag is raised */
+static inline void barrier_wait(unsigned int id, unsigned int k, int volatile *b) {
+  if ((k % N) != id) {
+    while (*b == 0) ;  /* busy wait */
+    return ;
+  }
+  *b = 1 ;
+}
+
+/**********************/
+/* Context definition */
+/**********************/
+
+
+typedef struct {
+/* Shared variables of the litmus test, one cell per test iteration */
+ int *y;
+ int *x;
+/* Final content of observed registers, one cell per test iteration */
+ int *out_0_eax;
+ int *out_1_eax;
+/* Barrier the test threads wait on at the end of check_globals */
+ pb_t *fst_barrier;
+/* Barrier for litmus loop: one flag per iteration, used by barrier_wait */
+ int volatile *barrier;
+/* Instance seed, initialised from rand() by init */
+ st_t seed;
+/* Parameters */
+ param_t *_p;
+} ctx_t;
+
+/* Proposition of the final condition: 1 when both observed registers
+   hold 0, 0 otherwise */
+static inline int final_cond(int _out_0_eax,int _out_1_eax) {
+  if (_out_0_eax != 0) return 0;
+  return _out_1_eax == 0 ? 1 : 0;
+}
+/* Identity: returns the value computed by final_cond unchanged. */
+inline static int final_ok(int cond) {
+ return cond;
+}
+
+/**********************/
+/* Outcome collection */
+/**********************/
+#define NOUTS 2
+typedef intmax_t outcome_t[NOUTS];
+
+static const int out_0_eax_f = 0 ;
+static const int out_1_eax_f = 1 ;
+
+
+typedef struct hist_t {
+ outs_t *outcomes ;
+ count_t n_pos,n_neg ;
+} hist_t ;
+
+/* Allocate an empty histogram: no outcome recorded, both counters at zero */
+static hist_t *alloc_hist(void) {
+  hist_t *h = malloc_check(sizeof *h) ;
+  h->n_neg = 0 ;
+  h->n_pos = 0 ;
+  h->outcomes = NULL ;
+  return h ;
+}
+/* Release a histogram: its outcome tree first, then the record itself. */
+static void free_hist(hist_t *h) {
+ free_outs(h->outcomes) ;
+ free(h) ;
+}
+/* Record outcome o (NOUTS values) with count v and flag show in h. */
+static void add_outcome(hist_t *h, count_t v, outcome_t o, int show) {
+ h->outcomes = add_outcome_outs(h->outcomes,o,NOUTS,v,show) ;
+}
+
+/* Accumulate histogram h1 into h0: outcome trees are merged and both
+   counters are summed; h1 is left to its owner */
+static void merge_hists(hist_t *h0, hist_t *h1) {
+  h0->outcomes = merge_outs(h0->outcomes,h1->outcomes,NOUTS) ;
+  h0->n_neg += h1->n_neg ;
+  h0->n_pos += h1->n_pos ;
+}
+/* Total of the counts stored in the outcome tree of h. */
+static count_t sum_hist(hist_t *h) {
+ return sum_outs(h->outcomes) ;
+}
+
+
+/* Print one histogram line: count, then '*' when show is set and ':'
+   otherwise, then the two observed register values */
+static void do_dump_outcome(FILE *fhist, intmax_t *o, count_t c, int show) {
+  const int mark = show ? '*' : ':' ;
+  const int eax0 = (int)o[out_0_eax_f] ;
+  const int eax1 = (int)o[out_1_eax_f] ;
+  fprintf(fhist,"%-6"PCTR"%c>0:EAX=%i; 1:EAX=%i;\n",c,mark,eax0,eax1);
+}
+/* Print every outcome of h on fhist, one line each, via do_dump_outcome. */
+static void just_dump_outcomes(FILE *fhist, hist_t *h) {
+ outcome_t buff ; /* scratch outcome handed to dump_outs */
+ dump_outs(fhist,do_dump_outcome,h->outcomes,buff,NOUTS) ;
+}
+
+/*******************************************************/
+/* Context allocation, freeing and reinitialization */
+/*******************************************************/
+
+/* Set up a context: draw its seed, then allocate the result arrays, the
+   shared arrays, the first barrier and the per-iteration barrier flags.
+   Reads _a->_p, which the caller must have set beforehand. */
+static void init(ctx_t *_a) {
+  const size_t n_cells = (size_t)_a->_p->size_of_test;
+
+  _a->seed = rand();
+  _a->out_0_eax = malloc_check(n_cells * sizeof *_a->out_0_eax);
+  _a->out_1_eax = malloc_check(n_cells * sizeof *_a->out_1_eax);
+  _a->y = malloc_check(n_cells * sizeof *_a->y);
+  _a->x = malloc_check(n_cells * sizeof *_a->x);
+  _a->fst_barrier = pb_create(N);
+  _a->barrier = malloc_check(n_cells * sizeof *_a->barrier);
+}
+
+/* Release everything init allocated; the context record itself is not freed */
+static void finalize(ctx_t *_a) {
+  free(_a->y);
+  free(_a->x);
+  free(_a->out_0_eax);
+  free(_a->out_1_eax);
+  pb_free(_a->fst_barrier);
+  /* the cast drops the volatile qualifier, which free does not accept */
+  free((void *)_a->barrier);
+}
+
+/* Reset a context before a run: shared variables and barrier flags to 0,
+   observed registers to the sentinel -239487 */
+static void reinit(ctx_t *_a) {
+  int _i = _a->_p->size_of_test ;
+  while (_i-- > 0) {
+    _a->y[_i] = 0;
+    _a->x[_i] = 0;
+    _a->out_0_eax[_i] = -239487;
+    _a->out_1_eax[_i] = -239487;
+    _a->barrier[_i] = 0;
+  }
+}
+
+/**************************************/
+/* Prefetch (and check) global values */
+/**************************************/
+/* Walk the shared arrays from last cell to first; for each cell a random bit
+ * from the context seed decides whether the cell is read and compared to 0,
+ * a non-zero value being fatal. Then wait on the context's first barrier. */
+static void check_globals(ctx_t *_a) {
+ int *y = _a->y;
+ int *x = _a->x;
+ for (int _i = _a->_p->size_of_test-1 ; _i >= 0 ; _i--) {
+ if (rand_bit(&(_a->seed)) && y[_i] != 0) fatal("SAL, check_globals failed"); /* rand_bit is called first: the seed always advances */
+ if (rand_bit(&(_a->seed)) && x[_i] != 0) fatal("SAL, check_globals failed");
+ }
+ pb_wait(_a->fst_barrier);
+}
+
+/***************/
+/* Litmus code */
+/***************/
+
+typedef struct {
+ int th_id; /* I am running on this thread */
+ int *cpu; /* On this cpu */
+ ctx_t *_a; /* In this context */
+} parg_t;
+
+
+
+
+/* Thread body for litmus process P0. _vb points to a parg_t.
+ * For each iteration _i: store 1 to x[_i], mfence, load y[_i] into EAX,
+ * which the asm output operand writes to out_0_eax[_i].
+ * The "#START" and "#_litmus_P0_k" lines are assembler comments; show.awk
+ * matches them to extract the generated code. Always returns NULL. */
+static void *P0(void *_vb) {
+ mbar();
+ parg_t *_b = (parg_t *)_vb;
+ ctx_t *_a = _b->_a;
+ int _ecpu = _b->cpu[_b->th_id]; /* cpu selected for this thread */
+ force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL");
+ check_globals(_a); /* ends with a wait on the context's first barrier */
+ int _th_id = _b->th_id;
+ int volatile *barrier = _a->barrier;
+ int _size_of_test = _a->_p->size_of_test;
+ int _stride = _a->_p->stride;
+ int *out_0_eax = _a->out_0_eax;
+ for (int _j = _stride ; _j > 0 ; _j--) { /* one pass per residue modulo the stride */
+ for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) {
+ barrier_wait(_th_id,_i,&barrier[_i]); /* per-iteration synchronisation with the other thread */
+asm __volatile__ (
+"\n"
+"#START _litmus_P0\n"
+"#_litmus_P0_0\n\t"
+"movl $1,%[x]\n" /* x[_i] = 1 */
+"#_litmus_P0_1\n\t"
+"mfence\n"
+"#_litmus_P0_2\n\t"
+"movl %[y],%[eax]\n" /* EAX = y[_i] */
+"#END _litmus_P0\n\t"
+:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_0_eax[_i]) /* NOTE(review): y is read by the asm yet declared "=m" (write-only); confirm this is intended */
+:
+:"cc","memory"
+);
+ }
+ }
+ mbar();
+ return NULL;
+}
+/* Thread body for litmus process P1. _vb points to a parg_t.
+ * For each iteration _i: store 1 to y[_i], mfence, load x[_i] into EAX,
+ * which the asm output operand writes to out_1_eax[_i].
+ * The "#START" and "#_litmus_P1_k" lines are assembler comments; show.awk
+ * matches them to extract the generated code. Always returns NULL. */
+static void *P1(void *_vb) {
+ mbar();
+ parg_t *_b = (parg_t *)_vb;
+ ctx_t *_a = _b->_a;
+ int _ecpu = _b->cpu[_b->th_id]; /* cpu selected for this thread */
+ force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL");
+ check_globals(_a); /* ends with a wait on the context's first barrier */
+ int _th_id = _b->th_id;
+ int volatile *barrier = _a->barrier;
+ int _size_of_test = _a->_p->size_of_test;
+ int _stride = _a->_p->stride;
+ int *out_1_eax = _a->out_1_eax;
+ for (int _j = _stride ; _j > 0 ; _j--) { /* one pass per residue modulo the stride */
+ for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) {
+ barrier_wait(_th_id,_i,&barrier[_i]); /* per-iteration synchronisation with the other thread */
+asm __volatile__ (
+"\n"
+"#START _litmus_P1\n"
+"#_litmus_P1_0\n\t"
+"movl $1,%[y]\n" /* y[_i] = 1 */
+"#_litmus_P1_1\n\t"
+"mfence\n"
+"#_litmus_P1_2\n\t"
+"movl %[x],%[eax]\n" /* EAX = x[_i] */
+"#END _litmus_P1\n\t"
+:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_1_eax[_i]) /* NOTE(review): x is read by the asm yet declared "=m" (write-only); confirm this is intended */
+:
+:"cc","memory"
+);
+ }
+ }
+ mbar();
+ return NULL;
+}
+
+typedef struct {
+ pm_t *p_mutex;
+ pb_t *p_barrier;
+ param_t *_p;
+ int z_id;
+ int *cpus;
+} zyva_t;
+
+#define NT N
+/* Run one test instance. _va points to a zyva_t.
+ * Builds a private context, then for each of max_run runs: resets the
+ * context, launches the NT test threads, joins them and logs the outcome of
+ * every array cell into a histogram.
+ * Returns the histogram (a hist_t *); the caller frees it with free_hist. */
+static void *zyva(void *_va) {
+ zyva_t *_a = (zyva_t *) _va;
+ param_t *_b = _a->_p;
+ pb_wait(_a->p_barrier); /* barrier shared by all instances */
+ pthread_t thread[NT];
+ parg_t parg[N];
+ f_t *fun[] = {&P0,&P1};
+ hist_t *hist = alloc_hist();
+ ctx_t ctx;
+ ctx._p = _b; /* must be set before init, which reads the test size from it */
+
+ init(&ctx);
+ for (int _p = N-1 ; _p >= 0 ; _p--) {
+ parg[_p].th_id = _p; parg[_p]._a = &ctx;
+ parg[_p].cpu = &(_a->cpus[0]); /* every thread gets the same array, indexed by its th_id */
+ }
+
+ for (int n_run = 0 ; n_run < _b->max_run ; n_run++) {
+ if (_b->aff_mode == aff_random) {
+ pb_wait(_a->p_barrier);
+ if (_a->z_id == 0) perm_prefix_ints(&ctx.seed,_a->cpus,_b->ncpus_used,_b->ncpus); /* only instance 0 permutes, between two barrier waits */
+ pb_wait(_a->p_barrier);
+ } else {
+ }
+ if (_b->verbose>1) fprintf(stderr,"Run %i of %i\r", n_run, _b->max_run);
+ reinit(&ctx);
+ if (_b->do_change) perm_funs(&ctx.seed,fun,N);
+ for (int _p = NT-1 ; _p >= 0 ; _p--) {
+ launch(&thread[_p],fun[_p],&parg[_p]);
+ }
+ if (_b->do_change) perm_threads(&ctx.seed,thread,NT);
+ for (int _p = NT-1 ; _p >= 0 ; _p--) {
+ join(&thread[_p]);
+ }
+ /* Log final states */
+ for (int _i = _b->size_of_test-1 ; _i >= 0 ; _i--) {
+ int _out_0_eax_i = ctx.out_0_eax[_i];
+ int _out_1_eax_i = ctx.out_1_eax[_i];
+ outcome_t o;
+ int cond;
+
+ cond = final_ok(final_cond(_out_0_eax_i,_out_1_eax_i));
+ o[out_0_eax_f] = _out_0_eax_i;
+ o[out_1_eax_f] = _out_1_eax_i;
+ add_outcome(hist,1,o,cond);
+ if (cond) { hist->n_pos++; } else { hist->n_neg++; }
+ }
+ }
+
+ finalize(&ctx);
+ return hist;
+}
+
+#define ENOUGH 10
+
+/* Print the test report on out: histogram, verdict, witness counts,
+   condition, hash, observation summary and elapsed time.
+   Returns 1 when the condition was never observed (p_true == 0), else 0.
+   The cmd argument is not used. */
+static int postlude(FILE *out,cmd_t *cmd,hist_t *hist,count_t p_true,count_t p_false,tsc_t total) {
+  const int cond = p_true == 0;
+  const char *observation;
+
+  if (!p_true) {
+    observation = "Never";
+  } else if (!p_false) {
+    observation = "Always";
+  } else {
+    observation = "Sometimes";
+  }
+
+  fprintf(out,"Test SAL Forbidden\n");
+  fprintf(out,"Histogram (%i states)\n",finals_outs(hist->outcomes));
+  just_dump_outcomes(out,hist);
+  fprintf(out,"%s\n",cond?"Ok":"No");
+  fprintf(out,"\nWitnesses\n");
+  fprintf(out,"Positive: %" PCTR ", Negative: %" PCTR "\n",p_false,p_true);
+  fprintf(out,"Condition ~exists (0:EAX=0 /\\ 1:EAX=0) is %svalidated\n",cond ? "" : "NOT ");
+  fprintf(out,"Hash=d8f89591b2adad11d42d3eeb22d212c6\n");
+  fprintf(out,"Observation SAL %s %" PCTR " %" PCTR "\n",observation,p_true,p_false);
+  fprintf(out,"Time SAL %.2f\n",total / 1000000.0);
+  fflush(out);
+  return cond;
+}
+/* Run the whole test: set parameters from cmd, choose the number of
+ * concurrent instances, lay out the cpu array, run n_exe instances of zyva
+ * (n_exe-1 in new threads, the last one in the calling thread), merge their
+ * histograms and print the report on out.
+ * Frees cmd->aff_cpus. The def_all_cpus argument is not used here.
+ * Returns the value of postlude. */
+static int run(cmd_t *cmd,cpus_t *def_all_cpus,FILE *out) {
+ tsc_t start = timeofday();
+ param_t prm ;
+/* Set some parameters */
+ prm.verbose = cmd->verbose;
+ prm.size_of_test = cmd->size_of_test;
+ prm.max_run = cmd->max_run;
+ prm.stride = cmd->stride;
+ prm.do_change = 1;
+ if (cmd->fix) prm.do_change = 0;
+/* Computes number of test concurrent instances */
+ int n_avail = cmd->avail > 0 ? cmd->avail : cmd->aff_cpus->sz;
+ if (n_avail > cmd->aff_cpus->sz) log_error("Warning: avail=%i, available=%i\n",n_avail, cmd->aff_cpus->sz);
+ int n_exe;
+ if (cmd->n_exe > 0) {
+ n_exe = cmd->n_exe;
+ } else {
+ n_exe = n_avail < N ? 1 : n_avail / N; /* at least one instance */
+ }
+/* Set affinity parameters */
+ cpus_t *all_cpus = cmd->aff_cpus;
+ int aff_cpus_sz = cmd->aff_mode == aff_random ? max(all_cpus->sz,N*n_exe) : N*n_exe;
+ int aff_cpus[aff_cpus_sz];
+ prm.aff_mode = cmd->aff_mode;
+ prm.ncpus = aff_cpus_sz;
+ prm.ncpus_used = N*n_exe;
+/* Show parameters to user */
+ if (prm.verbose) {
+ log_error( "SAL: n=%i, r=%i, s=%i",n_exe,prm.max_run,prm.size_of_test);
+ log_error(", st=%i",prm.stride);
+ if (cmd->aff_mode == aff_incr) {
+ log_error( ", i=%i",cmd->aff_incr);
+ } else if (cmd->aff_mode == aff_random) {
+ log_error(", +ra");
+ } else if (cmd->aff_mode == aff_custom) {
+ log_error(", +ca");
+ } else if (cmd->aff_mode == aff_scan) {
+ log_error(", +sa");
+ }
+ log_error(", p='");
+ cpus_dump(stderr,cmd->aff_cpus);
+ log_error("'");
+ log_error("\n");
+ }
+ if (cmd->aff_mode == aff_random) {
+ for (int k = 0 ; k < aff_cpus_sz ; k++) {
+ aff_cpus[k] = all_cpus->cpu[k % all_cpus->sz]; /* cycle through the available cpus */
+ }
+ }
+ hist_t *hist = NULL;
+ int n_th = n_exe-1; /* the last instance runs in the calling thread */
+ pthread_t th[n_th];
+ zyva_t zarg[n_exe];
+ pm_t *p_mutex = pm_create();
+ pb_t *p_barrier = pb_create(n_exe);
+ int next_cpu = 0;
+ int delta = cmd->aff_incr;
+ if (delta <= 0) {
+ for (int k=0 ; k < all_cpus->sz ; k++) all_cpus->cpu[k] = -1; /* negative cpu numbers: force_one_affinity then does nothing */
+ delta = 1;
+ } else {
+ delta %= all_cpus->sz;
+ }
+ int start_scan=0, max_start=gcd(delta,all_cpus->sz);
+ int *aff_p = aff_cpus;
+ for (int k=0 ; k < n_exe ; k++) {
+ zyva_t *p = &zarg[k];
+ p->_p = &prm;
+ p->p_mutex = p_mutex; p->p_barrier = p_barrier;
+ p->z_id = k;
+ p->cpus = aff_p; /* slice of N entries of aff_cpus for instance k */
+ if (cmd->aff_mode != aff_incr) {
+ aff_p += N;
+ } else {
+ for (int i=0 ; i < N ; i++) {
+ *aff_p = all_cpus->cpu[next_cpu]; aff_p++;
+ next_cpu += delta; next_cpu %= all_cpus->sz;
+ if (next_cpu == start_scan) { /* back at the start of the scan: move to the next starting point */
+ start_scan++ ; start_scan %= max_start;
+ next_cpu = start_scan;
+ }
+ }
+ }
+ if (k < n_th) {
+ launch(&th[k],zyva,p);
+ } else {
+ hist = (hist_t *)zyva(p);
+ }
+ }
+
+ count_t n_outs = prm.size_of_test; n_outs *= prm.max_run; /* outcomes expected from one instance */
+ for (int k=0 ; k < n_th ; k++) {
+ hist_t *hk = (hist_t *)join(&th[k]);
+ if (sum_hist(hk) != n_outs || hk->n_pos + hk->n_neg != n_outs) {
+ fatal("SAL, sum_hist");
+ }
+ merge_hists(hist,hk);
+ free_hist(hk);
+ }
+ cpus_free(all_cpus);
+ tsc_t total = timeofday() - start;
+ pm_free(p_mutex);
+ pb_free(p_barrier);
+
+ n_outs *= n_exe ; /* outcomes expected from all instances together */
+ if (sum_hist(hist) != n_outs || hist->n_pos + hist->n_neg != n_outs) {
+ fatal("SAL, sum_hist") ;
+ }
+ count_t p_true = hist->n_pos, p_false = hist->n_neg;
+ int cond = postlude(out,cmd,hist,p_true,p_false,total);
+ free_hist(hist);
+ return cond;
+}
+
+/* Entry point: read the cpus available to the process, parse the command
+ * line over the defaults, run the test and turn its result into the exit
+ * status. */
+int main(int argc, char **argv) {
+ cpus_t *def_all_cpus = read_force_affinity(AVAIL,0);
+ if (def_all_cpus->sz < N) { /* fewer cpus than test threads: exit without running, status is success */
+ cpus_free(def_all_cpus);
+ return EXIT_SUCCESS;
+ }
+ cmd_t def = { 0, NUMBER_OF_RUN, SIZE_OF_TEST, STRIDE, AVAIL, 0, 0, aff_incr, 0, 0, AFF_INCR, def_all_cpus, NULL, -1, MAX_LOOP, NULL, NULL, -1, -1, -1, 0, 0};
+ cmd_t cmd = def;
+ parse_cmd(argc,argv,&def,&cmd);
+ int cond = run(&cmd,def_all_cpus,stdout);
+ if (def_all_cpus != cmd.aff_cpus) cpus_free(def_all_cpus); /* run already freed cmd.aff_cpus */
+ return cond ? EXIT_SUCCESS : EXIT_FAILURE;
+}
new file mode 100644
@@ -0,0 +1,159 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#include <stdio.h>
+#include <sched.h>
+#include <unistd.h>
+#include "utils.h"
+#include "affinity.h"
+
+#ifdef CPUS_DEFINED
+/* Read the affinity mask of the calling thread and return the cpus it
+   contains, in increasing order, as a freshly created cpus_t.
+   Exits through errexit when the mask cannot be read. */
+cpus_t *read_affinity(void) {
+  cpu_set_t mask;
+  int res = pthread_getaffinity_np(pthread_self(), sizeof(mask), &mask) ;
+  if (res != 0) {
+    errexit("pthread_getaffinity_np",res);
+  }
+
+  /* first pass: count the cpus in the mask */
+  int count = 0 ;
+  for (int cpu = 0 ; cpu < CPU_SETSIZE ; cpu++) {
+    if (CPU_ISSET(cpu,&mask)) count++ ;
+  }
+
+  /* second pass: store them */
+  cpus_t *r = cpus_create(count) ;
+  int next = 0 ;
+  for (int cpu = 0 ; cpu < CPU_SETSIZE ; cpu++) {
+    if (CPU_ISSET(cpu,&mask)) r->cpu[next++] = cpu ;
+  }
+  return r ;
+}
+
+#endif
+/* Attempt to force processors wake up, on devices where unused procs
+ go to sleep... */
+
+
+#ifdef FORCE_AFFINITY
+const static tsc_t sec = (tsc_t)1000000 ;
+
+/* Thread body: spin until timeofday() reaches the deadline p points to */
+static void* loop(void *p) {
+  const tsc_t deadline = *(tsc_t *)p ;
+  for ( ; timeofday() < deadline ; ) { }
+  return NULL ;
+}
+
+
+/* Launch sz threads that all spin for duration d, then join them */
+static void warm_up(int sz, tsc_t d) {
+  pthread_t th[sz];
+  tsc_t deadline = d + timeofday() ;
+  int k ;
+  for (k = 0 ; k < sz ; k++) {
+    launch(&th[k], loop, &deadline) ;
+  }
+  for (k = 0 ; k < sz ; k++) {
+    join(&th[k]) ;
+  }
+}
+
+#ifdef CPUS_DEFINED
+/* Warm up, then read the affinity of the calling thread, repeating until
+   at least n_avail cpus are reported. When verbose is set, each
+   insufficient reading is printed on stderr. */
+cpus_t *read_force_affinity(int n_avail, int verbose) {
+  const int n_spin = (n_avail <= 1 ? 1 : n_avail) + 1 ;
+  const tsc_t spin_time = sec / 100 ;
+  cpus_t *r ;
+
+  while (1) {
+    warm_up(n_spin,spin_time) ;
+    r = read_affinity() ;
+    if (r->sz >= n_avail) break ;
+    if (verbose) {
+      fprintf(stderr,"Read affinity: '") ;
+      cpus_dump(stderr,r) ;
+      fprintf(stderr,"'\n") ;
+    }
+    cpus_free(r) ;
+  }
+  return r ;
+}
+#endif
+#endif
+
+#ifdef CPUS_DEFINED
+
+/* Enforcing processor affinity.
+ Notice that logical processor numbers may be negative.
+ In that case, affinity setting is ignored */
+
+
+/* Restrict the calling thread to the non-negative cpus listed in p.
+   When p lists no such cpu the affinity is left untouched. */
+void write_affinity(cpus_t *p) {
+  cpu_set_t mask;
+  int n_set = 0 ;
+
+  CPU_ZERO(&mask) ;
+  for (int k = 0 ; k < p->sz ; k++) {
+    const int cpu = p->cpu[k] ;
+    if (cpu < 0) continue ;
+    CPU_SET(cpu,&mask) ;
+    n_set++ ;
+  }
+  if (n_set == 0) return ;
+
+  int r = pthread_setaffinity_np(pthread_self(),sizeof(mask),&mask) ;
+  if (r != 0) {
+    errexit("pthread_setaffinity_np",r) ;
+  }
+}
+#endif
+
+/* Pin the calling thread to cpu a; a negative a means "do nothing".
+   Exits through errexit when the affinity cannot be set. */
+void write_one_affinity(int a) {
+  if (a < 0) return ;
+
+  cpu_set_t mask;
+  CPU_ZERO(&mask) ;
+  CPU_SET(a,&mask) ;
+  int r = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) ;
+  if (r != 0) {
+    errexit("pthread_setaffinity_np",r) ;
+  }
+}
+
+#ifdef FORCE_AFFINITY
+/* Get the number of present cpus, fragile */
+
+static const char *present = "/sys/devices/system/cpu/present" ;
+
+/*
+ * Count the cpus described by the start of the 'present' file.
+ * Accepted forms are a range "A-B", giving B-A+1, and a lone number
+ * followed by end of line or end of file, giving 1. The lone number is
+ * what a single-cpu machine reports; it used to be rejected.
+ * Anything after a first range is ignored, as before.
+ * Returns -1 when the file cannot be opened or does not parse.
+ */
+static int get_present(void) {
+  FILE *fp = fopen(present,"r") ;
+  if (fp == NULL) return -1 ;
+
+  int result = -1 ;
+  int first, last ;
+  if (fscanf(fp,"%d",&first) == 1) {
+    int sep = fgetc(fp) ;
+    if (sep == '-') {
+      if (fscanf(fp,"%d",&last) == 1 && last >= first) {
+        result = last-first+1 ;
+      }
+    } else if (sep == '\n' || sep == EOF) {
+      result = 1 ; /* a single cpu is present */
+    }
+  }
+  fclose(fp) ;
+  return result ;
+}
+
+/* Pin the calling thread to cpu a, retrying until it succeeds; a negative
+   a means "do nothing". After each failure, spinning threads are run for a
+   while before the next attempt: as many as get_present reports, or sz+1
+   when that number is unknown. name prefixes the messages printed on
+   stderr when verbose is set. */
+void force_one_affinity(int a, int sz,int verbose, char *name) {
+  if (a < 0) return ;
+
+  cpu_set_t mask;
+  CPU_ZERO(&mask) ;
+  CPU_SET(a,&mask) ;
+  while (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) != 0) {
+    if (verbose) {
+      fprintf(stderr,"%s: force %i failed\n",name,a) ;
+    }
+    int nwarm = get_present() ;
+    if (verbose > 1) {
+      fprintf(stderr,"%s: present=%i\n",name,nwarm) ;
+    }
+    if (nwarm < 0) nwarm = sz+1 ;
+    warm_up(nwarm,sec/100) ;
+  }
+}
+#endif
new file mode 100644
@@ -0,0 +1,34 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#ifndef _AFFINITY_H
+#define _AFFINITY_H 1
+
+#include "utils.h"
+
+#ifdef CPUS_DEFINED
+cpus_t *read_affinity(void) ;
+#ifdef FORCE_AFFINITY
+cpus_t *read_force_affinity(int n_avail, int verbose) ;
+#endif
+void write_affinity(cpus_t *p) ;
+#endif
+
+void write_one_affinity(int cpu) ;
+#ifdef FORCE_AFFINITY
+void force_one_affinity(int cpu, int sz, int verbose, char *name) ;
+#endif
+
+#endif
new file mode 100644
@@ -0,0 +1,10 @@
+GCC=gcc
+GCCOPTS="-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 -fomit-frame-pointer -O2 -pthread" # line 1 above must stay first: run.sh prints it with 'head -1 comp.sh'
+LINKOPTS=""
+/bin/rm -f *.exe *.s # remove outputs of a previous build
+$GCC $GCCOPTS -O2 -c affinity.c # support objects
+$GCC $GCCOPTS -O2 -c outs.c
+$GCC $GCCOPTS -O2 -c utils.c
+$GCC $GCCOPTS -O2 -c litmus_rand.c
+$GCC $GCCOPTS $LINKOPTS -o SAL.exe affinity.o outs.o utils.o litmus_rand.o SAL.c # the test executable
+$GCC $GCCOPTS -S SAL.c && awk -f show.awk SAL.s > SAL.t && /bin/rm SAL.s # listing of the litmus code, shown by run.sh
new file mode 100644
@@ -0,0 +1,64 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#include <stdint.h>
+#include "litmus_rand.h"
+
+/*
+ Simple generator
+ http://en.wikipedia.org/wiki/Linear_congruential_generator
+*/
+
+
+/*
+
+ From ocaml sources: (globroot.c)
+ Linear congruence with modulus = 2^32, multiplier = 69069
+ (Knuth vol 2 p. 106, line 15 of table 1), additive = 25173.
+
+
+ Knuth (vol 2 p. 13) shows that the least significant bits are
+ "less random" than the most significant bits with a modulus of 2^m.
+ We just swap half words, enough? */
+
+static const uint32_t a = 69069;
+static const uint32_t c = 25173 ;
+
+/* Advance the generator state by one linear congruential step and return
+   the new state with its two 16-bit halves exchanged */
+inline static uint32_t unlocked_rand(st_t *st) {
+  const uint32_t next = a * *st + c ;
+  *st = next ;
+  return (next >> 16) | ((next & 0xffff) << 16) ;
+}
+
+/* One pseudorandom bit (0 or 1); advances the state *st */
+int rand_bit(st_t *st) {
+  return (int)(unlocked_rand(st) & 1) ;
+}
+
+static const uint32_t r_max = UINT32_MAX ;
+
+/*
+ * Pseudorandom number in [0,k), drawn by rejection: a draw r is kept only
+ * when the block of k consecutive values containing r fits below 2^32, so
+ * that every result is equally likely.
+ * st is the generator state, advanced once per draw.
+ * For k == 0 the range is empty: 0 is returned and the state is left
+ * untouched, instead of dividing by zero.
+ */
+uint32_t rand_k (st_t *st,uint32_t k) {
+  if (k == 0) return 0 ;
+  uint32_t r, v ;
+  do {
+    r = unlocked_rand(st) ;
+    v = r % k ;
+  } while (r-v > r_max-k+1) ;
+  return v ;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#ifndef _LITMUS_RAND_H
+#define _LITMUS_RAND_H 1
+
+#include <stdint.h>
+
+/* type of state for pseudorandom generators */
+typedef uint32_t st_t ;
+
+/* Unlocked random bit */
+
+int rand_bit(st_t *st) ;
+uint32_t rand_k(st_t *st,uint32_t n) ;
+
+#endif
new file mode 100644
@@ -0,0 +1,148 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include "outs.h"
+
+/**********************/
+/* Lexicographic tree */
+/**********************/
+
+#if 0
+static void debug(int *t, int i, int j) {
+ for (int k=i ; k <= j ; k++)
+ fprintf(stderr,"%i",t[k]) ;
+ fprintf(stderr,"\n") ;
+}
+#endif
+
+
+void *malloc_check(size_t sz) ;
+
+/* New tree cell for key k: zero count, show flag clear, no sibling, no child */
+static outs_t *alloc_outs(intmax_t k) {
+  outs_t *cell = malloc_check(sizeof *cell) ;
+  cell->next = NULL ;
+  cell->down = NULL ;
+  cell->show = 0 ;
+  cell->c = 0 ;
+  cell->k = k ;
+  return cell ;
+}
+
+/* Free a whole tree: the sibling list is walked in a loop, each child
+   subtree is freed recursively. A NULL tree is accepted. */
+void free_outs(outs_t *p) {
+  while (p != NULL) {
+    outs_t *sibling = p->next ;
+    free_outs(p->down) ;
+    free(p) ;
+    p = sibling ;
+  }
+}
+
+/* Worth writing as a loop, since called many times.
+ * Add count c to the outcome k[0..i] in tree p, creating cells as needed.
+ * The tree is keyed on k[i] at the top level, then k[i-1] one level down,
+ * and so on down to k[0]; sibling lists are kept in increasing key order.
+ * show is or-ed into the flag of the final cell.
+ * Returns the head of the top-level list, which differs from p only when a
+ * new first cell had to be inserted. */
+static outs_t *loop_add_outcome_outs(outs_t *p, intmax_t *k, int i, count_t c, int show) {
+ outs_t *r = p ;
+ if (p == NULL || k[i] < p->k) { /* new head of the top-level list */
+ r = alloc_outs(k[i]) ;
+ r->next = p ;
+ p = r ;
+ }
+ for ( ; ; ) {
+ outs_t **q ; /* the link that led to the next p, patched on insertion */
+ if (k[i] > p->k) { /* key is further along the sibling list */
+ q = &(p->next) ;
+ p = p->next ;
+ } else if (i <= 0) { /* last key matched: update the cell */
+ p->c += c ;
+ p->show = show || p->show ;
+ return r ;
+ } else { /* key matched: go down to the next key */
+ i-- ;
+ q = &(p->down) ;
+ p = p->down ;
+ }
+ if (p == NULL || k[i] < p->k) { /* no cell for k[i] here: insert one */
+ outs_t *a = alloc_outs(k[i]) ;
+ a->next = p ;
+ p = a ;
+ *q = a ;
+ }
+ }
+}
+/* Add count c to the outcome k (sz values) in tree p; returns the new root. */
+outs_t *add_outcome_outs(outs_t *p, intmax_t *k, int sz, count_t c, int show) {
+ return loop_add_outcome_outs(p,k,sz-1,c,show) ;
+}
+
+/* Sum of the counts of every cell of the tree */
+count_t sum_outs(outs_t *p) {
+  count_t total = 0 ;
+  while (p != NULL) {
+    total += p->c ;
+    total += sum_outs(p->down) ;
+    p = p->next ;
+  }
+  return total ;
+}
+
+/* Number of cells of the tree whose count is positive */
+int finals_outs(outs_t *p) {
+  int n = 0 ;
+  while (p != NULL) {
+    if (p->c > 0) n++ ;
+    n += finals_outs(p->down) ;
+    p = p->next ;
+  }
+  return n ;
+}
+
+/* Print the outcomes of tree p through dout. buff receives the keys met on
+   the way down: the key of a cell at this level goes into buff[sz-1].
+   A cell with a positive count is printed and its children are not
+   visited; otherwise the children are dumped one level down. */
+void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff,int sz) {
+  while (p != NULL) {
+    buff[sz-1] = p->k ;
+    if (p->c > 0) {
+      dout(chan,buff,p->c,p->show) ;
+    } else if (p->down != NULL) {
+      dump_outs(chan,dout,p->down,buff,sz-1) ;
+    }
+    p = p->next ;
+  }
+}
+
+/* merge p and q into p.
+ * Cells with equal keys have their counts added and their show flags or-ed,
+ * and their children merged recursively; keys present only in q get a fresh
+ * cell in p. q itself is not modified. Returns the head of the merged list. */
+static outs_t *do_merge_outs(outs_t *p, outs_t *q) {
+ if (q == NULL) { // Nothing to add
+ return p ;
+ }
+ if (p == NULL || q->k < p->k) { // Need a cell
+ outs_t *r = alloc_outs(q->k) ;
+ r->next = p ;
+ p = r ;
+ }
+ if (p->k == q->k) { // Same key: combine, then go on with both tails
+ p->c += q->c ;
+ p->show = p->show || q->show ;
+ p->down = do_merge_outs(p->down,q->down) ;
+ p->next = do_merge_outs(p->next,q->next) ;
+ } else { // Key of p is smaller: q belongs further along the list
+ p->next = do_merge_outs(p->next,q) ;
+ }
+ return p ;
+}
+/* Merge tree q into tree p and return the result; sz is not used. */
+outs_t *merge_outs(outs_t *p, outs_t *q, int sz) {
+ return do_merge_outs(p,q) ;
+}
+
+/* 1 when both trees have the same shape and every pair of matching cells
+   agrees on key, count and show flag; 0 otherwise */
+int same_outs(outs_t *p,outs_t *q) {
+  for ( ; p != NULL && q != NULL ; p = p->next, q = q->next) {
+    if (p->k != q->k) return 0 ;
+    if (p->c != q->c) return 0 ;
+    if (p->show != q->show) return 0 ;
+    if (!same_outs(p->down,q->down)) return 0 ;
+  }
+  /* equal only when both lists ended together */
+  return p == q ;
+}
new file mode 100644
@@ -0,0 +1,49 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#ifndef _OUTS_H
+#define _OUTS_H 1
+
+#include <stdio.h>
+
+/************************/
+/* Histogram structure */
+/************************/
+
+
+/* 64bit counters, should be enough! */
+#include <inttypes.h>
+typedef uint64_t count_t;
+#define PCTR PRIu64
+
+
+
+
+typedef struct outs_t {
+ struct outs_t *next,*down ;
+ count_t c ;
+ intmax_t k ;
+ int show ;
+} outs_t ;
+
+void free_outs(outs_t *p) ;
+outs_t *add_outcome_outs(outs_t *p, intmax_t *o, int sz, count_t v, int show) ;
+int finals_outs(outs_t *p) ;
+count_t sum_outs(outs_t *p) ;
+typedef void dump_outcome(FILE *chan, intmax_t *o, count_t c, int show) ;
+void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff, int sz) ;
+outs_t *merge_outs(outs_t *p,outs_t *q, int sz) ;
+int same_outs(outs_t *p,outs_t *q) ;
+#endif
new file mode 100755
@@ -0,0 +1,55 @@
+date
+LITMUSOPTS="${@:-$LITMUSOPTS}" # command line arguments take precedence over the environment
+SLEEP=0
+if [ ! -f SAL.no ]; then # a file named SAL.no disables the test
+cat <<'EOF'
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Results for x86.tests/SAL.litmus %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+X86 SAL
+"Fre PodWR Fre PodWR"
+
+{x=0; y=0;}
+
+ P0 | P1 ;
+ MOV [x],$1 | MOV [y],$1 ;
+ MFENCE | MFENCE ;
+ MOV EAX,[y] | MOV EAX,[x] ;
+
+~exists (0:EAX=0 /\ 1:EAX=0)
+Generated assembler
+EOF
+cat SAL.t
+$QEMU ./SAL.exe -q $LITMUSOPTS # QEMU is normally empty; when set it prefixes the command
+ret=$?;
+if [ $ret -eq 1 ]; then # NOTE(review): only status 1 stops the script; other non-zero statuses are ignored, confirm this is intended
+ echo "FAILED";
+ exit $ret;
+fi
+fi
+sleep $SLEEP
+
+cat <<'EOF'
+Revision exported, version 7.22
+Command line: ../litmus-7.22/litmus -exit true -mach ../alex_litmus/overdrive01 -o run.x86 x86.tests/SAL.litmus
+Parameters
+#define SIZE_OF_TEST 100000
+#define NUMBER_OF_RUN 10
+#define AVAIL 0
+#define STRIDE 1
+#define MAX_LOOP 0
+/* gcc options: -D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 -fomit-frame-pointer -O2 -pthread */
+/* barrier: user */
+/* launch: changing */
+/* affinity: incr0 */
+/* alloc: dynamic */
+/* memory: direct */
+/* stride: 1 */
+/* safer: write */
+/* preload: random */
+/* speedcheck: no */
+/* proc used: 0 */
+EOF
+head -1 comp.sh # prints the first line of comp.sh, the compiler setting
+echo "LITMUSOPTS=$LITMUSOPTS"
+date
new file mode 100644
@@ -0,0 +1,2 @@
+/START _litmus_P/ { print $0 } # echo the "#START _litmus_Pn" marker that opens the code of a thread
+/_litmus_P[0-9]+_[0-9]+/ { getline; print $0 ; } # for a "#_litmus_Pn_k" marker, print the line that follows it
new file mode 100644
@@ -0,0 +1,1148 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include "utils.h"
+
+/********/
+/* Misc */
+/********/
+
+FILE *errlog ;
+
+static void checkerrlog(void) {
+ if (!errlog) errlog = stderr ;
+}
+
+/* Select chan as the stream used by log_error(), fatal() and errexit(). */
+void seterrlog(FILE *chan)
+{
+  errlog = chan ;
+}
+
+int log_error(const char *fmt, ...) {
+ int result;
+ va_list args;
+ va_start(args, fmt);
+ checkerrlog() ;
+ result = vfprintf(errlog, fmt, args);
+ fflush(errlog);
+ va_end(args);
+ return result;
+}
+
+void fatal(char *msg) {
+ log_error("Failure: %s\n", msg) ;
+ fclose(errlog);
+ fprintf(stdout,"Failure: %s\n", msg) ;
+ exit(1) ;
+}
+
+void errexit(char *msg,int err) {
+ log_error("%s: %s\n",msg,strerror(err)) ;
+ fclose(errlog);
+ exit(2) ;
+}
+
+/* malloc wrapper that never returns NULL on failure: an allocation failure
+   ends the process through errexit(). A request for zero bytes yields NULL
+   without calling malloc. */
+void *malloc_check(size_t sz) {
+  void *mem = NULL ;
+  if (sz != 0) {
+    mem = malloc(sz) ;
+    if (mem == NULL) {
+      /* malloc may fail without setting errno */
+      if (errno == 0) errno = ENOMEM ;
+      errexit("malloc",errno) ;
+    }
+  }
+  return mem ;
+}
+
+/* Larger of two ints. */
+int max(int n, int m) {
+  if (m > n) return m ;
+  return n ;
+}
+
+/* Print the n first ints of p on fp, separated by commas (nothing when
+   n <= 0). */
+void pp_ints(FILE *fp,int *p,int n) {
+  for (int i = 0 ; i < n ; i++) {
+    if (i == 0) {
+      fprintf(fp,"%i",p[i]) ;
+    } else {
+      fprintf(fp,",%i",p[i]) ;
+    }
+  }
+}
+
+
+/* Round address p up to the next multiple of sz (p itself when it already
+   is a multiple). */
+void *do_align(void *p,size_t sz) {
+  uintptr_t addr = (uintptr_t)p ;
+  addr = ((addr + (sz-1)) / sz) * sz ;
+  return (void *)addr ;
+}
+
+/* Return an address lying sz/2 bytes away from the sz-aligned address at or
+   above p: below it when that stays at or above p, above it otherwise. */
+void *do_noalign(void *p,size_t sz) {
+  char *aligned = do_align(p,sz) ;
+  size_t half = sz/2 ;
+  char *res = aligned - half ;
+  if (res < (char *)p) {
+    res = aligned + half ;
+  }
+  return res ;
+}
+
+/* Copy the file at path to out, preceded by msg on a line of its own.
+   Nothing at all is printed when the file cannot be opened. A newline is
+   appended when the copied text does not end with one. */
+void cat_file(char *path, char *msg, FILE *out) {
+  FILE *in = fopen(path,"r") ;
+  if (!in) return ;
+  fprintf(out,"%s\n",msg) ;
+  int last = '\n' ; /* an empty file counts as newline-terminated */
+  for (int ch = fgetc(in) ; ch != EOF ; ch = fgetc(in)) {
+    fputc(ch,out) ;
+    last = ch ;
+  }
+  fclose(in) ;
+  if (last != '\n') fputc('\n',out) ;
+}
+
+/************/
+/* CPU sets */
+/************/
+
+/* Allocate a cpu set with room for sz entries; the entries themselves are
+   left uninitialised. */
+cpus_t *cpus_create(int sz) {
+  cpus_t *set = malloc_check(sizeof(*set)) ;
+  set->cpu = malloc_check(sizeof(set->cpu[0])*sz) ;
+  set->sz = sz ;
+  return set ;
+}
+
+/* Allocate a cpu set of sz entries initialised from the sz first items
+   of t. */
+cpus_t *cpus_create_init(int sz, int t[]) {
+  cpus_t *set = cpus_create(sz) ;
+  int i = 0 ;
+  while (i < sz) {
+    set->cpu[i] = t[i] ;
+    i++ ;
+  }
+  return set ;
+}
+
+/* Release a cpu set: first its entry array, then the descriptor. */
+void cpus_free(cpus_t *p)
+{
+  int *entries = p->cpu ;
+  free(entries) ;
+  free(p) ;
+}
+
+/* Print the entries of cpu set p on fp as a comma separated list. */
+void cpus_dump(FILE *fp, cpus_t *p)
+{
+  pp_ints(fp, p->cpu, p->sz) ;
+}
+
+/* Print array p (sz ints) by groups of nprocs values, one group per line:
+   "[v0,v1,...] {m0,m1,...}" where mi = cm->cpu[vi]. */
+void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) {
+  for (int base = 0 ; base < sz ; base += nprocs) {
+    int *group = &p[base] ;
+    fprintf(fp,"[") ;
+    pp_ints(fp,group,nprocs) ;
+    fprintf(fp,"] {") ;
+    for (int i = 0 ; i < nprocs ; i++) {
+      if (i == 0) {
+        fprintf(fp,"%i",cm->cpu[group[i]]) ;
+      } else {
+        fprintf(fp,",%i",cm->cpu[group[i]]) ;
+      }
+    }
+    fprintf(fp,"}\n") ;
+  }
+}
+
+/*************/
+/* Int array */
+/*************/
+
+
+/* Print int array p on fp as comma separated "index:value" pairs. */
+void ints_dump(FILE *fp, ints_t *p) {
+  for (int i = 0 ; i < p->sz ; i++) {
+    if (i == 0) {
+      fprintf(fp,"%i:%i",0,p->t[0]) ;
+    } else {
+      fprintf(fp,",%i:%i",i,p->t[i]) ;
+    }
+  }
+}
+
+/***********************/
+/* Prefetch directives */
+/***********************/
+/* Print the prefetch directives that differ from `none`, as comma separated
+   "proc:name=L" items, where L is 'F' (flush), 'T' (touch), 'W'
+   (touch_store) or 'I' (any other directive). */
+void prefetch_dump(FILE *fp, prfdirs_t *p) {
+  int printed = 0 ; /* becomes 1 once an item has been output */
+  for (int proc = 0 ; proc < p->nthreads ; proc++) {
+    prfproc_t *th = &p->t[proc] ;
+    for (int v = 0 ; v < th->nvars ; v++) {
+      prfone_t *one = &th->t[v] ;
+      prfdir_t dir = one->dir ;
+      if (dir == none) continue ;
+      char letter ;
+      if (dir == flush) letter = 'F' ;
+      else if (dir == touch) letter = 'T' ;
+      else if (dir == touch_store) letter = 'W' ;
+      else letter = 'I' ;
+      if (printed) fprintf(fp,",") ;
+      printed = 1 ;
+      fprintf(fp,"%i:%s=%c",proc,one->name,letter) ;
+    }
+  }
+}
+
+/* Assign directive d to every variable of every thread described by p. */
+static void set_prefetch(prfdirs_t *p, prfdir_t d) {
+  for (int proc = 0 ; proc < p->nthreads ; proc++) {
+    prfproc_t *th = &p->t[proc] ;
+    prfone_t *vars = th->t ;
+    int count = th->nvars ;
+    for (int v = 0 ; v < count ; v++) vars[v].dir = d ;
+  }
+}
+
+/* Arithmetic helper */
+
+/* Greatest common divisor by Euclid's algorithm; gcd(0,b) is b and
+   gcd(a,0) is a. */
+int gcd(int a, int b) {
+  while (a != 0) {
+    int rem = b % a ;
+    b = a ;
+    a = rem ;
+  }
+  return b ;
+}
+
+/* SMT description */
+
+
+/* Build a core map of navail entries where consecutive runs of nways
+   logical processors share a core: 0,..,0,1,..,1,...
+   Only the first (navail/nways)*nways entries are written. */
+cpus_t *coremap_seq(int navail, int nways) {
+  cpus_t *map = cpus_create(navail) ;
+  int ncores = navail / nways ;
+  int *slot = map->cpu ;
+  for (int core = 0 ; core < ncores ; core++) {
+    int way = 0 ;
+    while (way < nways) {
+      *slot++ = core ;
+      way++ ;
+    }
+  }
+  return map ;
+}
+
+/* Build a core map of navail entries where core numbers cycle:
+   0,1,..,ncores-1,0,1,.. with ncores = navail/nways.
+   Only the first nways*ncores entries are written. */
+cpus_t *coremap_end(int navail, int nways) {
+  cpus_t *map = cpus_create(navail) ;
+  int ncores = navail / nways ;
+  int *slot = map->cpu ;
+  for (int way = 0 ; way < nways ; way++) {
+    int core = 0 ;
+    while (core < ncores) {
+      *slot++ = core ;
+      core++ ;
+    }
+  }
+  return map ;
+}
+
+/* Inverse of a core map: for each core, the set of logical processors
+   that belong to it. */
+typedef struct {
+ int ncores ; /* number of entries in core[] */
+ cpus_t **core ; /* core[c] is the cpu set of core c */
+} mapcore_t ;
+
+
+/* Release an inverse core map together with all its per-core cpu sets. */
+static void mapcore_free(mapcore_t *p) {
+  int c = 0 ;
+  while (c < p->ncores) {
+    cpus_free(p->core[c]) ;
+    c++ ;
+  }
+  free(p->core) ;
+  free(p) ;
+}
+
+/* Disabled code (never compiled): inverts a full core map p, assuming
+   nways logical processors per core. */
+#if 0
+static mapcore_t *inverse_coremap(cpus_t *p, int nways) {
+ mapcore_t *r = malloc_check(sizeof(*r)) ;
+ r->ncores = p->sz / nways ;
+ r->core = malloc_check(r->ncores * sizeof(r->core[0])) ;
+ /* Per-core sets start empty, with room for nways entries */
+ for (int k = 0 ; k < r->ncores ; k++) {
+ r->core[k] = cpus_create(nways) ;
+ r->core[k]->sz = 0 ;
+ }
+ /* Append each logical processor k to the set of its core */
+ for (int k = 0 ; k < p->sz ; k++) {
+ int c = p->cpu[k] ;
+ cpus_t *q = r->core[c] ;
+ q->cpu[q->sz++] = k ;
+ }
+ return r ;
+}
+#endif
+
+/* Number of cores named by core map cm: the largest entry plus one
+   (1 for an empty map or when no entry exceeds 0). */
+static int get_ncores(cpus_t *cm) {
+  int top = 0 ;
+  for (int i = 0 ; i < cm->sz ; i++) {
+    int core = cm->cpu[i] ;
+    top = core > top ? core : top ;
+  }
+  return top+1 ;
+}
+
+/* Return a fresh cpu set holding the logical processors of p that core map
+   cm assigns to core c, in the order they appear in p. */
+cpus_t *get_core_procs(cpus_t *cm, cpus_t *p,int c) {
+  /* First pass: count the matching processors */
+  int count = 0 ;
+  for (int i = 0 ; i < p->sz ; i++) {
+    if (cm->cpu[p->cpu[i]] == c) count++ ;
+  }
+  cpus_t *res = cpus_create(count) ;
+  /* Second pass: collect them */
+  int *dst = res->cpu ;
+  for (int i = 0 ; i < p->sz ; i++) {
+    int proc = p->cpu[i] ;
+    if (cm->cpu[proc] == c) *dst++ = proc ;
+  }
+  return res ;
+}
+
+/* Build the inverse core map of the processors in p: entry c collects the
+   processors of p that cm places on core c. */
+static mapcore_t *inverse_procs(cpus_t *cm, cpus_t *p) {
+  mapcore_t *inv = malloc_check(sizeof(*inv)) ;
+  inv->ncores = get_ncores(cm) ;
+  inv->core = malloc_check(sizeof(inv->core[0])*inv->ncores) ;
+  int c = 0 ;
+  while (c < inv->ncores) {
+    inv->core[c] = get_core_procs(cm,p,c) ;
+    c++ ;
+  }
+  return inv ;
+}
+
+/* Length of an int list terminated by a negative value (terminator not
+   counted). */
+static int get_node_sz(int *p) {
+  int len ;
+  for (len = 0 ; p[len] >= 0 ; len++) ;
+  return len ;
+}
+
+/* Total number of items over a NULL-terminated array of int lists, each
+   list being terminated by a negative value. */
+static int get_n(int **p) {
+  int total = 0 ;
+  for (int **node = p ; *node != NULL ; node++) {
+    for (int *item = *node ; *item >= 0 ; item++) total++ ;
+  }
+  return total ;
+}
+
+/* Can thread p be placed on core c? Returns 0 when some thread k already
+   allocated (a[k] >= 0) and related to p in matrix d (d[n*p+k] non-zero)
+   sits on core c according to cm, 1 otherwise. */
+static int ok_one_color(int *cm,int *d,int *a,int n, int p, int c) {
+  int *row = &d[n*p] ;
+  for (int k = 0 ; k < n ; k++) {
+    int other = a[k] ;
+    if (other < 0) continue ; /* thread k not allocated yet */
+    if (!row[k]) continue ; /* no constraint between p and k */
+    if (cm[other] == c) return 0 ;
+  }
+  return 1 ;
+}
+
+/* Can every thread of the negative-terminated list q be placed on core c?
+   Returns 1 when ok_one_color accepts them all, 0 otherwise. */
+static int ok_color(int *cm,int *d,int *a,int n, int *q, int c) {
+  int *th = q ;
+  while (*th >= 0) {
+    if (ok_one_color(cm,d,a,n,*th,c) == 0) return 0 ;
+    th++ ;
+  }
+  return 1 ;
+}
+
+/* Look for a core able to host the whole thread group q: it must still
+   offer at least as many processors as the group has threads and satisfy
+   ok_color. The scan starts at core prev (when prev >= 0 and a random bit
+   is set) or at a random core, and wraps around. Returns the core found,
+   or -1 when none fits. */
+static int find_color_diff
+(int prev,st_t *st,int *cm,mapcore_t *mc,int *d, int *a,int n, int *q) {
+  int need = get_node_sz(q) ;
+  int start ;
+  if (prev >= 0 && rand_bit(st)) {
+    start = prev ;
+  } else {
+    start = rand_k(st,mc->ncores) ;
+  }
+  int core = start ;
+  for ( ; ; ) {
+    if (mc->core[core]->sz >= need && ok_color(cm,d,a,n,q,core)) return core ;
+    core = (core+1) % mc->ncores ;
+    if (core == start) return -1 ;
+  }
+}
+
+
+/* Look for a core with a free processor for the single thread p. A core
+   accepted by ok_one_color is preferred; failing that, the first non-empty
+   core met during the scan is returned. The scan starts as in
+   find_color_diff. The program is aborted when all cores are empty. */
+static int find_one_proc
+(int prev,st_t *st,int *cm,mapcore_t *mc,int *d,int *a,int n,int p) {
+  int fallback = -1 ;
+  int start ;
+  if (prev >= 0 && rand_bit(st)) {
+    start = prev ;
+  } else {
+    start = rand_k(st,mc->ncores) ;
+  }
+  int core = start ;
+  for ( ; ; ) {
+    if (mc->core[core]->sz > 0) {
+      if (ok_one_color(cm,d,a,n,p,core)) return core ;
+      if (fallback < 0) fallback = core ;
+    }
+    core = (core+1) % mc->ncores ;
+    if (core == start) break ;
+  }
+  if (fallback < 0) fatal("Cannot allocate threads") ;
+  return fallback ;
+}
+
+/* Compute a thread-to-processor allocation for n_exe test instances.
+   st: random generator state.
+   cm: core map of the machine.
+   color: NULL-terminated array of negative-terminated thread lists; the
+   threads of one list are placed on the same core when possible.
+   diff: list of thread pairs terminated by a negative value; the threads
+   of a pair are kept on different cores when possible.
+   aff_cpus: the logical processors that may be used.
+   r: output, n_exe consecutive groups of n entries, where n is the total
+   number of threads in color.
+   NOTE(review): the per-core sets in mc are consumed across all instances
+   and never refilled, so aff_cpus has to provide enough processors for
+   n_exe*n threads -- confirm callers guarantee it. */
+void custom_affinity (st_t *st,cpus_t *cm,int **color,int *diff,cpus_t *aff_cpus,int n_exe, int *r) {
+ mapcore_t *mc = inverse_procs(cm,aff_cpus) ;
+ int n = get_n(color) ;
+ /* Diff relation as matrix */
+ int d[n*n] ;
+ {
+ int *q = diff ;
+ for (int k = 0 ; k < n*n ; k++) d[k] = 0 ;
+ while (*q >= 0) {
+ int x = *q++, y = *q++ ;
+ d[n*x+y] = d[n*y+x] = 1 ; /* the relation is symmetric */
+ }
+ }
+ for (int k = 0 ; k < n_exe ; k++) {
+ int *a = &r[k*n] ; /* allocation of instance k */
+ int prev_core = -1 ;
+ for (int i = 0 ; i < n ; i++) a[i] = -1 ; /* -1: not allocated yet */
+ for (int **q = color ; *q ; q++) {
+ int c = find_color_diff(prev_core,st,aff_cpus->cpu,mc,d,a,n,*q) ;
+ if (c >= 0) {
+ /* Whole group fits on core c: take processors from the end of its set */
+ cpus_t *p = mc->core[c] ;
+ for (int *qq = *q ; *qq >= 0 ; qq++) {
+ p->sz-- ;
+ a[*qq] = p->cpu[p->sz] ;
+ }
+ prev_core = c ;
+ } else {
+ /* No core can host the group: place its threads one by one */
+ for (int *qq = *q ; *qq >= 0 ; qq++) {
+ int c = find_one_proc(prev_core,st,aff_cpus->cpu,mc,d,a,n,*qq) ;
+ cpus_t *p = mc->core[c] ;
+ p->sz-- ;
+ a[*qq] = p->cpu[p->sz] ;
+ prev_core = c ;
+ }
+ }
+ }
+ }
+ mapcore_free(mc) ;
+}
+
+/****************/
+/* Command line */
+/****************/
+
+/* usage */
+
+/* Print the command line help on the error log, then exit with status 2.
+   prog: program name shown in the first line.
+   d: default settings; they supply the default values displayed and decide
+   which optional sections are listed. This function does not return. */
+static void usage(char *prog, cmd_t *d) {
+ log_error("usage: %s (options)*\n",prog) ;
+ log_error(" -v be verbose\n") ;
+ log_error(" -q be quiet\n") ;
+ log_error(" -a <n> run maximal number of tests for n available processors (default %i)\n",d->avail) ;
+ log_error(" -n <n> run n tests concurrently\n") ;
+ log_error(" -r <n> perform n runs (default %i)\n",d->max_run) ;
+ log_error(" -fr <f> multiply run number per f\n") ;
+ log_error(" -s <n> outcomes per run (default %i)\n",d->size_of_test) ;
+ if (d->stride > 0) {
+ log_error(" -st <n> stride (default %i)\n",d->stride) ;
+ }
+ log_error(" -fs <f> multiply outcomes per f\n") ;
+ log_error(" -f <f> multiply outcomes per f, divide run number by f\n") ;
+ /* Affinity options, listed only when affinity control is available */
+ if (d->aff_mode != aff_none) {
+ log_error(" -i <n> increment for allocating logical processors, -i 0 disables affinity mode") ;
+ if (d->aff_mode == aff_incr) {
+ log_error(" (default %i)\n",d->aff_incr) ;
+ } else {
+ log_error("\n") ;
+ }
+ log_error(" -p <ns> specify logical processors (default '") ;
+ cpus_dump(errlog,d->aff_cpus) ;
+ log_error("')\n") ;
+ log_error(" +ra randomise affinity%s\n",d->aff_mode == aff_random ? " (default)" : "") ;
+ if (d->aff_custom_enabled) {
+ log_error(" +ca enable custom affinity%s\n",d->aff_mode == aff_custom ? " (default)" : "") ;
+ } else {
+ log_error(" +ca alias for +ra\n") ;
+ }
+ if (d->aff_scan_enabled) {
+ log_error(" +sa enable scanning affinity%s\n",d->aff_mode == aff_scan ? " (default)" : "") ;
+ log_error(" +ta <topo> set topology affinity\n") ;
+ } else {
+ log_error(" +sa alias for +ra\n") ;
+ }
+ }
+ /* A negative default marks the corresponding feature as absent */
+ if (d->shuffle >= 0) {
+ log_error(" +rm randomise memory accesses%s\n",d->shuffle ? " (default)" : "") ;
+ log_error(" -rm do not randomise memory accesses%s\n",!d->shuffle ? " (default)" : "") ;
+ }
+ if (d->speedcheck >= 0) {
+ log_error(" +sc stop as soon as possible%s\n",d->speedcheck ? " (default)" : "") ;
+ log_error(" -sc run test completly%s\n",!d->speedcheck ? " (default)" : "") ;
+ }
+ if (!d->fix) {
+ log_error(" +fix fix thread launch order\n") ;
+ }
+ if (d->delta_tb) {
+ log_error(" -tb <list> set timebase delays, default '") ;
+ ints_dump(errlog,d->delta_tb) ;
+ log_error("'\n") ;
+ log_error(" List syntax is comma separated proc:delay\n") ;
+ log_error(" -ta <n> set all timebase delays\n") ;
+ }
+ if (d->verbose_barrier >= 0) {
+ log_error(" +vb show iteration timings%s\n",d->verbose_barrier ? " (default)" : "") ;
+ log_error(" -vb do not show iteration timings%s\n",!d->verbose_barrier ? " (default)" : "") ;
+ }
+ if (d->prefetch) {
+ log_error(" -pra (I|F|T|W) set all prefetch\n") ;
+ log_error(" -prf <list> set prefetch, default '") ;
+ prefetch_dump(errlog,d->prefetch) ;
+ log_error("'\n") ;
+ log_error(" List syntax is comma separated proc:name=(I|F|T|W)\n") ;
+ }
+ if (d->static_prefetch >= 0) {
+ log_error(" -prs <n> prefetch probability is 1/n, -prs 0 disables feature, default %i\n",d->static_prefetch) ;
+ }
+ if (d->max_loop > 0) {
+ log_error(" -l <n> measure time by running assembly in a loop of size <n> (default %i)\n",d->max_loop) ;
+ }
+ if (d->prelude > 0) {
+ log_error(" -vp no verbose prelude\n") ;
+ }
+ if (d->sync_n > 0) {
+ log_error(" -k <n> undocumented (default %i)\n",d->sync_n) ;
+ }
+ exit(2) ;
+}
+
+/* Overflow-checked addition of two longs.
+   Returns x+y; when the sum does not fit in a long, errno is set to ERANGE
+   and the program is aborted through fatal("overflow").
+   The range test is performed BEFORE adding: signed overflow is undefined
+   behaviour in C, hence a test on the wrapped result may be removed by the
+   optimiser. Sums involving negative operands that do fit are returned
+   normally. */
+static long my_add (long x, long y) {
+  int too_big = y > 0 && x > LONG_MAX - y ;
+  int too_small = y < 0 && x < LONG_MIN - y ;
+  if (too_big || too_small) { errno = ERANGE ; fatal("overflow") ; }
+  return x+y ;
+}
+
+/* Multiply x by 10^p using checked additions only. The program is aborted
+   ("overflow") when an addition overflows or when the final result is not
+   in the range 1 .. INT_MAX-1. */
+static long my_pow10(int p,long x) {
+  long acc = x ;
+  while (p > 0) {
+    /* 10*acc computed as 8*acc + 2*acc */
+    long twice = my_add(acc,acc) ;
+    long four = my_add(twice,twice) ;
+    long eight = my_add(four,four) ;
+    acc = my_add(eight,twice) ;
+    p-- ;
+  }
+  if (acc <= 0 || acc >= INT_MAX) { errno = ERANGE ; fatal("overflow") ; }
+  return acc ;
+}
+
+/* Parse a decimal integer at p, with an optional multiplier suffix:
+   'k'/'K' (times 1000) or 'm'/'M' (times 1000000).
+   *q is set to the first character not consumed (suffix included); when no
+   digits are found *q equals p and 0 is returned.
+   The program is aborted through fatal("overflow") when the value is out
+   of range. */
+static int do_argint(char *p, char **q) {
+  /* strtol reports overflow through errno only: clear any stale value so
+     that an earlier, unrelated ERANGE is not mistaken for an overflow */
+  errno = 0 ;
+  long r = strtol(p,q,10) ;
+  if (errno == ERANGE) { fatal("overflow") ; }
+  if (**q == 'k' || **q == 'K') { r = my_pow10(3,r) ; *q += 1; }
+  else if (**q == 'm' || **q == 'M') { r = my_pow10(6,r) ; *q +=1 ; }
+  /* A long may be wider than an int: refuse values the cast would mangle */
+  if (r > INT_MAX || r < INT_MIN) { errno = ERANGE ; fatal("overflow") ; }
+  return (int)r ;
+}
+
+/* Parse a complete command line argument as an integer (see do_argint for
+   the accepted syntax). An empty argument or trailing characters lead to
+   usage(), which exits. */
+static int argint(char *prog,char *p,cmd_t *d) {
+  char *end ;
+  long val = do_argint(p,&end) ;
+  int whole = *p != '\0' && *end == '\0' ;
+  if (!whole) usage(prog,d) ;
+  return (int)val ;
+}
+
+/* Parse a comma separated list of non-negative integers (logical processor
+   numbers) into a fresh cpu set.
+   Any syntax error leads to usage(), which exits: empty list, empty element
+   (as in "1,,2" or "1,"), negative or too large number, stray character. */
+static cpus_t *argcpus(char *prog,char *p0,cmd_t *d) {
+  int sz = 0 ;
+  char *p ;
+
+  /* First pass: validate the syntax and count the elements */
+  p = p0 ;
+  for ( ; ; ) {
+    char *q ;
+    long x = strtol(p,&q,10) ;
+    /* q == p means that no digits were read: without this test an empty
+       element would be silently accepted as processor 0 */
+    if (q == p || x < 0 || x > INT_MAX || (*q != '\0' && *q != ',')) usage(prog,d) ;
+    sz++ ;
+    if (*q == '\0') break ;
+    p = q+1 ;
+  }
+  /* Second pass: the list is known to be well-formed, store the values */
+  cpus_t *r = cpus_create(sz) ;
+  p = p0 ;
+  for (int k = 0 ; k < sz ; k++) {
+    char *q ;
+    r->cpu[k] = (int)strtol(p,&q,10) ;
+    p = q+1 ;
+  }
+  return r ;
+}
+
+/* Parse a comma separated list of "index:value" items and store each value
+   into r->t[index]. Values follow the syntax of do_argint.
+   Any syntax error leads to usage(), which exits: missing index or value,
+   index outside 0 .. r->sz-1, stray character. An empty list and a
+   trailing comma are accepted. */
+static void argints(char *prog,cmd_t *d, char *p,ints_t *r) {
+  while (*p) {
+    char *q ;
+    long idx = strtol(p,&q,10) ;
+    /* q == p: no digits read, do not take a missing index for index 0 */
+    if (q == p || idx < 0 || idx >= r->sz || *q != ':') usage(prog,d) ;
+    p = q+1 ;
+    int v = do_argint(p,&q) ;
+    /* q == p: no digits read, do not take a missing value for value 0 */
+    if (q == p || (*q != '\0' && *q != ',')) usage(prog,d) ;
+    r->t[idx] = v ;
+    p = *q == '\0' ? q : q+1 ;
+  }
+}
+
+/* Find the prefetch slot of the variable called name among the variables
+   of thread descriptor p. Returns NULL when there is no such variable. */
+static prfone_t *get_name_slot(prfproc_t *p,char *name) {
+  prfone_t *slot = p->t ;
+  prfone_t *end = slot + p->nvars ;
+  for ( ; slot < end ; slot++) {
+    if (!strcmp(name,slot->name)) return slot ;
+  }
+  return NULL ; /* Name not found */
+}
+
+
+/* Apply one prefetch directive to all variables of all threads. The
+   directive is selected by the first character of p: 'F' flush, 'T' touch,
+   'W' touch_store, anything else none. prog and d are not used. */
+static void argoneprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) {
+  char letter = *p ;
+  prfdir_t dir ;
+  if (letter == 'F') dir = flush ;
+  else if (letter == 'T') dir = touch ;
+  else if (letter == 'W') dir = touch_store ;
+  else dir = none ;
+  set_prefetch(r,dir) ;
+}
+
+/* Parse a comma separated list of "proc:name=L" prefetch directives and
+   record them in r. L is 'F' (flush), 'T' (touch) or 'W' (touch_store);
+   any other letter stands for none.
+   Returns 1 on success (the empty string is accepted), 0 on error: bad
+   thread number, missing ':' or '=', unknown variable name (also reported
+   on the error log), missing directive letter, stray character.
+   The string is modified temporarily during parsing and restored before
+   returning. */
+int parse_prefetch(char *p, prfdirs_t *r) {
+  if (!*p) return 1 ;
+  for ( ;; ) {
+    char *q ;
+    int proc = (int)strtol(p,&q,10) ;
+    if (proc < 0 || proc >= r->nthreads || *p == '\0' || *q != ':')
+      return 0 ;
+    p = q+1 ;
+    /* Isolate the variable name, which extends up to '=' */
+    char *p0 = p ;
+    while (*p != '=') {
+      if (*p == '\0') return 0 ;
+      p++ ;
+    }
+    *p = '\0' ;
+    prfone_t *loc_slot = get_name_slot(&r->t[proc],p0) ;
+    if (loc_slot == NULL) {
+      log_error("Proc %i does not access variable %s\n",proc,p0) ;
+      *p = '=' ;
+      return 0 ;
+    }
+    *p = '=' ;
+    char c = *++p;
+    /* Directive letter missing ("proc:name=" at end of input): stop here,
+       stepping further would read past the end of the string */
+    if (c == '\0') return 0 ;
+    prfdir_t dir = none ;
+    switch (c) {
+    case 'F':
+      dir = flush ;
+      break ;
+    case 'T':
+      dir = touch ;
+      break ;
+    case 'W':
+      dir = touch_store ;
+      break ;
+    default:
+      break ;
+    }
+    loc_slot->dir = dir ;
+    c = *++p ;
+    if (c == '\0') return 1 ;
+    else if (c == ',') p++ ;
+    else return 0 ;
+  }
+}
+
+static void argprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) {
+ if (!parse_prefetch(p,r)) usage(prog,d) ;
+}
+
+/* Parse a complete command line argument as a floating point number. An
+   empty argument or trailing characters lead to usage(), which exits. */
+static double argdouble(char *prog,char *p,cmd_t *d) {
+  char *end ;
+  double val = strtod(p,&end) ;
+  int whole = *p != '\0' && *end == '\0' ;
+  if (!whole) usage(prog,d) ;
+  return val ;
+}
+
+/* Parse the command line.
+   argc, argv: as received by main.
+   d: default settings; they decide which options are recognised and are
+   handed to usage().
+   p: settings updated according to the options found.
+   An unknown option, a missing or malformed option argument, or any
+   non-option argument leads to usage(), which exits. */
+void parse_cmd(int argc, char **argv, cmd_t *d, cmd_t *p) {
+ char *prog = argv[0] ;
+
+ /* Options */
+ for ( ; ; ) {
+ --argc ; ++argv ;
+ if (!*argv) break ;
+ char fst = **argv ;
+ /* Options start with '-' or '+' */
+ if (fst != '-' && fst != '+') break ;
+ if (strcmp(*argv,"-q") == 0) p->verbose=0 ;
+ else if (strcmp(*argv,"-v") == 0) p->verbose++ ;
+ else if (strcmp(*argv,"-r") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->max_run = argint(prog,argv[0],d) ;
+ } else if (strcmp(*argv,"-fr") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->max_run *= argdouble(prog,argv[0],d) ;
+ } else if (strcmp(*argv,"-s") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->size_of_test = argint(prog,argv[0],d) ;
+ } else if (d->stride > 0 && strcmp(*argv,"-st") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->stride = argint(prog,argv[0],d) ;
+ if (p->stride <= 0) p->stride = 1 ;
+ } else if (strcmp(*argv,"-fs") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->size_of_test *= argdouble(prog,argv[0],d) ;
+ } else if (strcmp(*argv,"-f") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ double f = argdouble(prog,argv[0],d) ;
+ /* NOTE(review): f is not checked, '-f 0' divides by zero below */
+ p->size_of_test *= f ;
+ p->max_run /= f ;
+ } else if (strcmp(*argv,"-n") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->n_exe = argint(prog,argv[0],d) ;
+ if (p->n_exe < 1) p->n_exe = 1 ;
+ } else if (strcmp(*argv,"-a") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int a = argint(prog,argv[0],d) ;
+ p->avail = a ;
+ } else if (d->sync_n > 0 && strcmp(*argv,"-k") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int a = argint(prog,argv[0],d) ;
+ p->sync_n = a < 0 ? 0 : a ;
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"-i") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int i = argint(prog,argv[0],d) ;
+ p->aff_mode = aff_incr ;
+ p->aff_incr = i < 0 ? 0 : i ;
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"-p") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ cpus_t *cpus = argcpus(prog,argv[0],d) ;
+ p->aff_cpus = cpus ;
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"+ra") == 0) {
+ p->aff_mode = aff_random ;
+ /* +ca and +sa fall back on random affinity when the custom or scanning
+ mode is not enabled */
+ } else if (d->aff_custom_enabled && strcmp(*argv,"+ca") == 0) {
+ p->aff_mode = aff_custom ;
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"+ca") == 0) {
+ p->aff_mode = aff_random ;
+ } else if (d->aff_scan_enabled && strcmp(*argv,"+sa") == 0) {
+ p->aff_mode = aff_scan ;
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) {
+ p->aff_mode = aff_random ;
+ } else if (d->aff_scan_enabled && strcmp(*argv,"+ta") == 0) {
+ p->aff_mode = aff_topo ;
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ p->aff_topo = argv[0] ;
+ /* NOTE(review): same test as the "+sa" branch above, hence unreachable */
+ } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) {
+ p->aff_mode = aff_random ;
+ } else if (d->shuffle >= 0 && strcmp(*argv,"+rm") == 0) {
+ p->shuffle = 1 ;
+ } else if (d->shuffle >= 0 && strcmp(*argv,"-rm") == 0) {
+ p->shuffle = 0 ;
+ } else if (d->speedcheck >= 0 && strcmp(*argv,"+sc") == 0) {
+ p->speedcheck = 1 ;
+ } else if (d->speedcheck >= 0 && strcmp(*argv,"-sc") == 0) {
+ p->speedcheck = 0 ;
+ } else if (!d->fix && strcmp(*argv,"+fix") == 0) {
+ p->fix = 1 ;
+ } else if (d->verbose_barrier >= 0 && strcmp(*argv,"+vb") == 0) {
+ p->verbose_barrier++ ;
+ } else if (d->verbose_barrier >= 0 && strcmp(*argv,"-vb") == 0) {
+ p->verbose_barrier = 0 ;
+ } else if (d->prelude > 0 && strcmp(*argv,"-vp") == 0) {
+ p->prelude = 0 ;
+ } else if (d->delta_tb && strcmp(*argv,"-tb") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ argints(prog,d,argv[0],p->delta_tb) ;
+ } else if (d->delta_tb && strcmp(*argv,"-ta") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int da = argint(prog,argv[0],d) ;
+ for (int k = 0 ; k < p->delta_tb->sz ; k++) p->delta_tb->t[k] = da ;
+ } else if (d->prefetch && strcmp(*argv,"-prf") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ argprefetch(prog,d,argv[0],p->prefetch) ;
+ } else if (d->prefetch && strcmp(*argv,"-pra") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ argoneprefetch(prog,d,argv[0],p->prefetch) ;
+ } else if (d->static_prefetch >= 0 && strcmp(*argv,"-prs") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int prs = argint(prog,argv[0],d) ;
+ p->static_prefetch = prs >= 0 ? prs : 0 ;
+ } else if (d->max_loop > 0 && strcmp(*argv,"-l") == 0) {
+ --argc ; ++argv ;
+ if (!*argv) usage(prog,d) ;
+ int i = argint(prog,argv[0],d) ;
+ p->max_loop = i < 1 ? 1 : i ;
+ } else usage(prog,d) ;
+ }
+
+ /* Argument */
+ /* No positional argument is accepted: anything left over is an error */
+ if (argc == 0) return ;
+ usage(prog,d) ;
+}
+
+/*************************/
+/* Concurrency utilities */
+/*************************/
+
+/* pthread based mutex */
+
+/* Allocate a mutex and initialise it with default attributes. The process
+   is terminated through errexit() when initialisation fails. */
+pm_t *pm_create(void) {
+  pm_t *mutex = malloc_check(sizeof(*mutex)) ;
+  int err = pthread_mutex_init(mutex,NULL) ;
+  if (err != 0) errexit("mutex_init",err) ;
+  return mutex ;
+}
+
+/* Dispose of a mutex created by pm_create: the mutex is destroyed, then
+   its storage is released. The mutex must be unlocked.
+   The result of pthread_mutex_destroy is deliberately ignored: releasing
+   resources is best-effort and must not abort the program. */
+void pm_free(pm_t *p) {
+  (void)pthread_mutex_destroy(p) ;
+  free(p) ;
+}
+
+/* Lock mutex m; a failure terminates the process through errexit(). */
+void pm_lock(pm_t *m)
+{
+  int err = pthread_mutex_lock(m) ;
+  if (err != 0) errexit("mutex_lock",err) ;
+}
+
+/* Unlock mutex m; a failure terminates the process through errexit(). */
+void pm_unlock(pm_t *m)
+{
+  int err = pthread_mutex_unlock(m) ;
+  if (err != 0) errexit("mutex_unlock",err) ;
+}
+
+/* pthread condition */
+
+/* Allocate a condition variable together with the mutex that guards it.
+   The process is terminated through errexit() when initialisation fails. */
+pc_t *pc_create(void) {
+  pc_t *cv = malloc_check(sizeof(*cv)) ;
+  cv->c_mutex = pm_create() ;
+  cv->c_cond = malloc_check(sizeof(*(cv->c_cond))) ;
+  int err = pthread_cond_init(cv->c_cond,NULL) ;
+  if (err != 0) errexit("cond_init",err) ;
+  return cv ;
+}
+
+/* Dispose of a condition created by pc_create: the condition variable is
+   destroyed, then the mutex and all storage are released. No thread may be
+   waiting on the condition.
+   The result of pthread_cond_destroy is deliberately ignored: releasing
+   resources is best-effort and must not abort the program. */
+void pc_free(pc_t *p) {
+  (void)pthread_cond_destroy(p->c_cond) ;
+  pm_free(p->c_mutex) ;
+  free(p->c_cond) ;
+  free(p) ;
+}
+
+/* Take the mutex associated with condition p. */
+static void pc_lock(pc_t *p)
+{
+  pm_lock(p->c_mutex) ;
+}
+
+/* Release the mutex associated with condition p. */
+static void pc_unlock(pc_t *p)
+{
+  pm_unlock(p->c_mutex) ;
+}
+
+/* Wait on condition p with its own mutex, which the caller must hold.
+   A failure terminates the process through errexit(). */
+void pc_wait(pc_t *p)
+{
+  int err = pthread_cond_wait(p->c_cond, p->c_mutex) ;
+  if (err != 0) errexit("cond_wait",err) ;
+}
+
+/* Wake up all the threads waiting on condition p. A failure terminates
+   the process through errexit(). */
+void pc_broadcast (pc_t *p)
+{
+  int err = pthread_cond_broadcast(p->c_cond) ;
+  if (err != 0) errexit("cond_broadcast",err) ;
+}
+
+/* Signal condition p (pthread_cond_signal). A failure terminates the
+   process through errexit(). */
+static void pc_signal(pc_t *p)
+{
+  int err = pthread_cond_signal(p->c_cond) ;
+  if (err != 0) {
+    errexit("cond_signal",err) ;
+  }
+}
+
+
+/* pthread based barrier, usable for nproc threads */
+
+
+/* Create a barrier for nprocs threads. */
+pb_t *pb_create(int nprocs) {
+  pb_t *bar = malloc_check(sizeof(*bar)) ;
+  bar->cond = pc_create() ;
+  bar->nprocs = nprocs ;
+  bar->count = nprocs ; /* threads still expected in the current round */
+  bar->turn = 0 ;
+  return bar ;
+}
+
+/* Release a barrier and the condition it owns. */
+void pb_free(pb_t *p)
+{
+  pc_t *cond = p->cond ;
+  pc_free(cond) ;
+  free(p) ;
+}
+
+/* Barrier synchronisation: block until p->nprocs threads have called
+   pb_wait. The last thread to arrive resets the counter, flips p->turn and
+   wakes the other ones up. */
+/* The following code should protect us against spurious wake ups */
+void pb_wait(pb_t *p) {
+ pc_lock(p->cond) ;
+ int t = p->turn ; /* turn value of the round being joined */
+ --p->count ;
+ if (p->count == 0) {
+ /* Last arrival: get ready for the next round and release everybody */
+ p->count = p->nprocs ;
+ p->turn = !t ;
+ pc_broadcast(p->cond) ;
+ } else {
+ /* Sleep until the turn has changed; a wake up that leaves the turn
+ unchanged is ignored */
+ do {
+ pc_wait(p->cond) ;
+ } while (p->turn == t) ;
+ }
+ pc_unlock(p->cond) ;
+}
+
+
+/* pthread based or flag */
+
+/* Create an "or" flag shared by nprocs threads; the flag starts at 0. */
+po_t *po_create(int nprocs) {
+  po_t *flag = malloc_check(sizeof(*flag)) ;
+  flag->cond = pc_create() ;
+  flag->count = nprocs ; /* threads still expected in the current round */
+  flag->nprocs = nprocs ;
+  flag->turn = 0 ;
+  flag->val = 0 ;
+  return flag ;
+}
+
+/* Release an "or" flag and the condition it owns. */
+void po_free(po_t *p)
+{
+  pc_t *cond = p->cond ;
+  pc_free(cond) ;
+  free(p) ;
+}
+
+/* Collective reset of an "or" flag: block until p->nprocs threads have
+   called po_reinit. The last thread to arrive clears p->val, flips p->turn
+   and wakes the other ones up. */
+void po_reinit(po_t *p) {
+ pc_lock(p->cond) ;
+ int t = p->turn ; /* turn value of the round being joined */
+ --p->count ;
+ if (p->count == 0) {
+ /* Last arrival: reset the flag and release everybody */
+ p->count = p->nprocs ;
+ p->val = 0 ;
+ p->turn = !t ;
+ pc_broadcast(p->cond) ;
+ } else {
+ /* A wake up that leaves the turn unchanged is ignored */
+ do {
+ pc_wait(p->cond) ;
+ } while (p->turn == t) ;
+ }
+ pc_unlock(p->cond) ;
+}
+
+/* Collective "or": each of the p->nprocs threads contributes v and blocks
+   until all have done so. Returns p->val as read after the rendez-vous,
+   that is, the logical or of the contributions accumulated since the flag
+   was last cleared. */
+int po_wait(po_t *p, int v) {
+ pc_lock(p->cond) ;
+ int t = p->turn ; /* turn value of the round being joined */
+ --p->count ;
+ p->val = p->val || v ; /* accumulate the contribution, as 0 or 1 */
+ if (p->count == 0) {
+ /* Last arrival: get ready for the next round and release everybody */
+ p->count = p->nprocs ;
+ p->turn = !t ;
+ pc_broadcast(p->cond) ;
+ } else {
+ /* A wake up that leaves the turn unchanged is ignored */
+ do {
+ pc_wait(p->cond) ;
+ } while (p->turn == t) ;
+ }
+ int r = p->val ; /* read while the lock is still held */
+ pc_unlock(p->cond) ;
+ return r ;
+}
+
+
+/* One place buffer */
+
+/* Create an empty one-place buffer. */
+op_t *op_create(void) {
+  op_t *buf = malloc_check(sizeof(*buf)) ;
+  buf->cond = pc_create() ;
+  buf->some = 0 ; /* 0: empty, 1: holds a value */
+  buf->val = NULL ;
+  return buf ;
+}
+
+/* Release a one-place buffer and the condition it owns. */
+void op_free(op_t *p)
+{
+  pc_t *cond = p->cond ;
+  pc_free(cond) ;
+  free(p) ;
+}
+
+/* Store v into the one-place buffer and signal its condition. Storing
+   into a buffer that already holds a value is a fatal error. */
+void op_set(op_t *p, void *v) {
+  pc_lock(p->cond) ;
+  if (p->some != 0) {
+    fatal("op_set") ;
+  }
+  p->some = 1 ;
+  p->val = v ;
+  pc_signal(p->cond) ;
+  pc_unlock(p->cond) ;
+}
+
+/* Take the value out of the one-place buffer, blocking while the buffer is
+   empty. The buffer is empty again on return. */
+void *op_get(op_t *p) {
+  pc_lock(p->cond) ;
+  /* Loop: a wake up may occur while the buffer is still empty */
+  for ( ; !p->some ; ) {
+    pc_wait(p->cond) ;
+  }
+  void *taken = (void *) p->val ;
+  p->some = 0 ;
+  p->val = NULL ;
+  pc_unlock(p->cond) ;
+  return taken ;
+}
+
+/* Thread launch and join */
+
+/* Start a thread running f(a) with default attributes and store its
+   identifier in *th. A failure terminates the process through errexit();
+   the message names the failing call, pthread_create. */
+void launch(pthread_t *th, f_t *f, void *a) {
+  int e = pthread_create(th,NULL,f,a);
+  if (e) errexit("pthread_create",e);
+}
+
+/* Wait for thread *th to terminate and return its result. A failure
+   terminates the process through errexit(). */
+void *join(pthread_t *th) {
+  void *result ;
+  int err = pthread_join(*th,&result) ;
+  if (err != 0) {
+    errexit("pthread_join",err);
+  }
+  return result ;
+}
+
+/* Detached */
+
+/* Argument block handed to a detached thread (see zyva_det). */
+typedef struct {
+ f_t *f; /* function to run */
+ void *a ; /* its argument */
+ op_t *op; /* buffer receiving the result of f(a) */
+} detarg_t ;
+
+/* Body of a detached thread: unpack and free the argument block, detach
+   the current thread, run the function and post its result into the
+   one-place buffer. Always returns NULL. */
+static void *zyva_det(void *_b) {
+  detarg_t *blk = _b ;
+  detarg_t job = *blk ; /* copy out before releasing the block */
+  free(blk) ;
+  int err = pthread_detach(pthread_self());
+  if (err != 0) errexit("pthread_detach",err) ;
+  op_set(job.op,job.f(job.a)) ;
+  return NULL ;
+}
+
+/* Run f(a) in a detached thread. Returns the one-place buffer that will
+   receive the result; collect it with join_detached. */
+op_t *launch_detached(f_t *f,void *a) {
+  detarg_t *blk ;
+  pthread_t tid ;
+  op_t *res = op_create() ;
+
+  blk = malloc_check(sizeof(*blk)) ; /* freed by the new thread */
+  blk->op = res ;
+  blk->a = a ;
+  blk->f = f ;
+  launch(&tid,zyva_det,blk) ;
+  return res ;
+}
+
+/* Wait for the result posted into op, release op and return the result. */
+void *join_detached(op_t *op)
+{
+  void *result = op_get(op) ;
+  op_free(op) ;
+  return result ;
+}
+
+/* Thread cache */
+
+/* Body of a cached worker thread. At most max_run times: fetch a function
+   from op_arg, stop when it is NULL, otherwise apply it to arg and post
+   the result into op_ret. Always returns NULL. */
+void *start_thread(void *_a) {
+  sarg_t *ctx = (sarg_t *)_a ;
+  int left = ctx->max_run ;
+  while (left > 0) {
+    f_t *job = (f_t *)op_get(ctx->op_arg) ;
+    if (job == NULL) break ;
+    op_set(ctx->op_ret,job(ctx->arg)) ;
+    left-- ;
+  }
+  return NULL ;
+}
+
+/*****************/
+/* Random things */
+/*****************/
+
+/* Partial random shuffle of the n ints of _t: for each of the m first
+   positions, swap in an item picked with rand_k among the items from that
+   position up to the end. */
+void perm_prefix_ints(unsigned *st,int *_t, int m, int n) {
+  for (int pos = 0 ; pos < m ; pos++) {
+    int pick = pos+rand_k(st,n-pos);
+    int tmp = _t[pick];
+    _t[pick] = _t[pos];
+    _t[pos] = tmp;
+  }
+}
+
+/* Random shuffle of the n ints of _t (the last position needs no swap). */
+void perm_ints(unsigned *st,int *_t, int n)
+{
+  int prefix = n-1 ;
+  perm_prefix_ints(st,_t,prefix,n) ;
+}
+
+/* Random shuffle of an array of n function pointers. */
+void perm_funs(unsigned *st,f_t *fun[], int n) {
+  for (int pos = 0 ; pos+1 < n ; pos++) {
+    int pick = pos+rand_k(st,n-pos);
+    f_t *tmp = fun[pos];
+    fun[pos] = fun[pick];
+    fun[pick] = tmp;
+  }
+}
+
+/* Random shuffle of an array of n one-place buffer pointers. */
+void perm_ops(unsigned *st,op_t *op[], int n) {
+  for (int pos = 0 ; pos+1 < n ; pos++) {
+    int pick = pos+rand_k(st,n-pos);
+    op_t *tmp = op[pos];
+    op[pos] = op[pick];
+    op[pick] = tmp;
+  }
+}
+
+/* Random shuffle of an array of n thread identifiers. */
+void perm_threads(unsigned *st,pthread_t thread[], int n) {
+  for (int pos = 0 ; pos+1 < n ; pos++) {
+    int pick = pos+rand_k(st,n-pos);
+    pthread_t tmp = thread[pos];
+    thread[pos] = thread[pick];
+    thread[pick] = tmp;
+  }
+}
+
+/* qsort comparison of two ints: -1, 0 or 1. */
+static int int_cmp(const void *_p, const void *_q) {
+  const int *lhs = _p ;
+  const int *rhs = _q ;
+  return (*lhs > *rhs) - (*lhs < *rhs) ;
+}
+
+/* Check that the sz pointers of t are a permutation of min, min+1, ...,
+   min+sz-1. Returns 1 when they are, 0 otherwise. */
+int check_shuffle(int **t, int *min, int sz) {
+  int *offs = malloc_check(sizeof(*offs)*sz) ;
+  for (int i = 0 ; i < sz ; i++) {
+    offs[i] = (int)(t[i] - min) ;
+  }
+  /* Once sorted, the offsets of a permutation read 0,1,..,sz-1 */
+  qsort(offs,sz,sizeof(offs[0]),int_cmp) ;
+  int ok = 1 ;
+  for (int i = 0 ; ok && i < sz ; i++) {
+    if (offs[i] != i) ok = 0 ;
+  }
+  free(offs) ;
+  return ok ;
+}
+
+/****************/
+/* Time counter */
+/****************/
+
+#include <sys/time.h>
+#include <time.h>
+
+/* Current time of day in microseconds, as given by gettimeofday. A
+   failure terminates the process through errexit(); the message names the
+   failing call, gettimeofday. */
+tsc_t timeofday(void) {
+  struct timeval tv ;
+  if (gettimeofday(&tv,NULL)) errexit("gettimeofday",errno) ;
+  return tv.tv_sec * ((tsc_t)1000000) + tv.tv_usec ;
+}
+
+/* Quotient t1/t2 computed in floating point. */
+double tsc_ratio(tsc_t t1, tsc_t t2)
+{
+  double num = (double)t1 ;
+  double den = (double)t2 ;
+  return num / den ;
+}
+
+
+/* Time t divided by one million, as a double. */
+double tsc_millions(tsc_t t)
+{
+  const double million = 1000000.0 ;
+  return t / million ;
+}
+
+/*******************/
+/* String handling */
+/*******************/
+
+/* Index of the first string of t (sz entries) equal to s, -1 when s is
+   absent. */
+int find_string(char *t[], int sz, char *s) {
+  int i = 0 ;
+  while (i < sz) {
+    if (!strcmp(t[i],s)) return i ;
+    i++ ;
+  }
+  return -1 ;
+}
new file mode 100644
@@ -0,0 +1,275 @@
+/****************************************************************************/
+/* the diy toolsuite */
+/* */
+/* Jade Alglave, University College London, UK. */
+/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
+/* */
+/* Copyright 2015-present Institut National de Recherche en Informatique et */
+/* en Automatique and the authors. All rights reserved. */
+/* */
+/* This software is governed by the CeCILL-B license under French law and */
+/* abiding by the rules of distribution of free software. You can use, */
+/* modify and/ or redistribute the software under the terms of the CeCILL-B */
+/* license as circulated by CEA, CNRS and INRIA at the following URL */
+/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
+/****************************************************************************/
+#ifndef _UTILS_H
+#define _UTILS_H 1
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include "litmus_rand.h"
+
+
+/********/
+/* Misc */
+/********/
+
+void seterrlog(FILE *chan) ; /* NOTE(review): presumably selects chan as the stream used by log_error -- confirm in utils.c */
+
+int log_error(const char *fmt,...) ; /* takes a format string plus variadic arguments (printf-like signature) */
+
+void fatal(char *msg) ; /* NOTE(review): presumably reports msg and stops the program -- confirm in utils.c */
+/* e is errno; timeofday() in utils.c passes the name of the failing call as msg */
+void errexit(char *msg,int e) ;
+
+void *malloc_check(size_t sz) ; /* check_shuffle() in utils.c uses the result without a NULL test and releases it with free() */
+
+int max(int n,int m) ; /* presumably the larger of n and m -- TODO confirm */
+
+void pp_ints (FILE *fp,int *p,int n) ; /* presumably prints the n ints at p to fp -- TODO confirm */
+
+void *do_align(void *p, size_t sz) ; /* NOTE(review): the alignment contract (meaning of sz) is not visible in this header */
+
+void *do_noalign(void *p, size_t sz) ; /* NOTE(review): same signature as do_align; semantics not visible here */
+
+void cat_file(char *path,char *msg,FILE *out) ; /* presumably copies the file at path to out, with msg as a label -- TODO confirm */
+
+/***********/
+/* CPU set */
+/***********/
+
+#define CPUS_DEFINED 1 /* NOTE(review): presumably lets other headers test whether cpus_t is already declared -- confirm */
+typedef struct { /* int array bundled with a size; the section title calls it a CPU set */
+ int sz ; /* presumably the number of entries in cpu -- TODO confirm */
+ int *cpu ; /* the entries; presumably CPU identifiers -- TODO confirm */
+} cpus_t ;
+
+cpus_t *cpus_create(int sz) ; /* returns a cpus_t pointer for size sz; presumably heap-allocated, see cpus_free -- TODO confirm */
+cpus_t *cpus_create_init(int sz, int t[]) ; /* as cpus_create, presumably with entries copied from t -- TODO confirm */
+void cpus_free(cpus_t *p) ; /* presumably releases a cpus_t obtained from cpus_create* -- TODO confirm */
+void cpus_dump(FILE *fp, cpus_t *p) ; /* presumably prints p to fp -- TODO confirm */
+void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) ; /* NOTE(review): output format and role of cm/nprocs not visible here */
+
+int gcd(int a, int b) ; /* presumably the greatest common divisor of a and b -- TODO confirm */
+
+cpus_t *coremap_seq(int navail, int nways) ; /* NOTE(review): builds a core map from navail and nways; layout not visible here */
+cpus_t *coremap_end(int navail, int nways) ; /* NOTE(review): same signature as coremap_seq; how the two maps differ is not visible here */
+
+void custom_affinity /* takes a random state st (st_t comes from litmus_rand.h); NOTE(review): roles of color, diff and r not visible here */
+(st_t *st,cpus_t *cm,int **color,int *diff,cpus_t *aff_cpus,int n_exe, int *r) ;
+
+/*************/
+/* Int array */
+/*************/
+
+typedef struct { /* int array bundled with a size; cmd_t.delta_tb points to one */
+ int sz ; /* presumably the number of entries in t -- TODO confirm */
+ int *t ; /* the entries */
+} ints_t ;
+
+void ints_dump(FILE *fp, ints_t *p) ; /* presumably prints p to fp -- TODO confirm */
+
+/* Prefetch directives: an enum of directive kinds and three nested
+ * containers (prfdirs_t -> prfproc_t -> prfone_t). */
+typedef enum {none, flush, touch, touch_store} prfdir_t ;
+
+typedef struct { /* one directive: a name paired with a directive kind */
+ char *name ; /* presumably the name of a test variable -- TODO confirm */
+ prfdir_t dir ;
+} prfone_t ;
+
+typedef struct { /* array of prfone_t with a count */
+ int nvars ; /* presumably the number of entries in t -- TODO confirm */
+ prfone_t *t ;
+} prfproc_t ;
+
+typedef struct { /* array of prfproc_t with a count; cmd_t.prefetch points to one */
+ int nthreads ; /* presumably the number of entries in t -- TODO confirm */
+ prfproc_t *t ;
+} prfdirs_t ;
+
+void prefetch_dump(FILE *fp, prfdirs_t *p) ; /* presumably prints p to fp -- TODO confirm */
+int parse_prefetch(char *p, prfdirs_t *r) ; /* NOTE(review): presumably parses the text at p into r; meaning of the int result not visible here */
+
+/************************/
+/* Command line options */
+/************************/
+typedef enum /* affinity mode; stored in cmd_t.aff_mode */
+ { aff_none, aff_incr, aff_random, aff_custom,
+ aff_scan, aff_topo} aff_mode_t ;
+
+typedef struct { /* settings filled in by parse_cmd, one field per tunable */
+ int verbose ; /* presumably the -v count described in README.txt -- TODO confirm */
+ /* Test parameters */
+ int max_run ; /* presumably the -r option (number of runs) of README.txt -- TODO confirm */
+ int size_of_test ; /* presumably the -s option (array size of one run) of README.txt -- TODO confirm */
+ int stride ;
+ int avail ; /* presumably the -a option (available logical processors) of README.txt -- TODO confirm */
+ int n_exe ;
+ int sync_n ;
+ /* Affinity */
+ aff_mode_t aff_mode ;
+ int aff_custom_enabled ;
+ int aff_scan_enabled ;
+ int aff_incr ;
+ cpus_t *aff_cpus ;
+ char *aff_topo ;
+ /* indirect mode */
+ int shuffle ;
+ /* loop test */
+ int max_loop ;
+ /* time base delays */
+ ints_t * delta_tb ;
+ /* prefetch control */
+ prfdirs_t *prefetch ;
+ int static_prefetch ;
+ /* show time of synchronisation */
+ int verbose_barrier ;
+ /* Stop as soon as condition is settled */
+ int speedcheck ;
+ /* Enforce fixed launch order (ie cancel change launch) */
+ int fix ;
+ /* Dump prelude to test output */
+ int prelude ;
+} cmd_t ;
+
+void parse_cmd(int argc, char **argv, cmd_t *def, cmd_t *p) ; /* NOTE(review): presumably def supplies defaults and p receives the parsed settings -- confirm in utils.c */
+
+
+/********************/
+/* Thread utilities */
+/********************/
+
+/* Mutex: pm_t is a plain alias for pthread_mutex_t */
+
+typedef pthread_mutex_t pm_t ;
+
+pm_t *pm_create(void) ; /* returns a pm_t pointer; presumably allocated and initialised, to be released with pm_free -- TODO confirm */
+void pm_free(pm_t *p) ;
+void pm_lock(pm_t *m) ; /* presumably wraps pthread_mutex_lock -- TODO confirm */
+void pm_unlock(pm_t *m) ; /* presumably wraps pthread_mutex_unlock -- TODO confirm */
+
+/* Condition variable: pairs a pthread_cond_t with a pm_t mutex */
+
+typedef struct {
+ pm_t *c_mutex ; /* presumably the mutex used together with c_cond -- TODO confirm */
+ pthread_cond_t *c_cond ;
+} pc_t ;
+
+pc_t *pc_create(void) ; /* returns a pc_t pointer; presumably released with pc_free -- TODO confirm */
+void pc_free(pc_t *p) ;
+void pc_wait(pc_t *p) ; /* NOTE(review): whether the caller must hold c_mutex is not visible here */
+void pc_broadcast (pc_t *p) ; /* presumably wakes all waiters on c_cond -- TODO confirm */
+
+/* Barrier: built on the pc_t condition variable declared in this header */
+
+/* Avoid pthread supplied barrier as they are not available in old versions */
+
+typedef struct {
+ volatile unsigned int count ; /* NOTE(review): presumably counts participants still expected or already arrived -- confirm in utils.c */
+ volatile int turn ; /* NOTE(review): presumably distinguishes successive uses of the barrier -- confirm */
+ pc_t *cond ;
+ unsigned int nprocs ; /* presumably the nprocs given to pb_create -- TODO confirm */
+} pb_t ;
+
+
+pb_t *pb_create(int nprocs) ; /* returns a pb_t pointer for nprocs participants; presumably released with pb_free -- TODO confirm */
+void pb_free(pb_t *p) ;
+void pb_wait(pb_t *p) ; /* presumably blocks until all participants have called it -- TODO confirm */
+
+
+/* Or flag: lets participants combine per-thread values with 'or' (see po_wait) */
+
+typedef struct {
+ pc_t *cond ;
+ int nprocs ; /* presumably the nprocs given to po_create -- TODO confirm */
+ int count ;
+ volatile int val ; /* presumably the accumulated 'or' -- TODO confirm */
+ volatile int turn ;
+} po_t ;
+
+po_t *po_create(int nprocs) ; /* returns a po_t pointer; presumably released with po_free -- TODO confirm */
+void po_free(po_t *p) ;
+/* Initialize flag, must be called by all participants */
+void po_reinit(po_t *p) ;
+/* Return the 'or' of the v arguments of all participants */
+int po_wait(po_t *p, int v) ;
+
+/* One place buffer: holds a single void * value, guarded by a pc_t */
+
+typedef struct {
+ pc_t *cond ;
+ int volatile some ; /* presumably non-zero while val holds a value -- TODO confirm */
+ void * volatile val ; /* the stored pointer */
+} op_t ;
+
+op_t *op_create(void) ; /* returns an op_t pointer; presumably released with op_free -- TODO confirm */
+void op_free(op_t *p) ;
+void op_set(op_t *p, void *v) ; /* presumably stores v into the buffer -- TODO confirm blocking behaviour */
+void *op_get(op_t *p) ; /* presumably returns the stored value -- TODO confirm blocking behaviour */
+
+/* Thread launch and join */
+
+typedef void* f_t(void *); /* function type (not a pointer type): takes void *, returns void * */
+
+void launch(pthread_t *th, f_t *f, void *a) ; /* presumably starts f(a) in a new thread recorded in *th -- TODO confirm */
+
+void *join(pthread_t *th) ; /* presumably waits for *th and returns its result -- TODO confirm */
+
+/* Detached launch and join: the thread is reached through an op_t rather than a pthread_t */
+
+op_t *launch_detached(f_t *f,void *a) ;
+void *join_detached(op_t *p) ;
+
+/* Thread cache */
+
+typedef struct {
+ int max_run ;
+ op_t *op_arg,*op_ret ; /* two one-place buffers; presumably argument in, result out -- TODO confirm */
+ void *arg ;
+} sarg_t ;
+
+f_t start_thread ; /* declares function start_thread with the f_t signature: void *start_thread(void *) */
+
+/*****************/
+/* Random things */
+/*****************/
+
+/* permutations: all of these reorder t in place by swapping t[k] with
+ * t[k+rand_k(st,sz-k)]; perm_prefix_ints does so for k in [0,used), the
+ * others for k in [0,sz-1).
+ * NOTE(review): the definitions in utils.c spell the first parameter
+ * unsigned * rather than st_t * -- confirm both name the same type. */
+
+void perm_prefix_ints(st_t *st,int t[], int used, int sz) ;
+void perm_ints(st_t *st,int t[], int sz) ; /* calls perm_prefix_ints with used = sz-1 */
+void perm_funs(st_t *st,f_t *t[], int sz) ;
+void perm_threads(st_t *st,pthread_t t[], int sz) ;
+void perm_ops(st_t *st,op_t *t[], int sz) ;
+
+/* check permutation: 1 when the sz pointers in t are min, min+1, ..., min+sz-1 in some order, else 0 */
+int check_shuffle(int **t, int *min, int sz) ;
+
+/*********************/
+/* Real time counter */
+/*********************/
+
+typedef unsigned long long tsc_t ; /* time value; timeofday() fills it with microseconds */
+#define PTSC "%llu" /* printf conversion matching tsc_t */
+
+/* Result in micro-seconds (tv_sec*1000000 + tv_usec from gettimeofday) */
+tsc_t timeofday(void) ;
+double tsc_ratio(tsc_t t1, tsc_t t2) ; /* t1/t2 computed in double */
+double tsc_millions(tsc_t t) ; /* t / 1000000.0 */
+
+/* String utilities: index of the first of the sz entries of t equal to s (strcmp), or -1 */
+int find_string(char *t[],int sz,char *s) ;
+
+#endif
This adds the x86 store-after-load re-ordering litmus test.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tests/tcg/i386/litmus/Makefile      |   42 ++
 tests/tcg/i386/litmus/README.txt    |   22 +
 tests/tcg/i386/litmus/SAL.c         |  491 +++++++++++++++
 tests/tcg/i386/litmus/affinity.c    |  159 +++++
 tests/tcg/i386/litmus/affinity.h    |   34 ++
 tests/tcg/i386/litmus/comp.sh       |   10 +
 tests/tcg/i386/litmus/litmus_rand.c |   64 ++
 tests/tcg/i386/litmus/litmus_rand.h |   29 +
 tests/tcg/i386/litmus/outs.c        |  148 +++++
 tests/tcg/i386/litmus/outs.h        |   49 ++
 tests/tcg/i386/litmus/run.sh        |   55 ++
 tests/tcg/i386/litmus/show.awk      |    2 +
 tests/tcg/i386/litmus/utils.c       | 1148 +++++++++++++++++++++++++++++++++++
 tests/tcg/i386/litmus/utils.h       |  275 +++++++++
 14 files changed, 2528 insertions(+)
 create mode 100644 tests/tcg/i386/litmus/Makefile
 create mode 100644 tests/tcg/i386/litmus/README.txt
 create mode 100644 tests/tcg/i386/litmus/SAL.c
 create mode 100644 tests/tcg/i386/litmus/affinity.c
 create mode 100644 tests/tcg/i386/litmus/affinity.h
 create mode 100644 tests/tcg/i386/litmus/comp.sh
 create mode 100644 tests/tcg/i386/litmus/litmus_rand.c
 create mode 100644 tests/tcg/i386/litmus/litmus_rand.h
 create mode 100644 tests/tcg/i386/litmus/outs.c
 create mode 100644 tests/tcg/i386/litmus/outs.h
 create mode 100755 tests/tcg/i386/litmus/run.sh
 create mode 100644 tests/tcg/i386/litmus/show.awk
 create mode 100644 tests/tcg/i386/litmus/utils.c
 create mode 100644 tests/tcg/i386/litmus/utils.h