> Could it be that your CPU has a single floating-point unit shared by 4
> cores on a single die, and thus only 2 floating-point units total for
> all 8 of your cores? If so, then that fact, plus the fact that each
> core has its own separate ALU for integer operations, would seem to
> explain the results you are seeing.
Exactly, this would explain the behaviour. But unfortunately it is not the case. I implemented a small example using Java (Java Threads) and C (PThreads) and both times I get a linear speedup. See the attached code below. The cores only share 12 MB cache, but this should be enough memory for my micro-benchmark. Seeing the linear speedup in Java and C, I would negate a hardware limitation. _ Johann ### C ### #include <pthread.h> #include <stdio.h> #include <stdlib.h> #define NUM_THREADS 8 int inc(int); double inc_d(int); int inc(int x){ int y; y = x + 1; return y; } double inc_d(int x){ double y; y = (double)x + 1.0; return y; } void *BusyWork(void *t){ int i; long tid; int result=0; tid = (long)t; printf("Thread %ld starting...\n",tid); for (i=0; i<1000000000; i++){ /* result = result + sin(i) * tan(i); */ result = inc(i); } printf("Thread %ld done. Result = %i\n",tid, result); pthread_exit((void*) t); } void *BusyWork_d(void *t){ int i; long tid; double result=0.0; tid = (long)t; printf("Thread %ld starting...\n",tid); for (i=0; i<1000000000; i++){ /* result = result + sin(i) * tan(i); */ result = inc_d(i); } printf("Thread %ld done. 
Result = %e\n",tid, result); pthread_exit((void*) t); } void *BusyWork_single(){ int i; double result=0.0; for (i=0; i<1000000000; i++){ /* result = result + sin(i) * tan(i); */ result = inc_d(i); } } int main (int argc, char *argv[]){ time_t start,end; double dif; pthread_t thread[NUM_THREADS]; pthread_attr_t attr; int rc; long t; void *status; start = time(NULL); /* Running serial code */ for(t=0; t<NUM_THREADS; t++){ printf("Running serial code #: %ld\n", t); BusyWork_single(); } end = time(NULL); dif = difftime(end,start); printf("Runtime for serial code: %f\n", dif); start = time(NULL); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for(t=0; t<NUM_THREADS; t++){ printf("Main: creating thread %ld\n", t); /* Let's rock */ /* rc = pthread_create(&thread[t], &attr, BusyWork, (void *)t); */ rc = pthread_create(&thread[t], &attr, BusyWork_d, (void *)t); if (rc) { printf("ERROR; return code from pthread_create() is %d\n", rc); exit(-1); } } /* Free attribute and wait for the other threads */ pthread_attr_destroy(&attr); for(t=0; t<NUM_THREADS; t++){ rc = pthread_join(thread[t], &status); if (rc) { printf("ERROR; return code from pthread_join() is %d\n", rc); exit(-1); } printf("Main: completed join with thread %ld having a status of %ld \n",t,(long)status); } end = time(NULL); dif = difftime(end,start); printf("Runtime for parallel code: %f\n", dif); printf("Main: program completed. 
Exiting.\n"); pthread_exit(NULL); } ### Java ### import java.text.DecimalFormat; public class MapTest{ public static class IntTest implements Runnable{ long loops; long result; public IntTest(long loops){ this.loops = loops; } // stupid work public void run(){ result = 0; for (long i = 0L; i < loops; i++){ result = result + 1; } System.out.println(result); } } public static class DoubleTest implements Runnable{ long loops; double result; public DoubleTest(long loops){ this.loops = loops; } // stupid work public void run(){ result = 0.0; for (long i = 0L; i < loops; i++){ result = result + 1.0; } System.out.println(result); } } public static void main(String[] args){ try{ long loops = 10000000000L; // number of threads int tcount = 8; System.out.println("Number of Runs: "+tcount); // IntTest sequential in one Block ;) { Thread[] tarray = new Thread[tcount]; for (int i = 0; i < tcount; i++) tarray[i] = new Thread(new IntTest(loops)); long startTime = System.nanoTime(); long stopTime = 0; long runTime = 0; for (int i = 0; i < tcount; i++){ tarray[i].start(); tarray[i].join(); } stopTime = System.nanoTime(); runTime = stopTime - startTime; System.out.println(); System.out.println("Int ALL RUNS FINISHED for JOB."); System.out.println("Int OVERALL Time: "+new DecimalFormat ("0.0000").format((double)runTime/1000000)+" ms"); System.out.println(); } System.out.println("Number of Runs: "+tcount); // DoubleTest sequential in one Block ;) { Thread[] tarray = new Thread[tcount]; for (int i = 0; i < tcount; i++) tarray[i] = new Thread(new DoubleTest(loops)); long startTime = System.nanoTime(); long stopTime = 0; long runTime = 0; for (int i = 0; i < tcount; i++){ tarray[i].start(); tarray[i].join(); } stopTime = System.nanoTime(); runTime = stopTime - startTime; System.out.println(); System.out.println("Double ALL RUNS FINISHED for JOB."); System.out.println("Double OVERALL Time: "+new DecimalFormat ("0.0000").format((double)runTime/1000000)+" ms"); System.out.println(); } 
System.out.println("Number of Threads: "+tcount); // IntTest parallel in one Block ;) { Thread[] tarray = new Thread[tcount]; for (int i = 0; i < tcount; i++) tarray[i] = new Thread(new IntTest(loops)); long startTime = System.nanoTime(); long stopTime = 0; long runTime = 0; for (int i = 0; i < tcount; i++) tarray[i].start(); for (int i = 0; i < tcount; i++) tarray[i].join(); stopTime = System.nanoTime(); runTime = stopTime - startTime; System.out.println(); System.out.println("Int ALL THREADS FINISHED for JOB."); System.out.println("Int OVERALL Time: "+new DecimalFormat ("0.0000").format((double)runTime/1000000)+" ms"); System.out.println(); } // DoubleTest parallel in one Block ;) { Thread[] tarray = new Thread[tcount]; for (int i = 0; i < tcount; i++) tarray[i] = new Thread(new DoubleTest(loops)); long startTime = System.nanoTime(); long stopTime = 0; long runTime = 0; for (int i = 0; i < tcount; i++) tarray[i].start(); for (int i = 0; i < tcount; i++) tarray[i].join(); stopTime = System.nanoTime(); runTime = stopTime - startTime; System.out.println(); System.out.println("Double ALL THREADS FINISHED for JOB. "); System.out.println("Double OVERALL Time: "+new DecimalFormat ("0.0000").format((double)runTime/1000000)+" ms"); System.out.println(); } } catch(Exception e){} } } --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en -~----------~----~----~----~------~----~------~--~---