> Could it be that your CPU has a single floating-point unit shared by 4
> cores on a single die, and thus only 2 floating-point units total for
> all 8 of your cores?  If so, then that fact, plus the fact that each
> core has its own separate ALU for integer operations, would seem to
> explain the results you are seeing.

Exactly, this would explain the behaviour. Unfortunately, it is not
the case. I implemented a small example using Java (Java threads) and
C (Pthreads), and in both cases I get a linear speedup. See the code
below. The cores share only 12 MB of cache, but this should be
enough memory for my micro-benchmark. Given the linear speedup in
Java and C, I would rule out a hardware limitation.

_
Johann

### C ###

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* Number of serial runs and of worker threads that main() executes. */
#define NUM_THREADS     8

/* Forward declarations of the per-iteration workloads defined below. */
int inc(int);
double inc_d(int);

/* Integer workload: returns x incremented by one. */
int inc(int x){
        return x + 1;
}

/* Floating-point workload: converts x to double and adds 1.0. */
double inc_d(int x){
        const double widened = (double)x;
        return widened + 1.0;
}

/*
 * Thread entry point for the integer benchmark.
 *
 * t carries the thread index packed into a pointer. The function calls
 * inc() one billion times, prints the last value it produced, and then
 * terminates the thread via pthread_exit(), handing t back as the exit
 * status.
 */
void *BusyWork(void *t){
        const long id = (long)t;
        int last = 0;
        int n = 0;

        printf("Thread %ld starting...\n", id);
        while (n < 1000000000){
                last = inc(n);
                n++;
        }
        printf("Thread %ld done. Result = %i\n", id, last);
        pthread_exit((void*) t);
}

/*
 * Thread entry point for the floating-point benchmark.
 *
 * t carries the thread index packed into a pointer. The function calls
 * inc_d() one billion times, prints the last value it produced, and then
 * terminates the thread via pthread_exit(), handing t back as the exit
 * status.
 */
void *BusyWork_d(void *t){
        const long id = (long)t;
        double last = 0.0;
        int n = 0;

        printf("Thread %ld starting...\n", id);
        while (n < 1000000000){
                last = inc_d(n);
                n++;
        }
        printf("Thread %ld done. Result = %e\n", id, last);
        pthread_exit((void*) t);
}

/*
 * Serial counterpart of BusyWork_d: calls inc_d() one billion times on the
 * calling thread, without any thread bookkeeping or output.
 *
 * Returns NULL. The function is declared to return void *, so it must not
 * fall off the end without a value.
 */
void *BusyWork_single(){
        int i;
        double result=0.0;
        for (i=0; i<1000000000; i++){
                result = inc_d(i);
        }
        /* The computed value is intentionally discarded; only the work matters. */
        (void)result;
        return NULL;
}

/*
 * Benchmark driver.
 *
 * Phase 1 runs BusyWork_single() NUM_THREADS times back to back on the main
 * thread and prints the elapsed wall-clock time. Phase 2 starts NUM_THREADS
 * joinable threads running BusyWork_d(), joins them all, and prints the
 * elapsed wall-clock time again.
 *
 * Timing uses time()/difftime() from <time.h>.
 *
 * Exits with status -1 if a thread cannot be created or joined.
 */
int main (int argc, char *argv[]){

        time_t start,end;
        double dif;

        pthread_t thread[NUM_THREADS];
        pthread_attr_t attr;
        int rc;
        long t;
        void *status;

        start = time(NULL);

        /* Running serial code */
        for(t=0; t<NUM_THREADS; t++){
                printf("Running serial code #: %ld\n", t);
                BusyWork_single();
        }

        end = time(NULL);
        dif = difftime(end,start);
        printf("Runtime for serial code: %f\n", dif);

        start = time(NULL);

        /* Initialize the attribute object and request joinable threads */
        pthread_attr_init(&attr);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

        for(t=0; t<NUM_THREADS; t++){
                printf("Main: creating thread %ld\n", t);
                /* Integer variant: pass BusyWork instead of BusyWork_d. */
                /* The thread index travels to the worker packed into the void * argument. */
                rc = pthread_create(&thread[t], &attr, BusyWork_d, (void *)t);
                if (rc) {
                        printf("ERROR; return code from pthread_create() is %d\n", rc);
                        exit(-1);
                }
        }

        /* Free attribute and wait for the other threads */
        pthread_attr_destroy(&attr);
        for(t=0; t<NUM_THREADS; t++){
                rc = pthread_join(thread[t], &status);
                if (rc) {
                        printf("ERROR; return code from pthread_join() is %d\n", rc);
                        exit(-1);
                }
                printf("Main: completed join with thread %ld having a status of %ld\n",
                       t,(long)status);
        }

        end = time(NULL);
        dif = difftime(end,start);
        printf("Runtime for parallel code: %f\n", dif);

        printf("Main: program completed. Exiting.\n");
        pthread_exit(NULL);
}

### Java ###

import java.text.DecimalFormat;

/**
 * Micro-benchmark that times a trivial counting loop using {@code long}
 * arithmetic and using {@code double} arithmetic, first with the worker
 * threads run one after another and then with all of them running at once.
 */
public class MapTest{

        /** Worker that counts from zero up to {@code loops} in a {@code long}. */
        public static class IntTest implements Runnable{
                long loops;
                long result;

                /**
                 * @param loops number of increments performed by {@link #run()}
                 */
                public IntTest(long loops){
                        this.loops = loops;
                }

                /** Resets {@code result}, increments it {@code loops} times and prints it. */
                @Override
                public void run(){
                        result = 0;
                        for (long i = 0L; i < loops; i++){
                                result = result + 1;
                        }
                        System.out.println(result);
                }
        }

        /** Worker that counts from zero up to {@code loops} in a {@code double}. */
        public static class DoubleTest implements Runnable{
                long loops;
                double result;

                /**
                 * @param loops number of increments performed by {@link #run()}
                 */
                public DoubleTest(long loops){
                        this.loops = loops;
                }

                /** Resets {@code result}, adds 1.0 to it {@code loops} times and prints it. */
                @Override
                public void run(){
                        result = 0.0;
                        for (long i = 0L; i < loops; i++){
                                result = result + 1.0;
                        }
                        System.out.println(result);
                }
        }

        /**
         * Creates {@code tcount} worker threads, runs them, and prints the
         * elapsed wall-clock time in milliseconds.
         *
         * @param label       prefix of the timing line ("Int" or "Double")
         * @param doneMessage line printed once every worker has finished
         * @param useDouble   {@code true} for {@link DoubleTest} workers,
         *                    {@code false} for {@link IntTest} workers
         * @param parallel    {@code true} to start all threads before joining
         *                    any; {@code false} to join each thread before the
         *                    next one is started
         * @param tcount      number of worker threads
         * @param loops       iterations performed by each worker
         * @throws InterruptedException if the calling thread is interrupted
         *                              while waiting for a worker
         */
        private static void timeJob(String label, String doneMessage, boolean useDouble,
                        boolean parallel, int tcount, long loops) throws InterruptedException{
                // Threads are built before the clock starts so construction is not timed.
                Thread[] tarray = new Thread[tcount];
                for (int i = 0; i < tcount; i++){
                        Runnable job;
                        if (useDouble){
                                job = new DoubleTest(loops);
                        } else {
                                job = new IntTest(loops);
                        }
                        tarray[i] = new Thread(job);
                }

                long startTime = System.nanoTime();
                if (parallel){
                        for (int i = 0; i < tcount; i++){
                                tarray[i].start();
                        }
                        for (int i = 0; i < tcount; i++){
                                tarray[i].join();
                        }
                } else {
                        for (int i = 0; i < tcount; i++){
                                tarray[i].start();
                                tarray[i].join();
                        }
                }
                long runTime = System.nanoTime() - startTime;

                System.out.println();
                System.out.println(doneMessage);
                System.out.println(label+" OVERALL Time:   "+new DecimalFormat("0.0000")
                                .format((double)runTime/1000000)+" ms");
                System.out.println();
        }

        /**
         * Runs the four measurements in order: Int sequential, Double
         * sequential, Int parallel, Double parallel.
         *
         * @param args ignored
         */
        public static void main(String[] args){
                long loops = 10000000000L;
                // number of threads
                int tcount = 8;

                try{
                        System.out.println("Number of Runs: "+tcount);
                        timeJob("Int", "Int ALL RUNS FINISHED for JOB.",
                                        false, false, tcount, loops);

                        System.out.println("Number of Runs: "+tcount);
                        timeJob("Double", "Double ALL RUNS FINISHED for JOB.",
                                        true, false, tcount, loops);

                        System.out.println("Number of Threads: "+tcount);
                        timeJob("Int", "Int ALL THREADS FINISHED for JOB.",
                                        false, true, tcount, loops);
                        timeJob("Double", "Double ALL THREADS FINISHED for JOB. ",
                                        true, true, tcount, loops);
                }
                catch(InterruptedException e){
                        // Restore the interrupt flag and report instead of silently
                        // dropping the failure; other exceptions now propagate.
                        Thread.currentThread().interrupt();
                        System.err.println("Benchmark interrupted: "+e);
                }
        }
}
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to