On Thu, 2007-10-18 at 12:02 +0800, Biplab Kumar Modak wrote: > skaller wrote: > > On Wed, 2007-10-17 at 18:14 +0100, Biagio Lucini wrote: > >> skaller wrote: > > > >> It would be interesting to try with another compiler. Do you have access > >> to another OpenMP-enabled compiler? > > > > Unfortunately no, unless MSVC++ in VS2005 has OpenMP. > > I have an Intel licence but they're too tied up with commercial > > vendors and it doesn't work on Ubuntu (it's built for Fedora and Suse). > > > If possible, you can post the source code. I've a MSVC 2005 license (I > bought it to get OpenMP working with it). > > I can then give it a try. I have a dual core PC. :)
OK, attached. -- John Skaller <skaller at users dot sf dot net> Felix, successor to C++: http://felix.sf.net
/*
 * LU.c
 *
 * A program to do an LU decomposition.
 *
 * Fills a SIZE x SIZE matrix with random values, factors it in place
 * (no pivoting) and prints how long the factorisation took.  The trailing
 * sub-matrix update of every elimination step is parallelised with OpenMP
 * when the compiler supports it; without OpenMP the pragmas are ignored and
 * the program runs serially.
 */
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif

#define SIZE 800

/*
 * The matrix holds SIZE*SIZE doubles (roughly 5 MB with 8-byte doubles).
 * It lives in static storage because an automatic array of that size can
 * exceed the default stack limit on some platforms.
 */
static double A[SIZE][SIZE];

/*
 * Current time in seconds, for interval measurements only.
 *
 * clock() reports processor time, which on many systems is accumulated over
 * all threads and therefore hides any parallel speed-up.  When built with
 * OpenMP the wall-clock timer omp_get_wtime() is used instead.
 */
static double lu_wall_seconds(void)
{
#ifdef _OPENMP
    return omp_get_wtime();
#else
    return (double)clock() / CLOCKS_PER_SEC;
#endif
}

int main(int argc, char *argv[])
{
    double start, stop;            /* for keeping track of running time */
    double col[SIZE], row[SIZE];   /* copies of the pivot column and row */

    (void)argc;
    (void)argv;

    /* preload A with random values */
    for (int i = 0; i < SIZE; i++) {
        for (int j = 0; j < SIZE; j++) {
            A[i][j] = rand();
        }
    }

    /* time start now */
    start = lu_wall_seconds();

    /*
     * The core algorithm.  Step k depends on the result of step k-1, so the
     * outer loop is sequential; only the update inside it is parallel.
     */
    for (int k = 0; k < SIZE - 1; k++) {
        /* set col values to column k of A */
        for (int n = k; n < SIZE; n++) {
            col[n] = A[n][k];
        }

        /*
         * Scale row k by the pivot and keep a copy of the scaled row.
         * There is no pivoting: a zero pivot (rand() may return 0) yields
         * inf/nan entries rather than an error.
         */
        for (int n = k + 1; n < SIZE; n++) {
            A[k][n] /= col[k];
            row[n] = A[k][n];
        }

        /*
         * Update the trailing sub-matrix: A[i][j] -= A[i][k] * A[k][j],
         * i.e. col[i] * row[j].  Every element is written exactly once, so
         * the iterations can run in any order.  Both loop counters are
         * declared inside the loops, which makes them private to each
         * thread; a counter declared outside the parallel region would be
         * shared and raced on.
         */
        #pragma omp parallel for shared(A, row, col)
        for (int i = k + 1; i < SIZE; i++) {
            for (int j = k + 1; j < SIZE; j++) {
                A[i][j] -= col[i] * row[j];
            }
        }
    }

    /* we're done so stop the timer */
    stop = lu_wall_seconds();
    printf("Completed decomposition in %.3f seconds\n", stop - start);

    return 0;
}
/*
 * combined.c
 *
 * This program combines what we saw before.  It calculates e and pi
 * and then integrates the polynomial 3x^3 + 2x^2 + x over [0, 2] (this
 * replaces the earlier y = x^2).  The elapsed time in ms is printed at
 * several points in the program.
 *
 * With OpenMP, e and pi are computed in two concurrent sections and the
 * integration loop is shared between the threads; without OpenMP the
 * pragmas are ignored and everything runs serially.
 */
#include <stdio.h>
#include <time.h>
#ifdef _OPENMP
#include <omp.h>
#endif

#define num_steps 10000000  /* steps to use in taylor expansions */
#define int_steps (1<<30)   /* steps to use in integration */

/*
 * Current time in seconds, for interval measurements only.
 *
 * clock() reports processor time, which on many systems is accumulated over
 * all threads; when built with OpenMP the wall-clock timer omp_get_wtime()
 * is used instead.
 */
static double combined_wall_seconds(void)
{
#ifdef _OPENMP
    return omp_get_wtime();
#else
    return (double)clock() / CLOCKS_PER_SEC;
#endif
}

/* Milliseconds elapsed since `start` (a value from combined_wall_seconds). */
static double combined_ms_since(double start)
{
    return (combined_wall_seconds() - start) * 1000.0;
}

int main(int argc, char *argv[])
{
    double start, stop;   /* times of beginning and end of procedure */

    /* Values for part 1 */
    double e = 0.0, pi = 0.0, product;

    /* Values for part 2 */
    const double upper = 2.0;                          /* integrate over [0, upper] */
    const double dx = upper / (double)(int_steps);     /* width of one step */
    double sum = 0.0;   /* must be initialised: the reduction adds into it */

    (void)argc;
    (void)argv;

    /* start the timer */
    start = combined_wall_seconds();

    #pragma omp parallel reduction(+: sum)
    {
        #pragma omp sections nowait
        {
            #pragma omp section
            {
                /* First we calculate e from its taylor expansion.  All
                   working variables are local so that the two sections do
                   not share a loop counter. */
                double factorial = 1.0;
                double e_acc = 1.0;

                printf("e started at %.0f\n", combined_ms_since(start));
                for (int i = 1; i < num_steps; i++) {
                    factorial *= i;
                    e_acc += 1.0 / factorial;
                }
                e = e_acc;
                printf("e done at %.0f\n", combined_ms_since(start));
            }
            #pragma omp section
            {
                /* Then we calculate pi from its taylor expansion */
                double pi_acc = 0.0;

                printf("pi started at %.0f\n", combined_ms_since(start));
                for (int i = 0; i < num_steps * 20; i++) {
                    pi_acc += 1.0 / (i * 4.0 + 1.0);
                    pi_acc -= 1.0 / (i * 4.0 + 3.0);
                }
                pi = pi_acc * 4.0;
                printf("pi done at %.0f\n", combined_ms_since(start));
            }
        } /* sections */

        /* Now we integrate the function.  Because of the nowait above,
           threads that did not get a section start here immediately; every
           thread prints its own start message. */
        printf("integration started at %.0f\n", combined_ms_since(start));

        #pragma omp for nowait
        for (int i = 0; i < int_steps; i++) {
            /* x is declared per iteration so it is private to each thread */
            const double x = upper * (double)i / (double)(int_steps);
            sum += (3*x*x*x + 2*x*x + x) * dx;   /* left Riemann sum */
        }

        #pragma omp single /* we only need to print this once */
        printf("integration done at %.0f\n", combined_ms_since(start));
    } /* omp parallel: the implicit barrier here guarantees e, pi and the
         reduced sum are complete before they are read below */

    product = e * pi;

    /* we're done so stop the timer */
    stop = combined_wall_seconds();
    printf("Values: e*pi = %f, integral = %f\n", product, sum);
    printf("Total elapsed time: %.3f seconds\n", stop - start);

    return 0;
}