------- Comment #3 from jb at gcc dot gnu dot org 2006-11-04 21:24 ------- Well, redoing the C benchmark above to use 1d arrays and manual index calculations, the results are now essentially the same as for the Fortran version. And a commercial compiler produces about the same results for the Fortran version as gfortran, which means the reason for our poor complex matmul performance lies elsewhere.
#include <stdio.h>
#include <stdlib.h>
#include <complex.h>
#include <sys/time.h>
#include <time.h>

/*
 * Single-precision complex matrix-multiply benchmark.
 *
 * Three n-by-n matrices are stored as flat 1-D arrays and addressed with
 * manual index arithmetic (element (i, j) lives at index i*n + j).  The
 * program times the naive triple loop c = a * b with gettimeofday(),
 * prints the elapsed wall-clock time and the inner-loop trip count to
 * stdout, and writes the result matrix to the file "c-matrix", one matrix
 * row per line with each element formatted as "(real,imag)".
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if an allocation or the
 * output file fails.
 */
int main(void)
{
    int n = 300;
    size_t count = (size_t)n * (size_t)n;
    complex float *a = NULL, *b = NULL, *c = NULL;
    struct timeval tv, tv2;
    double res;
    FILE *fp;
    int i, j, k;
    int tc = 0;
    int status = EXIT_FAILURE;

    a = malloc(count * sizeof *a);
    b = malloc(count * sizeof *b);
    c = malloc(count * sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "out of memory\n");
        goto out;
    }

    for (i = 0; i < n*n; i++) {
        a[i] = i*10.0 + 100.0*I;
        b[i] = 1.0 + 42.0*I;
        c[i] = 0.0 + 0.0*I;
    }

    /*
     * Timed region.  The loop nest is deliberately left in its naive form
     * (including the re-zeroing of c) so that the measurement reflects the
     * code the compiler generates for exactly this kernel.
     */
    gettimeofday(&tv, NULL);
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            c[i*n + j] = 0.0 + 0.0*I;
            for (k = 0; k < n; k++) {
                c[i*n + j] = c[i*n + j] + a[i*n + k] * b[k*n + j];
                tc++;
            }
        }
    }
    gettimeofday(&tv2, NULL);

    /* Seconds plus (possibly negative) microsecond remainder. */
    res = (tv2.tv_sec - tv.tv_sec) + (tv2.tv_usec - tv.tv_usec) / 1000000.0;
    printf("gemm time: %f\n", res);

    fp = fopen("c-matrix", "w");
    if (fp == NULL) {
        perror("c-matrix");
        goto out;
    }
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /*
             * A complex float cannot be passed to %f (undefined behaviour);
             * print the real and imaginary parts explicitly instead.
             */
            fprintf(fp, "(%f,%f) ",
                    crealf(c[i*n + j]), cimagf(c[i*n + j]));
        }
        fprintf(fp, "\n");
    }
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(fp) != 0) {
        perror("c-matrix");
        goto out;
    }

    printf("trip count: %i\n", tc);
    status = EXIT_SUCCESS;

out:
    free(c);
    free(b);
    free(a);
    return status;
}

/* -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29549 */