------- Comment #2 from jb at gcc dot gnu dot org  2006-11-04 20:34 -------
I did some experimenting, and it seems the C version of a trivial matrix
multiply program is much slower than the same program written in Fortran.

To get the float version, swap in the commented-out declaration and the
commented-out c[i][j] = 0.0 assignment in the loop.

#include <complex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

/*
 * Benchmark: time a naive triple-loop n x n matrix multiply c = a * b and
 * print the elapsed wall-clock time (seconds) and the inner-loop trip count.
 *
 * To build the float variant instead of the complex one, swap each active
 * line with the commented-out line that follows it.
 */
int main(void)
{
  const int n = 300;
  complex float a[n][n], b[n][n], c[n][n];
  //float a[n][n], b[n][n], c[n][n];
  complex float checksum = 0.0f;
  int i, j, k, tc;

  struct timeval tv, tv2;
  float res;

  /* Fill the inputs with values in [0, 1) before the timed region; without
     this the multiply would read indeterminate values from a and b.  In the
     float variant the assignment simply discards the imaginary part.  */
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
        {
          a[i][j] = rand () / (RAND_MAX + 1.0) + rand () / (RAND_MAX + 1.0) * I;
          b[i][j] = rand () / (RAND_MAX + 1.0) + rand () / (RAND_MAX + 1.0) * I;
        }
    }

  tc = 0;
  gettimeofday (&tv, NULL);
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
        {
          c[i][j] = 0.0 + 0.0*I;
          //c[i][j] = 0.0;
          for (k = 0; k < n; k++)
            {
            //      printf("i %i, j %i, k %i\n", i, j, k);
              c[i][j] = c[i][j] + a[i][k] * b[k][j];
              tc++;
            }
        }
    }
  gettimeofday (&tv2, NULL);
  /* Elapsed wall-clock time in seconds (whole seconds plus microseconds).  */
  res = tv2.tv_sec - tv.tv_sec + (tv2.tv_usec - tv.tv_usec) / 1000000.0;
  printf ("gemm time: %f\n", res);
  printf ("trip count: %i\n", tc);

  /* Consume every element of c, outside the timed region, so the multiply
     has an observable result and cannot be discarded as dead code.  */
  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      checksum += c[i][j];
  printf ("checksum: %f %f\n", crealf (checksum), cimagf (checksum));

  return 0;
}


Fortran version:

! Benchmark: time a hand-written triple-loop multiply c = a*b of two n x n
! default-kind complex matrices, print the elapsed CPU time, and write the
! result matrix to the unformatted file "cmatrix".
program mymatmul
  implicit none
  integer, parameter :: n = 300
  ! rr and ri are scratch arrays holding the real and imaginary parts
  ! used to build the complex inputs.
  real, dimension(n,n) :: rr, ri
  complex, dimension(n,n) :: a,b,c
  real :: t1, t2
  integer :: i, j, k

  ! Fill a and b with random complex values; rr/ri are regenerated for b.
  call random_number (rr)
  call random_number (ri)
  a = cmplx (rr, ri)
  call random_number (rr)
  call random_number (ri)
  b = cmplx (rr, ri)

  ! Only the multiply itself is timed; the setup above is excluded.
  call cpu_time (t1)

  ! Timed kernel: for each element c(i,j), accumulate the product of
  ! row i of a with column j of b over k.
  do j = 1, n
     do i = 1, n
        c(i,j) = cmplx (0., 0.)
        do k = 1, n
           c(i,j) = c(i,j) + a(i,k) * b(k,j)
        end do
     end do
  end do

  call cpu_time (t2)
  ! Elapsed CPU time of the kernel, in seconds.
  write (*,'(F8.4)') t2-t1
  ! Write the result out; presumably this also keeps the computation from
  ! being optimized away -- TODO confirm intent.
  open (10, file="cmatrix", form='unformatted')
  write (10) c
  close (10)

end program mymatmul

Fortran version with real instead of complex:

! Benchmark: time a hand-written triple-loop multiply c = a*b of two n x n
! default-kind real matrices, print the elapsed CPU time and the inner-loop
! trip count, and write the result matrix to the unformatted file "rmatrix".
program mymatmul
  implicit none
  integer, parameter :: n = 300
  real, dimension(n,n) :: a,b,c
  real :: t1, t2
  ! tc counts executions of the innermost loop body.
  integer :: i, j, k, tc

  ! Fill the inputs with random values before timing starts.
  call random_number (a)
  call random_number (b)

  call cpu_time (t1)

  ! Timed kernel: for each element c(i,j), accumulate the product of
  ! row i of a with column j of b over k, counting every inner iteration.
  tc = 0
  do j = 1, n
     do i = 1, n
        c(i,j) = 0.
        do k = 1, n
           c(i,j) = c(i,j) + a(i,k) * b(k,j)
           tc = tc + 1
        end do
     end do
  end do

  call cpu_time (t2)
  ! Elapsed CPU time of the kernel, in seconds.
  write (*,'(F8.4)') t2-t1
  write (*, *) 'Trip count: ', tc
  ! Write the result out; presumably this also keeps the computation from
  ! being optimized away -- TODO confirm intent.
  open (10, file="rmatrix", form='unformatted')
  write (10) c
  close (10)

end program mymatmul

And my results:

C version, complex:
-O2
    2.0 s
-ffast-math
    0.9 s
gfortran -O2:
    0.32 s

float:
-O2     0.6 s
fast math makes no difference!

gfortran -O2 -g
    0.07 s


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29549

Reply via email to