Hi Jerry,

> I am curious about what performance gain results from this? I can see
> saving a library call to our runtime libraries.  Do you have some timing
> results?

The speedup can be quite drastic for small matrices which can be
completely unrolled by -O3:

b1.f90:

! Timing test for the matmul intrinsic on 3x3 matrices: fills two matrices
! with random values, then forms their product 10**8 times.
program main
  use b2
  implicit none
  real, dimension(3,3) :: a, b, c
  integer :: i

  ! Operands are set once, before the timed loop.
  call random_number(a)
  call random_number(b)
  do i=1,10**8
     c = matmul(a,b)
     ! bar (from module b2) has an empty body.
     ! NOTE(review): presumably this call exists so the compiler cannot drop
     ! the product or move it out of the loop -- confirm before removing.
     call bar(b,c)
  end do
end program main

b2.f90:

! Module holding the do-nothing subroutine called from the matmul timing loop.
module b2
contains
  ! Takes two 3x3 real arrays and does nothing with them (empty body).
  ! NOTE(review): the missing intent attributes look deliberate -- adding
  ! intent(in) or pure could presumably let the optimizer hoist or remove the
  ! caller's matmul and change what the benchmark measures; confirm first.
  subroutine bar(b,c)
    real, dimension(3,3) :: b,c
  end subroutine bar
end module b2

ig25@linux-fd1f:~/Krempel/Matmul> gfortran -O3 -fno-frontend-optimize b2.f90 b1.f90 && time ./a.out

real    0m15.411s
user    0m15.404s
sys     0m0.001s

ig25@linux-fd1f:~/Krempel/Matmul> gfortran -O3 b2.f90 b1.f90 && time ./a.out

real    0m1.736s
user    0m1.735s
sys     0m0.001s

Reply via email to