This replicates my "problem": f2 is much faster than f1, but, as I said,
isn't that because the loops are optimized away during compilation?
The weird thing I just noticed is that the run time still depends on the
size of A.

"""
    f1(A::Array{Float64,2})

Return the sum of all entries of `A`.

The accumulator starts at `0.0` and the entries are added one row at a time
(row index in the outer loop, column index in the inner loop).
"""
function f1(A::Array{Float64,2})
    nrows, ncols = size(A)
    total = 0.0

    # Outer index walks rows, inner index walks columns; the order is kept
    # on purpose because floating-point addition is order-sensitive.
    for row in 1:nrows, col in 1:ncols
        total += A[row, col]
    end

    return total
end

"""
    f2(A::Array{Float64,2})

Run the same row-by-row accumulation loop over `A` as a plain sum would, but
discard the accumulated value and always return `1.0`.
"""
function f2(A::Array{Float64,2})
    nrows, ncols = size(A)
    total = 0.0

    # The accumulated value is intentionally never used after the loop; the
    # loop is kept so the function still reads every entry of `A`.
    for row in 1:nrows, col in 1:ncols
        total += A[row, col]
    end

    return 1.0
end

# 10_000 x 10_000 matrix of standard-normal samples used as the benchmark input.
A = randn(10_000, 10_000);

# Call each function once before timing so that the `@time` measurements below
# do not include first-call compilation.
f1(A);
f2(A);

# Timed runs.
@time f1(A)
@time f2(A)

Reply via email to